From 63e47a2249d3d16e95aed7fed55f35716fc69081 Mon Sep 17 00:00:00 2001 From: Tobias Koppers Date: Fri, 9 Aug 2024 11:25:58 +0200 Subject: [PATCH 01/46] add lmdb persisting and restoring --- Cargo.lock | 24 +++ .../crates/turbo-tasks-backend/Cargo.toml | 2 + .../turbo-tasks-backend/src/backend/mod.rs | 134 ++++++++++-- .../src/backend/operation/mod.rs | 29 ++- .../src/backend/operation/update_cell.rs | 2 +- .../src/backend/storage.rs | 78 ++++++- .../src/backing_storage.rs | 23 ++ .../crates/turbo-tasks-backend/src/data.rs | 14 +- .../crates/turbo-tasks-backend/src/lib.rs | 4 +- .../src/lmdb_backing_storage.rs | 199 ++++++++++++++++++ .../src/utils/chunked_vec.rs | 21 +- .../src/utils/ptr_eq_arc.rs | 4 + .../turbo-tasks-backend/tests/test_config.trs | 19 +- .../crates/turbo-tasks-testing/tests/basic.rs | 16 +- turbopack/crates/turbo-tasks/src/lib.rs | 2 +- turbopack/crates/turbo-tasks/src/task/mod.rs | 2 +- 16 files changed, 526 insertions(+), 47 deletions(-) create mode 100644 turbopack/crates/turbo-tasks-backend/src/backing_storage.rs create mode 100644 turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs diff --git a/Cargo.lock b/Cargo.lock index 9cf6ad835a3f4..6d0d8ef2cbe91 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3609,6 +3609,28 @@ version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b4ce301924b7887e9d637144fdade93f9dfff9b60981d4ac161db09720d39aa5" +[[package]] +name = "lmdb" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b0908efb5d6496aa977d96f91413da2635a902e5e31dbef0bfb88986c248539" +dependencies = [ + "bitflags 1.3.2", + "libc", + "lmdb-sys", +] + +[[package]] +name = "lmdb-sys" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d5b392838cfe8858e86fac37cf97a0e8c55cc60ba0a18365cadc33092f128ce9" +dependencies = [ + "cc", + "libc", + "pkg-config", +] + [[package]] name = "lock_api" version = "0.4.10" @@ -8574,10 +8596,12 @@ dependencies = [ "anyhow", "async-trait", "auto-hash-map", + "bincode", "dashmap", "either", "hashbrown 0.14.5", "indexmap 1.9.3", + "lmdb", "once_cell", "parking_lot", "rand", diff --git a/turbopack/crates/turbo-tasks-backend/Cargo.toml b/turbopack/crates/turbo-tasks-backend/Cargo.toml index f51271f10f948..df94cd7602cc3 100644 --- a/turbopack/crates/turbo-tasks-backend/Cargo.toml +++ b/turbopack/crates/turbo-tasks-backend/Cargo.toml @@ -16,10 +16,12 @@ workspace = true anyhow = { workspace = true } async-trait = { workspace = true } auto-hash-map = { workspace = true } +bincode = "1.3.3" dashmap = { workspace = true, features = ["raw-api"]} either = { workspace = true } hashbrown = { workspace = true } indexmap = { workspace = true } +lmdb = "0.8.0" once_cell = { workspace = true } parking_lot = { workspace = true } rand = { workspace = true } diff --git a/turbopack/crates/turbo-tasks-backend/src/backend/mod.rs b/turbopack/crates/turbo-tasks-backend/src/backend/mod.rs index 63bb787963d4c..2fe67fe4f1eb8 100644 --- a/turbopack/crates/turbo-tasks-backend/src/backend/mod.rs +++ b/turbopack/crates/turbo-tasks-backend/src/backend/mod.rs @@ -10,10 +10,10 @@ use std::{ mem::take, pin::Pin, sync::{ - atomic::{AtomicUsize, Ordering}, + atomic::{AtomicU64, AtomicUsize, Ordering}, Arc, }, - time::Duration, + time::{Duration, Instant}, }; use anyhow::{bail, Result}; @@ -44,6 +44,7 @@ use crate::{ }, storage::{get, get_many, remove, Storage}, }, + backing_storage::BackingStorage, data::{ ActiveType, AggregationNumber, CachedDataItem, CachedDataItemIndex, CachedDataItemKey, CachedDataItemValue, CachedDataUpdate, CellRef, InProgressCellState, InProgressState, @@ -91,6 +92,8 @@ pub enum TransientTask { } pub struct TurboTasksBackend { + start_time: Instant, + persisted_task_id_factory: IdFactoryWithReuse, transient_task_id_factory: IdFactoryWithReuse, @@ -116,18 +119,20 @@ pub struct TurboTasksBackend { /// Condition Variable that is triggered when a snapshot is completed and /// operations can continue. snapshot_completed: Condvar, -} + /// The timestamp of the last started snapshot. + last_snapshot: AtomicU64, -impl Default for TurboTasksBackend { - fn default() -> Self { - Self::new() - } + backing_storage: Arc, } impl TurboTasksBackend { - pub fn new() -> Self { + pub fn new(backing_storage: Arc) -> Self { Self { - persisted_task_id_factory: IdFactoryWithReuse::new(1, (TRANSIENT_TASK_BIT - 1) as u64), + start_time: Instant::now(), + persisted_task_id_factory: IdFactoryWithReuse::new( + *backing_storage.next_free_task_id() as u64, + (TRANSIENT_TASK_BIT - 1) as u64, + ), transient_task_id_factory: IdFactoryWithReuse::new( TRANSIENT_TASK_BIT as u64, u32::MAX as u64, @@ -141,6 +146,8 @@ impl TurboTasksBackend { snapshot_request: Mutex::new(SnapshotRequest::new()), operations_suspended: Condvar::new(), snapshot_completed: Condvar::new(), + last_snapshot: AtomicU64::new(0), + backing_storage, } } @@ -386,7 +393,10 @@ impl TurboTasksBackend { let _ = reader_task.add(CachedDataItem::CellDependency { target, value: () }); } } - return Ok(Ok(CellContent(Some(content)).into_typed(cell.type_id))); + return Ok(Ok(TypedCellContent( + cell.type_id, + CellContent(Some(content.1)), + ))); } // Check cell index range (cell might not exist at all) @@ -446,6 +456,10 @@ impl TurboTasksBackend { if let Some(task_type) = self.task_cache.lookup_reverse(&task_id) { return Some(task_type); } + if let Some(task_type) = self.backing_storage.reverse_lookup_task_cache(task_id) { + let _ = self.task_cache.try_insert(task_type.clone(), task_id); + return Some(task_type); + } None } @@ -459,9 +473,70 @@ impl TurboTasksBackend { ) } } + + fn snapshot(&self) -> Option { + let mut snapshot_request = self.snapshot_request.lock(); + snapshot_request.snapshot_requested = true; + let active_operations = self + .in_progress_operations + .fetch_or(SNAPSHOT_REQUESTED_BIT, std::sync::atomic::Ordering::Relaxed); + if active_operations != 0 { + self.operations_suspended + .wait_while(&mut snapshot_request, |_| { + self.in_progress_operations + .load(std::sync::atomic::Ordering::Relaxed) + != SNAPSHOT_REQUESTED_BIT + }); + } + let suspended_operations = snapshot_request + .suspended_operations + .iter() + .map(|op| op.arc().clone()) + .collect::>(); + drop(snapshot_request); + let persisted_storage_log = take(&mut *self.persisted_storage_log.lock()); + let persisted_task_cache_log = take(&mut *self.persisted_task_cache_log.lock()); + let mut snapshot_request = self.snapshot_request.lock(); + snapshot_request.snapshot_requested = false; + self.in_progress_operations + .fetch_sub(SNAPSHOT_REQUESTED_BIT, std::sync::atomic::Ordering::Relaxed); + self.snapshot_completed.notify_all(); + let snapshot_time = Instant::now(); + drop(snapshot_request); + + let mut counts: HashMap = HashMap::new(); + for CachedDataUpdate { task, .. } in persisted_storage_log.iter() { + *counts.entry(*task).or_default() += 1; + } + + if !persisted_task_cache_log.is_empty() || !persisted_storage_log.is_empty() { + if let Err(err) = self.backing_storage.save_snapshot( + suspended_operations, + persisted_task_cache_log, + persisted_storage_log, + ) { + println!("Persising failed: {:#?}", err); + return None; + } + println!("Snapshot saved"); + } + + for (task_id, count) in counts { + self.storage + .access_mut(task_id) + .persistance_state_mut() + .finish_persisting_items(count); + } + + Some(snapshot_time) + } } impl Backend for TurboTasksBackend { + fn startup(&self, turbo_tasks: &dyn TurboTasksBackendApi) { + turbo_tasks.schedule_backend_background_job(BackendJobId::from(1)); + } + fn get_or_create_persistent_task( &self, task_type: CachedTaskType, @@ -473,6 +548,12 @@ impl Backend for TurboTasksBackend { return task_id; } + if let Some(task_id) = self.backing_storage.forward_lookup_task_cache(&task_type) { + let _ = self.task_cache.try_insert(Arc::new(task_type), task_id); + self.connect_child(parent_task, task_id, turbo_tasks); + return task_id; + } + let task_type = Arc::new(task_type); let task_id = self.persisted_task_id_factory.get(); if let Err(existing_task_id) = self.task_cache.try_insert(task_type.clone(), task_id) { @@ -929,12 +1010,31 @@ impl Backend for TurboTasksBackend { stale } - fn run_backend_job( - &self, - _: BackendJobId, - _: &dyn TurboTasksBackendApi, - ) -> Pin + Send + 'static)>> { - todo!() + fn run_backend_job<'a>( + &'a self, + id: BackendJobId, + turbo_tasks: &'a dyn TurboTasksBackendApi, + ) -> Pin + Send + 'a>> { + Box::pin(async move { + if *id == 1 { + const SNAPSHOT_INTERVAL: Duration = Duration::from_secs(1); + + let last_snapshot = self.last_snapshot.load(Ordering::Relaxed); + let last_snapshot = self.start_time + Duration::from_millis(last_snapshot); + let elapsed = last_snapshot.elapsed(); + if elapsed < SNAPSHOT_INTERVAL { + tokio::time::sleep(SNAPSHOT_INTERVAL - elapsed).await; + } + + if let Some(last_snapshot) = self.snapshot() { + let last_snapshot = last_snapshot.duration_since(self.start_time); + self.last_snapshot + .store(last_snapshot.as_millis() as u64, Ordering::Relaxed); + + turbo_tasks.schedule_backend_background_job(id); + } + } + }) } fn try_read_task_output( @@ -984,7 +1084,7 @@ impl Backend for TurboTasksBackend { let ctx = self.execute_context(turbo_tasks); let task = ctx.task(task_id); if let Some(content) = get!(task, CellData { cell }) { - Ok(CellContent(Some(content.clone())).into_typed(cell.type_id)) + Ok(CellContent(Some(content.1.clone())).into_typed(cell.type_id)) } else { Ok(CellContent(None).into_typed(cell.type_id)) } diff --git a/turbopack/crates/turbo-tasks-backend/src/backend/operation/mod.rs b/turbopack/crates/turbo-tasks-backend/src/backend/operation/mod.rs index 74dbe91f67e46..e2d1f30f2a61d 100644 --- a/turbopack/crates/turbo-tasks-backend/src/backend/operation/mod.rs +++ b/turbopack/crates/turbo-tasks-backend/src/backend/operation/mod.rs @@ -52,8 +52,25 @@ impl<'a> ExecuteContext<'a> { } pub fn task(&self, task_id: TaskId) -> TaskGuard<'a> { + let mut task = self.backend.storage.access_mut(task_id); + if !task.persistance_state().is_restored() { + if task_id.is_transient() { + task.persistance_state_mut().set_restored(); + } else { + // Avoid holding the lock too long since this can also affect other tasks + drop(task); + let items = self.backend.backing_storage.lookup_data(task_id); + task = self.backend.storage.access_mut(task_id); + if !task.persistance_state().is_restored() { + for item in items { + task.add(item); + } + task.persistance_state_mut().set_restored(); + } + } + } TaskGuard { - task: self.backend.storage.access_mut(task_id), + task, task_id, backend: self.backend, } @@ -160,7 +177,7 @@ impl TaskGuard<'_> { self.task.add(item) } else if self.task.add(item.clone()) { let (key, value) = item.into_key_and_value(); - // TODO task.persistance_state.add_persisting_item(); + self.task.persistance_state_mut().add_persisting_item(); self.backend .persisted_storage_log .lock() @@ -190,7 +207,7 @@ impl TaskGuard<'_> { key.clone(), value.clone(), )); - // TODO task.persistance_state.add_persisting_item(); + self.task.persistance_state_mut().add_persisting_item(); self.backend .persisted_storage_log .lock() @@ -204,7 +221,7 @@ impl TaskGuard<'_> { let item = CachedDataItem::from_key_and_value(key.clone(), value); if let Some(old) = self.task.insert(item) { if old.is_persistent() { - // TODO task.persistance_state.add_persisting_item(); + self.task.persistance_state_mut().add_persisting_item(); self.backend .persisted_storage_log .lock() @@ -264,7 +281,7 @@ impl TaskGuard<'_> { new }); if add_persisting_item { - // TODO task.persistance_state.add_persisting_item(); + task.persistance_state_mut().add_persisting_item(); } } @@ -273,7 +290,7 @@ impl TaskGuard<'_> { if let Some(value) = old_value { if key.is_persistent() && value.is_persistent() { let key = key.clone(); - // TODO task.persistance_state.add_persisting_item(); + self.task.persistance_state_mut().add_persisting_item(); self.backend .persisted_storage_log .lock() diff --git a/turbopack/crates/turbo-tasks-backend/src/backend/operation/update_cell.rs b/turbopack/crates/turbo-tasks-backend/src/backend/operation/update_cell.rs index b7b4e185839bc..705ab5660f3e1 100644 --- a/turbopack/crates/turbo-tasks-backend/src/backend/operation/update_cell.rs +++ b/turbopack/crates/turbo-tasks-backend/src/backend/operation/update_cell.rs @@ -16,7 +16,7 @@ impl UpdateCellOperation { let old_content = if let CellContent(Some(new_content)) = content { task.insert(CachedDataItem::CellData { cell, - value: new_content, + value: new_content.into_typed(cell.type_id), }) } else { task.remove(&CachedDataItemKey::CellData { cell }) diff --git a/turbopack/crates/turbo-tasks-backend/src/backend/storage.rs b/turbopack/crates/turbo-tasks-backend/src/backend/storage.rs index b5cb827b2a179..4b455b0c8ec15 100644 --- a/turbopack/crates/turbo-tasks-backend/src/backend/storage.rs +++ b/turbopack/crates/turbo-tasks-backend/src/backend/storage.rs @@ -16,6 +16,40 @@ use crate::{ utils::dash_map_multi::{get_multiple_mut, RefMut}, }; +const UNRESTORED: u32 = u32::MAX; + +pub struct PersistanceState { + value: u32, +} + +impl Default for PersistanceState { + fn default() -> Self { + Self { value: UNRESTORED } + } +} + +impl PersistanceState { + pub fn set_restored(&mut self) { + self.value = 0; + } + + pub fn add_persisting_item(&mut self) { + self.value += 1; + } + + pub fn finish_persisting_items(&mut self, count: u32) { + self.value -= count; + } + + pub fn is_restored(&self) -> bool { + self.value != UNRESTORED + } + + pub fn is_fully_persisted(&self) -> bool { + self.value == 0 + } +} + const INDEX_THRESHOLD: usize = 1024; type IndexedMap = AutoMap< @@ -27,8 +61,14 @@ pub enum InnerStorage where T::Key: Indexed, { - Plain { map: AutoMap }, - Indexed { map: IndexedMap }, + Plain { + map: AutoMap, + persistance_state: PersistanceState, + }, + Indexed { + map: IndexedMap, + persistance_state: PersistanceState, + }, } impl InnerStorage @@ -38,11 +78,38 @@ where fn new() -> Self { Self::Plain { map: AutoMap::new(), + persistance_state: PersistanceState::default(), + } + } + + pub fn persistance_state(&self) -> &PersistanceState { + match self { + InnerStorage::Plain { + persistance_state, .. + } => persistance_state, + InnerStorage::Indexed { + persistance_state, .. + } => persistance_state, + } + } + + pub fn persistance_state_mut(&mut self) -> &mut PersistanceState { + match self { + InnerStorage::Plain { + persistance_state, .. + } => persistance_state, + InnerStorage::Indexed { + persistance_state, .. + } => persistance_state, } } fn check_threshold(&mut self) { - let InnerStorage::Plain { map: plain_map } = self else { + let InnerStorage::Plain { + map: plain_map, + persistance_state, + } = self + else { return; }; if plain_map.len() >= INDEX_THRESHOLD { @@ -51,7 +118,10 @@ where let index = key.index(); map.entry(index).or_default().insert(key, value); } - *self = InnerStorage::Indexed { map }; + *self = InnerStorage::Indexed { + map, + persistance_state: take(persistance_state), + }; } } diff --git a/turbopack/crates/turbo-tasks-backend/src/backing_storage.rs b/turbopack/crates/turbo-tasks-backend/src/backing_storage.rs new file mode 100644 index 0000000000000..6b6707c476e51 --- /dev/null +++ b/turbopack/crates/turbo-tasks-backend/src/backing_storage.rs @@ -0,0 +1,23 @@ +use std::sync::Arc; + +use anyhow::Result; +use turbo_tasks::{backend::CachedTaskType, TaskId}; + +use crate::{ + backend::AnyOperation, + data::{CachedDataItem, CachedDataUpdate}, + utils::chunked_vec::ChunkedVec, +}; + +pub trait BackingStorage { + fn next_free_task_id(&self) -> TaskId; + fn save_snapshot( + &self, + operations: Vec>, + task_cache_updates: ChunkedVec<(Arc, TaskId)>, + data_updates: ChunkedVec, + ) -> Result<()>; + fn forward_lookup_task_cache(&self, key: &CachedTaskType) -> Option; + fn reverse_lookup_task_cache(&self, task_id: TaskId) -> Option>; + fn lookup_data(&self, task_id: TaskId) -> Vec; +} diff --git a/turbopack/crates/turbo-tasks-backend/src/data.rs b/turbopack/crates/turbo-tasks-backend/src/data.rs index 390ac1c093d16..ce3f99e989b72 100644 --- a/turbopack/crates/turbo-tasks-backend/src/data.rs +++ b/turbopack/crates/turbo-tasks-backend/src/data.rs @@ -2,7 +2,7 @@ use serde::{Deserialize, Serialize}; use turbo_tasks::{ event::{Event, EventListener}, util::SharedError, - CellId, KeyValuePair, SharedReference, TaskId, ValueTypeId, + CellId, KeyValuePair, TaskId, TypedSharedReference, ValueTypeId, }; use crate::backend::indexed::Indexed; @@ -115,7 +115,7 @@ pub struct AggregationNumber { pub effective: u32, } -#[derive(Debug, Clone, KeyValuePair)] +#[derive(Debug, Clone, KeyValuePair, Serialize, Deserialize)] pub enum CachedDataItem { // Output Output { @@ -143,7 +143,7 @@ pub enum CachedDataItem { // Cells CellData { cell: CellId, - value: SharedReference, + value: TypedSharedReference, }, CellTypeMaxIndex { cell_type: ValueTypeId, @@ -207,14 +207,17 @@ pub enum CachedDataItem { }, // Transient Root Type + #[serde(skip)] AggregateRoot { value: RootState, }, // Transient In Progress state + #[serde(skip)] InProgress { value: InProgressState, }, + #[serde(skip)] InProgressCell { cell: CellId, value: InProgressCellState, @@ -237,6 +240,7 @@ pub enum CachedDataItem { }, // Transient Error State + #[serde(skip)] Error { value: SharedError, }, @@ -411,11 +415,7 @@ impl CachedDataItemValue { #[derive(Debug)] pub struct CachedDataUpdate { - // TODO persistence - #[allow(dead_code)] pub task: TaskId, - #[allow(dead_code)] pub key: CachedDataItemKey, - #[allow(dead_code)] pub value: Option, } diff --git a/turbopack/crates/turbo-tasks-backend/src/lib.rs b/turbopack/crates/turbo-tasks-backend/src/lib.rs index 488754fc8f39d..0eebd29cb4d76 100644 --- a/turbopack/crates/turbo-tasks-backend/src/lib.rs +++ b/turbopack/crates/turbo-tasks-backend/src/lib.rs @@ -3,7 +3,9 @@ #![allow(dead_code)] mod backend; +mod backing_storage; mod data; +mod lmdb_backing_storage; mod utils; -pub use self::backend::TurboTasksBackend; +pub use self::{backend::TurboTasksBackend, lmdb_backing_storage::LmdbBackingStorage}; diff --git a/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs b/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs new file mode 100644 index 0000000000000..5239eb18dda82 --- /dev/null +++ b/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs @@ -0,0 +1,199 @@ +use std::{ + collections::{hash_map::Entry, HashMap}, + error::Error, + path::Path, + sync::Arc, + thread::available_parallelism, +}; + +use anyhow::Result; +use lmdb::{Database, DatabaseFlags, Environment, EnvironmentFlags, Transaction, WriteFlags}; +use turbo_tasks::{backend::CachedTaskType, KeyValuePair, TaskId}; + +use crate::{ + backend::AnyOperation, + backing_storage::BackingStorage, + data::{CachedDataItem, CachedDataItemKey, CachedDataItemValue, CachedDataUpdate}, + utils::chunked_vec::ChunkedVec, +}; + +const META_KEY_OPERATIONS: u32 = 0; +const META_KEY_NEXT_FREE_TASK_ID: u32 = 1; + +struct IntKey([u8; 4]); + +impl IntKey { + fn new(value: u32) -> Self { + Self(value.to_be_bytes()) + } +} + +impl AsRef<[u8]> for IntKey { + fn as_ref(&self) -> &[u8] { + &self.0 + } +} + +fn as_u32(result: Result<&[u8], E>) -> Result { + let bytes = result?; + let n = bytes.try_into().map(u32::from_be_bytes)?; + Ok(n) +} + +pub struct LmdbBackingStorage { + env: Environment, + meta_db: Database, + data_db: Database, + forward_task_cache_db: Database, + reverse_task_cache_db: Database, +} + +impl LmdbBackingStorage { + pub fn new(path: &Path) -> Result { + println!("opening lmdb {:?}", path); + let env = Environment::new() + .set_flags(EnvironmentFlags::WRITE_MAP | EnvironmentFlags::NO_META_SYNC) + .set_max_readers((available_parallelism().map_or(16, |v| v.get()) * 8) as u32) + .set_max_dbs(4) + .set_map_size(20 * 1024 * 1024 * 1024) + .open(path)?; + let meta_db = env.create_db(Some("meta"), DatabaseFlags::INTEGER_KEY)?; + let data_db = env.create_db(Some("data"), DatabaseFlags::INTEGER_KEY)?; + let forward_task_cache_db = + env.create_db(Some("forward_task_cache"), DatabaseFlags::empty())?; + let reverse_task_cache_db = + env.create_db(Some("reverse_task_cache"), DatabaseFlags::INTEGER_KEY)?; + Ok(Self { + env, + meta_db, + data_db, + forward_task_cache_db, + reverse_task_cache_db, + }) + } +} + +impl BackingStorage for LmdbBackingStorage { + fn next_free_task_id(&self) -> TaskId { + let Ok(tx) = self.env.begin_ro_txn() else { + return TaskId::from(1); + }; + let next_free_task_id = + as_u32(tx.get(self.meta_db, &IntKey::new(META_KEY_NEXT_FREE_TASK_ID))).unwrap_or(1); + let _ = tx.commit(); + TaskId::from(next_free_task_id) + } + + fn save_snapshot( + &self, + operations: Vec>, + task_cache_updates: ChunkedVec<(Arc, TaskId)>, + data_updates: ChunkedVec, + ) -> Result<()> { + let mut tx = self.env.begin_rw_txn()?; + let mut next_task_id = + as_u32(tx.get(self.meta_db, &IntKey::new(META_KEY_NEXT_FREE_TASK_ID))).unwrap_or(1); + for (task_type, task_id) in task_cache_updates.iter() { + let task_id = **task_id; + let task_type = bincode::serialize(&task_type)?; + tx.put( + self.forward_task_cache_db, + &task_type, + &task_id.to_be_bytes(), + WriteFlags::empty(), + )?; + tx.put( + self.reverse_task_cache_db, + &IntKey::new(task_id), + &task_type, + WriteFlags::empty(), + )?; + next_task_id = next_task_id.max(task_id + 1); + } + tx.put( + self.meta_db, + &IntKey::new(META_KEY_NEXT_FREE_TASK_ID), + &next_task_id.to_be_bytes(), + WriteFlags::empty(), + )?; + let operations = bincode::serialize(&operations)?; + tx.put( + self.meta_db, + &IntKey::new(META_KEY_OPERATIONS), + &operations, + WriteFlags::empty(), + )?; + let mut updated_items: HashMap> = + HashMap::new(); + for CachedDataUpdate { task, key, value } in data_updates.into_iter() { + let data = match updated_items.entry(task) { + Entry::Occupied(entry) => entry.into_mut(), + Entry::Vacant(entry) => { + let mut map = HashMap::new(); + if let Ok(old_data) = tx.get(self.data_db, &IntKey::new(*task)) { + let old_data: Vec = bincode::deserialize(old_data)?; + for item in old_data { + let (key, value) = item.into_key_and_value(); + map.insert(key, value); + } + } + entry.insert(map) + } + }; + if let Some(value) = value { + data.insert(key, value); + } else { + data.remove(&key); + } + } + for (task_id, data) in updated_items { + let vec: Vec = data + .into_iter() + .map(|(key, value)| CachedDataItem::from_key_and_value(key, value)) + .collect(); + let value = bincode::serialize(&vec)?; + tx.put( + self.data_db, + &IntKey::new(*task_id), + &value, + WriteFlags::empty(), + )?; + } + tx.commit()?; + Ok(()) + } + + fn forward_lookup_task_cache(&self, task_type: &CachedTaskType) -> Option { + let tx = self.env.begin_ro_txn().ok()?; + let task_type = bincode::serialize(task_type).ok()?; + let result = tx + .get(self.forward_task_cache_db, &task_type) + .ok() + .and_then(|v| v.try_into().ok()) + .map(|v| TaskId::from(u32::from_be_bytes(v))); + tx.commit().ok()?; + result + } + + fn reverse_lookup_task_cache(&self, task_id: TaskId) -> Option> { + let tx = self.env.begin_ro_txn().ok()?; + let result = tx + .get(self.reverse_task_cache_db, &(*task_id).to_be_bytes()) + .ok() + .and_then(|v| v.try_into().ok()) + .and_then(|v: [u8; 4]| bincode::deserialize(&v).ok()); + tx.commit().ok()?; + result + } + + fn lookup_data(&self, task_id: TaskId) -> Vec { + fn lookup(this: &LmdbBackingStorage, task_id: TaskId) -> Result> { + let tx = this.env.begin_ro_txn()?; + let bytes = tx.get(this.data_db, &IntKey::new(*task_id))?; + let result = bincode::deserialize(bytes)?; + tx.commit()?; + Ok(result) + } + lookup(self, task_id).unwrap_or_default() + } +} diff --git a/turbopack/crates/turbo-tasks-backend/src/utils/chunked_vec.rs b/turbopack/crates/turbo-tasks-backend/src/utils/chunked_vec.rs index 46292f79e5e72..c5e2014715e29 100644 --- a/turbopack/crates/turbo-tasks-backend/src/utils/chunked_vec.rs +++ b/turbopack/crates/turbo-tasks-backend/src/utils/chunked_vec.rs @@ -2,18 +2,25 @@ pub struct ChunkedVec { chunks: Vec>, } +impl Default for ChunkedVec { + fn default() -> Self { + Self::new() + } +} + impl ChunkedVec { pub fn new() -> Self { Self { chunks: Vec::new() } } pub fn len(&self) -> usize { - if let Some(last) = self.chunks.last() { - let free = last.capacity() - self.len(); - cummulative_chunk_size(self.chunks.len() - 1) - free - } else { - 0 + for (i, chunk) in self.chunks.iter().enumerate().rev() { + if !chunk.is_empty() { + let free = chunk.capacity() - chunk.len(); + return cummulative_chunk_size(i) - free; + } } + 0 } pub fn push(&mut self, item: T) { @@ -42,6 +49,10 @@ impl ChunkedVec { len: self.len(), } } + + pub fn is_empty(&self) -> bool { + self.chunks.first().map_or(true, |chunk| chunk.is_empty()) + } } fn chunk_size(chunk_index: usize) -> usize { diff --git a/turbopack/crates/turbo-tasks-backend/src/utils/ptr_eq_arc.rs b/turbopack/crates/turbo-tasks-backend/src/utils/ptr_eq_arc.rs index 7889de80e05ee..fb5bcf4681bb8 100644 --- a/turbopack/crates/turbo-tasks-backend/src/utils/ptr_eq_arc.rs +++ b/turbopack/crates/turbo-tasks-backend/src/utils/ptr_eq_arc.rs @@ -10,6 +10,10 @@ impl PtrEqArc { pub fn new(value: T) -> Self { Self(Arc::new(value)) } + + pub fn arc(&self) -> &Arc { + &self.0 + } } impl From> for PtrEqArc { diff --git a/turbopack/crates/turbo-tasks-backend/tests/test_config.trs b/turbopack/crates/turbo-tasks-backend/tests/test_config.trs index 7387c44aaf3dd..71510aae294a4 100644 --- a/turbopack/crates/turbo-tasks-backend/tests/test_config.trs +++ b/turbopack/crates/turbo-tasks-backend/tests/test_config.trs @@ -1,3 +1,18 @@ -|_name, _initial| { - turbo_tasks::TurboTasks::new(turbo_tasks_backend::TurboTasksBackend::new()) +|_name, initial | { + let path = std::path::PathBuf::from(concat!( + env!("OUT_DIR"), + "/.cache/", + module_path!(), + )); + if initial { + let _ = std::fs::remove_dir_all(&path); + } + std::fs::create_dir_all(&path).unwrap(); + turbo_tasks::TurboTasks::new( + turbo_tasks_backend::TurboTasksBackend::new( + Arc::new(turbo_tasks_backend::LmdbBackingStorage::new( + &path.as_path() + ).unwrap()) + ) + ) } diff --git a/turbopack/crates/turbo-tasks-testing/tests/basic.rs b/turbopack/crates/turbo-tasks-testing/tests/basic.rs index 7cddba2e2ce67..e1ea72133a2e0 100644 --- a/turbopack/crates/turbo-tasks-testing/tests/basic.rs +++ b/turbopack/crates/turbo-tasks-testing/tests/basic.rs @@ -15,9 +15,12 @@ async fn basic() { assert_eq!(output1.await?.value, 123); let input = Value { value: 42 }.cell(); - let output2 = func(input); + let output2 = func_transient(input); assert_eq!(output2.await?.value, 42); + let output3 = func_persistent(output1); + assert_eq!(output3.await?.value, 123); + anyhow::Ok(()) }) .await @@ -30,13 +33,22 @@ struct Value { } #[turbo_tasks::function] -async fn func(input: Vc) -> Result> { +async fn func_transient(input: Vc) -> Result> { + println!("func_transient"); + let value = input.await?.value; + Ok(Value { value }.cell()) +} + +#[turbo_tasks::function] +async fn func_persistent(input: Vc) -> Result> { + println!("func_persistent"); let value = input.await?.value; Ok(Value { value }.cell()) } #[turbo_tasks::function] async fn func_without_args() -> Result> { + println!("func_without_args"); let value = 123; Ok(Value { value }.cell()) } diff --git a/turbopack/crates/turbo-tasks/src/lib.rs b/turbopack/crates/turbo-tasks/src/lib.rs index e111b0e6fa007..f8163e7ad5c24 100644 --- a/turbopack/crates/turbo-tasks/src/lib.rs +++ b/turbopack/crates/turbo-tasks/src/lib.rs @@ -108,7 +108,7 @@ pub use read_ref::ReadRef; use rustc_hash::FxHasher; pub use serialization_invalidation::SerializationInvalidator; pub use state::{State, TransientState}; -pub use task::{task_input::TaskInput, SharedReference}; +pub use task::{task_input::TaskInput, SharedReference, TypedSharedReference}; pub use trait_ref::{IntoTraitRef, TraitRef}; pub use turbo_tasks_macros::{function, value_impl, value_trait, KeyValuePair, TaskInput}; pub use value::{TransientInstance, TransientValue, Value}; diff --git a/turbopack/crates/turbo-tasks/src/task/mod.rs b/turbopack/crates/turbo-tasks/src/task/mod.rs index 14e192ebd3def..5fcba182a25a9 100644 --- a/turbopack/crates/turbo-tasks/src/task/mod.rs +++ b/turbopack/crates/turbo-tasks/src/task/mod.rs @@ -6,6 +6,6 @@ pub(crate) mod task_output; pub use from_task_input::FromTaskInput; pub use function::{AsyncFunctionMode, FunctionMode, IntoTaskFn, TaskFn}; -pub use shared_reference::SharedReference; +pub use shared_reference::{SharedReference, TypedSharedReference}; pub use task_input::TaskInput; pub use task_output::TaskOutput; From c9f303a37aada270f45e53faac68e7e677ea1fe6 Mon Sep 17 00:00:00 2001 From: Tobias Koppers Date: Fri, 9 Aug 2024 10:50:05 +0200 Subject: [PATCH 02/46] pass test name to test_config to construct db name --- .../crates/turbo-tasks-backend/tests/test_config.trs | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/turbopack/crates/turbo-tasks-backend/tests/test_config.trs b/turbopack/crates/turbo-tasks-backend/tests/test_config.trs index 71510aae294a4..2f4c2421a61e7 100644 --- a/turbopack/crates/turbo-tasks-backend/tests/test_config.trs +++ b/turbopack/crates/turbo-tasks-backend/tests/test_config.trs @@ -1,9 +1,8 @@ -|_name, initial | { - let path = std::path::PathBuf::from(concat!( +|name, initial| { + let path = std::path::PathBuf::from(format!(concat!( env!("OUT_DIR"), - "/.cache/", - module_path!(), - )); + "/.cache/{}", + ), name)); if initial { let _ = std::fs::remove_dir_all(&path); } From a08b82254e45b73699445e8b4f9c25521f142e59 Mon Sep 17 00:00:00 2001 From: Tobias Koppers Date: Fri, 9 Aug 2024 11:48:36 +0200 Subject: [PATCH 03/46] continue uncompleted operations --- .../turbo-tasks-backend/src/backend/mod.rs | 10 ++++++++ .../src/backend/operation/mod.rs | 16 +++++++++++++ .../src/backing_storage.rs | 1 + .../src/lmdb_backing_storage.rs | 24 +++++++++++++------ 4 files changed, 44 insertions(+), 7 deletions(-) diff --git a/turbopack/crates/turbo-tasks-backend/src/backend/mod.rs b/turbopack/crates/turbo-tasks-backend/src/backend/mod.rs index 2fe67fe4f1eb8..f9f7696de2cff 100644 --- a/turbopack/crates/turbo-tasks-backend/src/backend/mod.rs +++ b/turbopack/crates/turbo-tasks-backend/src/backend/mod.rs @@ -534,6 +534,16 @@ impl TurboTasksBackend { impl Backend for TurboTasksBackend { fn startup(&self, turbo_tasks: &dyn TurboTasksBackendApi) { + // Continue all uncompleted operations + // They can't be interrupted by a snapshot since the snapshotting job has not been scheduled + // yet. + let uncompleted_operations = self.backing_storage.uncompleted_operations(); + let ctx = self.execute_context(turbo_tasks); + for op in uncompleted_operations { + op.execute(&ctx); + } + + // Schedule the snapshot job turbo_tasks.schedule_backend_background_job(BackendJobId::from(1)); } diff --git a/turbopack/crates/turbo-tasks-backend/src/backend/operation/mod.rs b/turbopack/crates/turbo-tasks-backend/src/backend/operation/mod.rs index e2d1f30f2a61d..83acabd47af8c 100644 --- a/turbopack/crates/turbo-tasks-backend/src/backend/operation/mod.rs +++ b/turbopack/crates/turbo-tasks-backend/src/backend/operation/mod.rs @@ -362,6 +362,22 @@ pub enum AnyOperation { Nested(Vec), } +impl AnyOperation { + pub fn execute(self, ctx: &ExecuteContext<'_>) { + match self { + AnyOperation::ConnectChild(op) => op.execute(ctx), + AnyOperation::Invalidate(op) => op.execute(ctx), + AnyOperation::CleanupOldEdges(op) => op.execute(ctx), + AnyOperation::AggregationUpdate(op) => op.execute(ctx), + AnyOperation::Nested(ops) => { + for op in ops { + op.execute(ctx); + } + } + } + } +} + impl_operation!(ConnectChild connect_child::ConnectChildOperation); impl_operation!(Invalidate invalidate::InvalidateOperation); impl_operation!(CleanupOldEdges cleanup_old_edges::CleanupOldEdgesOperation); diff --git a/turbopack/crates/turbo-tasks-backend/src/backing_storage.rs b/turbopack/crates/turbo-tasks-backend/src/backing_storage.rs index 6b6707c476e51..cb1b2da8a4129 100644 --- a/turbopack/crates/turbo-tasks-backend/src/backing_storage.rs +++ b/turbopack/crates/turbo-tasks-backend/src/backing_storage.rs @@ -11,6 +11,7 @@ use crate::{ pub trait BackingStorage { fn next_free_task_id(&self) -> TaskId; + fn uncompleted_operations(&self) -> Vec; fn save_snapshot( &self, operations: Vec>, diff --git a/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs b/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs index 5239eb18dda82..5562243069a59 100644 --- a/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs +++ b/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs @@ -75,13 +75,23 @@ impl LmdbBackingStorage { impl BackingStorage for LmdbBackingStorage { fn next_free_task_id(&self) -> TaskId { - let Ok(tx) = self.env.begin_ro_txn() else { - return TaskId::from(1); - }; - let next_free_task_id = - as_u32(tx.get(self.meta_db, &IntKey::new(META_KEY_NEXT_FREE_TASK_ID))).unwrap_or(1); - let _ = tx.commit(); - TaskId::from(next_free_task_id) + fn get(this: &LmdbBackingStorage) -> Result { + let tx = this.env.begin_rw_txn()?; + let next_free_task_id = + as_u32(tx.get(this.meta_db, &IntKey::new(META_KEY_NEXT_FREE_TASK_ID)))?; + Ok(next_free_task_id) + } + TaskId::from(get(self).unwrap_or(1)) + } + + fn uncompleted_operations(&self) -> Vec { + fn get(this: &LmdbBackingStorage) -> Result> { + let tx = this.env.begin_ro_txn()?; + let operations = tx.get(this.meta_db, &IntKey::new(META_KEY_OPERATIONS))?; + let operations = bincode::deserialize(operations)?; + Ok(operations) + } + get(self).unwrap_or_default() } fn save_snapshot( From 77d525804ced2e8a515adbc6b044d7376d09ef51 Mon Sep 17 00:00:00 2001 From: Tobias Koppers Date: Mon, 12 Aug 2024 15:37:15 +0200 Subject: [PATCH 04/46] create dir and logging --- .../src/lmdb_backing_storage.rs | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs b/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs index 5562243069a59..e2a704965a6ca 100644 --- a/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs +++ b/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs @@ -1,9 +1,11 @@ use std::{ collections::{hash_map::Entry, HashMap}, error::Error, + fs::create_dir_all, path::Path, sync::Arc, thread::available_parallelism, + time::Instant, }; use anyhow::Result; @@ -50,6 +52,7 @@ pub struct LmdbBackingStorage { impl LmdbBackingStorage { pub fn new(path: &Path) -> Result { + create_dir_all(path)?; println!("opening lmdb {:?}", path); let env = Environment::new() .set_flags(EnvironmentFlags::WRITE_MAP | EnvironmentFlags::NO_META_SYNC) @@ -100,6 +103,14 @@ impl BackingStorage for LmdbBackingStorage { task_cache_updates: ChunkedVec<(Arc, TaskId)>, data_updates: ChunkedVec, ) -> Result<()> { + println!( + "Persisting {} operations, {} task cache updates, {} data updates...", + operations.len(), + task_cache_updates.len(), + data_updates.len() + ); + let start = Instant::now(); + let mut op_count = 0; let mut tx = self.env.begin_rw_txn()?; let mut next_task_id = as_u32(tx.get(self.meta_db, &IntKey::new(META_KEY_NEXT_FREE_TASK_ID))).unwrap_or(1); @@ -118,6 +129,7 @@ impl BackingStorage for LmdbBackingStorage { &task_type, WriteFlags::empty(), )?; + op_count += 2; next_task_id = next_task_id.max(task_id + 1); } tx.put( @@ -133,6 +145,8 @@ impl BackingStorage for LmdbBackingStorage { &operations, WriteFlags::empty(), )?; + op_count += 2; + let mut updated_items: HashMap> = HashMap::new(); for CachedDataUpdate { task, key, value } in data_updates.into_iter() { @@ -168,8 +182,13 @@ impl BackingStorage for LmdbBackingStorage { &value, WriteFlags::empty(), )?; + op_count += 1; } tx.commit()?; + println!( + "Persisted {op_count} db entries after {:?}", + start.elapsed() + ); Ok(()) } From 9719916dfcf82c381af44a4cfa9833945a7e9eed Mon Sep 17 00:00:00 2001 From: Tobias Koppers Date: Tue, 13 Aug 2024 15:29:52 +0200 Subject: [PATCH 05/46] improve error messages --- .../src/lmdb_backing_storage.rs | 31 ++++++++++++------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs b/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs index e2a704965a6ca..eb06ff4d74940 100644 --- a/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs +++ b/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs @@ -8,7 +8,7 @@ use std::{ time::Instant, }; -use anyhow::Result; +use anyhow::{anyhow, Context, Result}; use lmdb::{Database, DatabaseFlags, Environment, EnvironmentFlags, Transaction, WriteFlags}; use turbo_tasks::{backend::CachedTaskType, KeyValuePair, TaskId}; @@ -116,19 +116,22 @@ impl BackingStorage for LmdbBackingStorage { as_u32(tx.get(self.meta_db, &IntKey::new(META_KEY_NEXT_FREE_TASK_ID))).unwrap_or(1); for (task_type, task_id) in task_cache_updates.iter() { let task_id = **task_id; - let task_type = bincode::serialize(&task_type)?; + let task_type_bytes = bincode::serialize(&task_type) + .with_context(|| anyhow!("Unable to serialize task cache key {task_type:?}"))?; tx.put( self.forward_task_cache_db, - &task_type, + &task_type_bytes, &task_id.to_be_bytes(), WriteFlags::empty(), - )?; + ) + .with_context(|| anyhow!("Unable to write task cache {task_type:?} => {task_id}"))?; tx.put( self.reverse_task_cache_db, &IntKey::new(task_id), - &task_type, + &task_type_bytes, WriteFlags::empty(), - )?; + ) + .with_context(|| anyhow!("Unable to write task cache {task_id} => {task_type:?}"))?; op_count += 2; next_task_id = next_task_id.max(task_id + 1); } @@ -137,14 +140,16 @@ impl BackingStorage for LmdbBackingStorage { &IntKey::new(META_KEY_NEXT_FREE_TASK_ID), &next_task_id.to_be_bytes(), WriteFlags::empty(), - )?; + ) + .with_context(|| anyhow!("Unable to write next free task id"))?; let operations = bincode::serialize(&operations)?; tx.put( self.meta_db, &IntKey::new(META_KEY_OPERATIONS), &operations, WriteFlags::empty(), - )?; + ) + .with_context(|| anyhow!("Unable to write operations"))?; op_count += 2; let mut updated_items: HashMap> = @@ -175,16 +180,20 @@ impl BackingStorage for LmdbBackingStorage { .into_iter() .map(|(key, value)| CachedDataItem::from_key_and_value(key, value)) .collect(); - let value = bincode::serialize(&vec)?; + let value = bincode::serialize(&vec).with_context(|| { + anyhow!("Unable to serialize data items for {task_id}: {vec:#?}") + })?; tx.put( self.data_db, &IntKey::new(*task_id), &value, WriteFlags::empty(), - )?; + ) + .with_context(|| anyhow!("Unable to write data items for {task_id}"))?; op_count += 1; } - tx.commit()?; + tx.commit() + .with_context(|| anyhow!("Unable to commit operations"))?; println!( "Persisted {op_count} db entries after {:?}", start.elapsed() From df96007d49e11b3636a481fc414fc4795d805762 Mon Sep 17 00:00:00 2001 From: Tobias Koppers Date: Tue, 13 Aug 2024 15:30:48 +0200 Subject: [PATCH 06/46] handle keys larger than 511 bytes --- Cargo.lock | 5 +- .../crates/turbo-tasks-backend/Cargo.toml | 1 + .../src/lmdb_backing_storage.rs | 8 +- .../src/lmdb_backing_storage/extended_key.rs | 104 ++++++++++++++++++ 4 files changed, 113 insertions(+), 5 deletions(-) create mode 100644 turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage/extended_key.rs diff --git a/Cargo.lock b/Cargo.lock index 6d0d8ef2cbe91..99d723f5af141 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -920,9 +920,9 @@ checksum = "17febce684fd15d89027105661fec94afb475cb995fbc59d2865198446ba2eea" [[package]] name = "byteorder" -version = "1.4.3" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] name = "bytes" @@ -8597,6 +8597,7 @@ dependencies = [ "async-trait", "auto-hash-map", "bincode", + "byteorder", "dashmap", "either", "hashbrown 0.14.5", diff --git a/turbopack/crates/turbo-tasks-backend/Cargo.toml b/turbopack/crates/turbo-tasks-backend/Cargo.toml index df94cd7602cc3..d30f421fc1995 100644 --- a/turbopack/crates/turbo-tasks-backend/Cargo.toml +++ b/turbopack/crates/turbo-tasks-backend/Cargo.toml @@ -17,6 +17,7 @@ anyhow = { workspace = true } async-trait = { workspace = true } auto-hash-map = { workspace = true } bincode = "1.3.3" +byteorder = "1.5.0" dashmap = { workspace = true, features = ["raw-api"]} either = { workspace = true } hashbrown = { workspace = true } diff --git a/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs b/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs index eb06ff4d74940..e185f43f307fa 100644 --- a/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs +++ b/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs @@ -1,3 +1,5 @@ +mod extended_key; + use std::{ collections::{hash_map::Entry, HashMap}, error::Error, @@ -118,7 +120,8 @@ impl BackingStorage for LmdbBackingStorage { let task_id = **task_id; let task_type_bytes = bincode::serialize(&task_type) .with_context(|| anyhow!("Unable to serialize task cache key {task_type:?}"))?; - tx.put( + extended_key::put( + &mut tx, self.forward_task_cache_db, &task_type_bytes, &task_id.to_be_bytes(), @@ -204,8 +207,7 @@ impl BackingStorage for LmdbBackingStorage { fn forward_lookup_task_cache(&self, task_type: &CachedTaskType) -> Option { let tx = self.env.begin_ro_txn().ok()?; let task_type = bincode::serialize(task_type).ok()?; - let result = tx - .get(self.forward_task_cache_db, &task_type) + let result = extended_key::get(&tx, self.forward_task_cache_db, &task_type) .ok() .and_then(|v| v.try_into().ok()) .map(|v| TaskId::from(u32::from_be_bytes(v))); diff --git a/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage/extended_key.rs b/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage/extended_key.rs new file mode 100644 index 0000000000000..3e87669c2563c --- /dev/null +++ b/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage/extended_key.rs @@ -0,0 +1,104 @@ +use std::hash::{Hash, Hasher}; + +use byteorder::ByteOrder; +use lmdb::{Database, RoTransaction, RwTransaction, Transaction, WriteFlags}; +use rustc_hash::FxHasher; + +const MAX_KEY_SIZE: usize = 511; +const SHARED_KEY: usize = MAX_KEY_SIZE - 8; + +pub fn get<'tx>( + tx: &'tx RoTransaction<'tx>, + database: Database, + key: &[u8], +) -> lmdb::Result<&'tx [u8]> { + if key.len() > MAX_KEY_SIZE - 1 { + let hashed_key = hashed_key(key); + let data = tx.get(database, &hashed_key)?; + let mut iter = ExtendedValueIter::new(data); + while let Some((k, v)) = iter.next() { + if k == key { + return Ok(v); + } + } + Err(lmdb::Error::NotFound) + } else { + tx.get(database, &key) + } +} + +pub fn put( + tx: &mut RwTransaction<'_>, + database: Database, + key: &[u8], + value: &[u8], + flags: WriteFlags, +) -> lmdb::Result<()> { + if key.len() > MAX_KEY_SIZE - 1 { + let hashed_key = hashed_key(key); + + let size = key.len() - SHARED_KEY + value.len() + 8; + let old = tx.get(database, &hashed_key); + let old_size = old.map_or(0, |v| v.len()); + let mut data = Vec::with_capacity(old_size + size); + data.extend_from_slice(&((key.len() - SHARED_KEY) as u32).to_be_bytes()); + data.extend_from_slice(&(value.len() as u32).to_be_bytes()); + data.extend_from_slice(&key[SHARED_KEY..]); + data.extend_from_slice(value); + if let Ok(old) = old { + let mut iter = ExtendedValueIter::new(old); + while let Some((k, v)) = iter.next() { + if k != &key[SHARED_KEY..] { + data.extend_from_slice(&(k.len() as u32).to_be_bytes()); + data.extend_from_slice(&(v.len() as u32).to_be_bytes()); + data.extend_from_slice(k); + data.extend_from_slice(v); + } + } + }; + + tx.put(database, &hashed_key, &data, flags)?; + Ok(()) + } else { + tx.put(database, &key, &value, flags) + } +} + +fn hashed_key(key: &[u8]) -> [u8; MAX_KEY_SIZE] { + let mut result = [0; MAX_KEY_SIZE]; + let mut hash = FxHasher::default(); + key.hash(&mut hash); + byteorder::BigEndian::write_u64(&mut result, hash.finish()); + result[8..].copy_from_slice(&key[0..SHARED_KEY]); + result +} + +struct ExtendedValueIter<'a> { + data: &'a [u8], + pos: usize, +} + +impl<'a> Iterator for ExtendedValueIter<'a> { + type Item = (&'a [u8], &'a [u8]); + + fn next(&mut self) -> Option { + if self.pos >= self.data.len() { + return None; + } + let key_len = byteorder::BigEndian::read_u32(&self.data[self.pos..]) as usize; + self.pos += 4; + let value_len = byteorder::BigEndian::read_u32(&self.data[self.pos..]) as usize; + self.pos += 4; + let key = &self.data[self.pos..self.pos + key_len]; + self.pos += key_len; + let value = &self.data[self.pos..self.pos + value_len]; + self.pos += value_len; + Some((key, value)) + } +} + +impl<'a> ExtendedValueIter<'a> { + fn new(data: &'a [u8]) -> Self { + Self { data, pos: 0 } + } +} From 7c092e745548fd418af7d301399ea8bbba8d135e Mon Sep 17 00:00:00 2001 From: Tobias Koppers Date: Wed, 14 Aug 2024 21:01:26 +0200 Subject: [PATCH 07/46] avoid storing transient tasks --- .../turbo-tasks-backend/src/backend/operation/mod.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/turbopack/crates/turbo-tasks-backend/src/backend/operation/mod.rs b/turbopack/crates/turbo-tasks-backend/src/backend/operation/mod.rs index 83acabd47af8c..9ef44dc9f19e1 100644 --- a/turbopack/crates/turbo-tasks-backend/src/backend/operation/mod.rs +++ b/turbopack/crates/turbo-tasks-backend/src/backend/operation/mod.rs @@ -173,7 +173,7 @@ impl TaskGuard<'_> { #[must_use] pub fn add(&mut self, item: CachedDataItem) -> bool { - if !item.is_persistent() { + if self.task_id.is_transient() || !item.is_persistent() { self.task.add(item) } else if self.task.add(item.clone()) { let (key, value) = item.into_key_and_value(); @@ -199,7 +199,7 @@ impl TaskGuard<'_> { pub fn insert(&mut self, item: CachedDataItem) -> Option { let (key, value) = item.into_key_and_value(); - if !key.is_persistent() { + if self.task_id.is_transient() || !key.is_persistent() { self.task .insert(CachedDataItem::from_key_and_value(key, value)) } else if value.is_persistent() { @@ -243,7 +243,7 @@ impl TaskGuard<'_> { key: &CachedDataItemKey, update: impl FnOnce(Option) -> Option, ) { - if !key.is_persistent() { + if self.task_id.is_transient() || !key.is_persistent() { self.task.update(key, update); return; } @@ -288,7 +288,7 @@ impl TaskGuard<'_> { pub fn remove(&mut self, key: &CachedDataItemKey) -> Option { let old_value = self.task.remove(key); if let Some(value) = old_value { - if key.is_persistent() && value.is_persistent() { + if !self.task_id.is_transient() && key.is_persistent() && value.is_persistent() { let key = key.clone(); self.task.persistance_state_mut().add_persisting_item(); self.backend From 3e9d86f2663465a593a2e01e18fd5d0117e9c567 Mon Sep 17 00:00:00 2001 From: Tobias Koppers Date: Wed, 14 Aug 2024 12:35:04 +0200 Subject: [PATCH 08/46] show lookup error --- .../src/lmdb_backing_storage.rs | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs b/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs index e185f43f307fa..69f82182e0103 100644 --- a/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs +++ b/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs @@ -229,11 +229,23 @@ impl BackingStorage for LmdbBackingStorage { fn lookup_data(&self, task_id: TaskId) -> Vec { fn lookup(this: &LmdbBackingStorage, task_id: TaskId) -> Result> { let tx = this.env.begin_ro_txn()?; - let bytes = tx.get(this.data_db, &IntKey::new(*task_id))?; + let bytes = match tx.get(this.data_db, &IntKey::new(*task_id)) { + Ok(bytes) => bytes, + Err(err) => { + if err == lmdb::Error::NotFound { + return Ok(Vec::new()); + } else { + return Err(err.into()); + } + } + }; let result = bincode::deserialize(bytes)?; tx.commit()?; Ok(result) } - lookup(self, task_id).unwrap_or_default() + let result = lookup(self, task_id) + .inspect_err(|err| println!("Looking up data for {task_id} failed: {err:?}")) + .unwrap_or_default(); + result } } From f190d6f30d8b30734923ec839d2626557aade8cb Mon Sep 17 00:00:00 2001 From: Tobias Koppers Date: Wed, 14 Aug 2024 09:39:53 +0200 Subject: [PATCH 09/46] handle state serialization --- .../turbo-tasks-backend/src/backend/mod.rs | 24 +++++++++++++++++++ .../src/utils/chunked_vec.rs | 8 +++++++ 2 files changed, 32 insertions(+) diff --git a/turbopack/crates/turbo-tasks-backend/src/backend/mod.rs b/turbopack/crates/turbo-tasks-backend/src/backend/mod.rs index f9f7696de2cff..7b56bb1f761a5 100644 --- a/turbopack/crates/turbo-tasks-backend/src/backend/mod.rs +++ b/turbopack/crates/turbo-tasks-backend/src/backend/mod.rs @@ -632,6 +632,30 @@ impl Backend for TurboTasksBackend { ); } + fn invalidate_serialization( + &self, + task_id: TaskId, + turbo_tasks: &dyn TurboTasksBackendApi, + ) { + let ctx = self.execute_context(turbo_tasks); + let task = ctx.task(task_id); + let cell_data = task + .iter(CachedDataItemIndex::CellData) + .filter_map(|(key, value)| match (key, value) { + (CachedDataItemKey::CellData { cell }, CachedDataItemValue::CellData { value }) => { + Some(CachedDataUpdate { + task: task_id, + key: CachedDataItemKey::CellData { cell: *cell }, + value: Some(CachedDataItemValue::CellData { + value: value.clone(), + }), + }) + } + _ => None, + }); + self.persisted_storage_log.lock().extend(cell_data); + } + fn get_task_description(&self, task: TaskId) -> std::string::String { let task_type = self.lookup_task_type(task).expect("Task not found"); task_type.to_string() diff --git a/turbopack/crates/turbo-tasks-backend/src/utils/chunked_vec.rs b/turbopack/crates/turbo-tasks-backend/src/utils/chunked_vec.rs index c5e2014715e29..fd9ad98f57011 100644 --- a/turbopack/crates/turbo-tasks-backend/src/utils/chunked_vec.rs +++ b/turbopack/crates/turbo-tasks-backend/src/utils/chunked_vec.rs @@ -55,6 +55,14 @@ impl ChunkedVec { } } +impl Extend for ChunkedVec { + fn extend>(&mut self, iter: I) { + for item in iter { + self.push(item); + } + } +} + fn chunk_size(chunk_index: usize) -> usize { 8 << chunk_index } From c4a2414aa15ee2b302c5b134209be223ccb143d0 Mon Sep 17 00:00:00 2001 From: Tobias Koppers Date: Wed, 14 Aug 2024 09:49:39 +0200 Subject: [PATCH 10/46] validate serialization and improve errors --- Cargo.lock | 1 + .../crates/turbo-tasks-backend/Cargo.toml | 1 + .../crates/turbo-tasks-backend/src/data.rs | 11 +++ .../src/lmdb_backing_storage.rs | 75 +++++++++++++++++-- .../src/utils/chunked_vec.rs | 6 ++ .../crates/turbo-tasks/src/value_type.rs | 4 + 6 files changed, 92 insertions(+), 6 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 99d723f5af141..56457fb45ec1d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8608,6 +8608,7 @@ dependencies = [ "rand", "rustc-hash", "serde", + "serde_path_to_error", "smallvec", "tokio", "tracing", diff --git a/turbopack/crates/turbo-tasks-backend/Cargo.toml b/turbopack/crates/turbo-tasks-backend/Cargo.toml index d30f421fc1995..c1e6386853b96 100644 --- a/turbopack/crates/turbo-tasks-backend/Cargo.toml +++ b/turbopack/crates/turbo-tasks-backend/Cargo.toml @@ -28,6 +28,7 @@ parking_lot = { workspace = true } rand = { workspace = true } rustc-hash = { workspace = true } serde = { workspace = true } +serde_path_to_error = { workspace = true } smallvec = { workspace = true } tokio = { workspace = true } tracing = { workspace = true } diff --git a/turbopack/crates/turbo-tasks-backend/src/data.rs b/turbopack/crates/turbo-tasks-backend/src/data.rs index ce3f99e989b72..66eba2948e401 100644 --- a/turbopack/crates/turbo-tasks-backend/src/data.rs +++ b/turbopack/crates/turbo-tasks-backend/src/data.rs @@ -1,6 +1,7 @@ use serde::{Deserialize, Serialize}; use turbo_tasks::{ event::{Event, EventListener}, + registry, util::SharedError, CellId, KeyValuePair, TaskId, TypedSharedReference, ValueTypeId, }; @@ -281,6 +282,13 @@ impl CachedDataItem { } } + pub fn is_optional(&self) -> bool { + match self { + CachedDataItem::CellData { .. } => true, + _ => false, + } + } + pub fn new_scheduled(description: impl Fn() -> String + Sync + Send + 'static) -> Self { CachedDataItem::InProgress { value: InProgressState::Scheduled { @@ -408,6 +416,9 @@ impl CachedDataItemValue { pub fn is_persistent(&self) -> bool { match self { CachedDataItemValue::Output { value } => !value.is_transient(), + CachedDataItemValue::CellData { value } => { + registry::get_value_type(value.0).is_serializable() + } _ => true, } } diff --git a/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs b/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs index 69f82182e0103..02adcfb23be9d 100644 --- a/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs +++ b/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs @@ -11,6 +11,7 @@ use std::{ }; use anyhow::{anyhow, Context, Result}; +use bincode::Options; use lmdb::{Database, DatabaseFlags, Environment, EnvironmentFlags, Transaction, WriteFlags}; use turbo_tasks::{backend::CachedTaskType, KeyValuePair, TaskId}; @@ -145,7 +146,8 @@ impl BackingStorage for LmdbBackingStorage { WriteFlags::empty(), ) .with_context(|| anyhow!("Unable to write next free task id"))?; - let operations = bincode::serialize(&operations)?; + let operations = bincode::serialize(&operations) + .with_context(|| anyhow!("Unable to serialize operations"))?; tx.put( self.meta_db, &IntKey::new(META_KEY_OPERATIONS), @@ -163,7 +165,20 @@ impl BackingStorage for LmdbBackingStorage { Entry::Vacant(entry) => { let mut map = HashMap::new(); if let Ok(old_data) = tx.get(self.data_db, &IntKey::new(*task)) { - let old_data: Vec = bincode::deserialize(old_data)?; + let old_data: Vec = match bincode::deserialize(old_data) { + Ok(d) => d, + Err(_) => serde_path_to_error::deserialize( + &mut bincode::Deserializer::from_slice( + old_data, + bincode::DefaultOptions::new() + .with_fixint_encoding() + .allow_trailing_bytes(), + ), + ) + .with_context(|| { + anyhow!("Unable to deserialize old value of {task}: {old_data:?}") + })?, + }; for item in old_data { let (key, value) = item.into_key_and_value(); map.insert(key, value); @@ -179,13 +194,61 @@ impl BackingStorage for LmdbBackingStorage { } } for (task_id, data) in updated_items { - let vec: Vec = data + let mut vec: Vec = data .into_iter() .map(|(key, value)| CachedDataItem::from_key_and_value(key, value)) .collect(); - let value = bincode::serialize(&vec).with_context(|| { - anyhow!("Unable to serialize data items for {task_id}: {vec:#?}") - })?; + let value = match bincode::serialize(&vec) { + // Ok(value) => value, + Ok(_) | Err(_) => { + let mut error = Ok(()); + vec.retain(|item| { + let mut buf = Vec::::new(); + let mut serializer = bincode::Serializer::new( + &mut buf, + bincode::DefaultOptions::new() + .with_fixint_encoding() + .allow_trailing_bytes(), + ); + if let Err(err) = serde_path_to_error::serialize(item, &mut serializer) { + if item.is_optional() { + println!("Skipping non-serializable optional item: {item:?}"); + } else { + error = Err(err).context({ + anyhow!( + "Unable to serialize data item for {task_id}: {item:#?}" + ) + }); + } + false + } else { + let deserialize: Result = + serde_path_to_error::deserialize( + &mut bincode::Deserializer::from_slice( + &buf, + bincode::DefaultOptions::new() + .with_fixint_encoding() + .allow_trailing_bytes(), + ), + ); + if let Err(err) = deserialize { + println!( + "Data item would not be deserializable {task_id}: \ + {err:?}\n{item:#?}" + ); + false + } else { + true + } + } + }); + error?; + + bincode::serialize(&vec).with_context(|| { + anyhow!("Unable to serialize data items for {task_id}: {vec:#?}") + })? + } + }; tx.put( self.data_db, &IntKey::new(*task_id), diff --git a/turbopack/crates/turbo-tasks-backend/src/utils/chunked_vec.rs b/turbopack/crates/turbo-tasks-backend/src/utils/chunked_vec.rs index fd9ad98f57011..2d71fd13851d2 100644 --- a/turbopack/crates/turbo-tasks-backend/src/utils/chunked_vec.rs +++ b/turbopack/crates/turbo-tasks-backend/src/utils/chunked_vec.rs @@ -35,6 +35,12 @@ impl ChunkedVec { self.chunks.push(chunk); } + pub fn extend>(&mut self, iter: I) { + for item in iter { + self.push(item); + } + } + pub fn into_iter(self) -> impl Iterator { let len = self.len(); ExactSizeIter { diff --git a/turbopack/crates/turbo-tasks/src/value_type.rs b/turbopack/crates/turbo-tasks/src/value_type.rs index 067df22ca9b20..2702e359caf26 100644 --- a/turbopack/crates/turbo-tasks/src/value_type.rs +++ b/turbopack/crates/turbo-tasks/src/value_type.rs @@ -168,6 +168,10 @@ impl ValueType { } } + pub fn is_serializable(&self) -> bool { + self.any_serialization.is_some() + } + pub fn get_magic_deserialize_seed(&self) -> Option { self.magic_serialization.map(|s| s.1) } From f76a16e2db114de95fe2715c5740e56867a9c560 Mon Sep 17 00:00:00 2001 From: Tobias Koppers Date: Fri, 16 Aug 2024 07:48:53 +0200 Subject: [PATCH 11/46] add turbo_tasks_backend to tracing, add tracing for restore --- .../src/lmdb_backing_storage.rs | 14 +++++++++++--- .../turbopack-trace-utils/src/tracing_presets.rs | 1 + 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs b/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs index 02adcfb23be9d..bec95a0da4f4b 100644 --- a/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs +++ b/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs @@ -13,6 +13,7 @@ use std::{ use anyhow::{anyhow, Context, Result}; use bincode::Options; use lmdb::{Database, DatabaseFlags, Environment, EnvironmentFlags, Transaction, WriteFlags}; +use tracing::Span; use turbo_tasks::{backend::CachedTaskType, KeyValuePair, TaskId}; use crate::{ @@ -290,7 +291,12 @@ impl BackingStorage for LmdbBackingStorage { } fn lookup_data(&self, task_id: TaskId) -> Vec { - fn lookup(this: &LmdbBackingStorage, task_id: TaskId) -> Result> { + let span = tracing::trace_span!("restore data", bytes = 0usize, items = 0usize); + fn lookup( + this: &LmdbBackingStorage, + task_id: TaskId, + span: &Span, + ) -> Result> { let tx = this.env.begin_ro_txn()?; let bytes = match tx.get(this.data_db, &IntKey::new(*task_id)) { Ok(bytes) => bytes, @@ -302,11 +308,13 @@ impl BackingStorage for LmdbBackingStorage { } } }; - let result = bincode::deserialize(bytes)?; + span.record("bytes", bytes.len()); + let result: Vec = bincode::deserialize(bytes)?; + span.record("items", result.len()); tx.commit()?; Ok(result) } - let result = lookup(self, task_id) + let result = lookup(self, task_id, &span) .inspect_err(|err| println!("Looking up data for {task_id} failed: {err:?}")) .unwrap_or_default(); result diff --git a/turbopack/crates/turbopack-trace-utils/src/tracing_presets.rs b/turbopack/crates/turbopack-trace-utils/src/tracing_presets.rs index eb8ea35900740..2adea79d313c2 100644 --- a/turbopack/crates/turbopack-trace-utils/src/tracing_presets.rs +++ b/turbopack/crates/turbopack-trace-utils/src/tracing_presets.rs @@ -69,6 +69,7 @@ pub static TRACING_TURBO_TASKS_TARGETS: Lazy> = Lazy::new(|| { "turbo_tasks_fs=trace", "turbo_tasks_hash=trace", "turbo_tasks_memory=trace", + "turbo_tasks_backend=trace", ], ] .concat() From e0fd0367d723c16f129b448259d9c5a67d2633b4 Mon Sep 17 00:00:00 2001 From: Tobias Koppers Date: Fri, 16 Aug 2024 07:49:26 +0200 Subject: [PATCH 12/46] disable TLS --- .../crates/turbo-tasks-backend/src/lmdb_backing_storage.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs b/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs index bec95a0da4f4b..c0d2bf0a4b58a 100644 --- a/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs +++ b/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs @@ -59,7 +59,11 @@ impl LmdbBackingStorage { create_dir_all(path)?; println!("opening lmdb {:?}", path); let env = Environment::new() - .set_flags(EnvironmentFlags::WRITE_MAP | EnvironmentFlags::NO_META_SYNC) + .set_flags( + EnvironmentFlags::WRITE_MAP + | EnvironmentFlags::NO_META_SYNC + | EnvironmentFlags::NO_TLS, + ) .set_max_readers((available_parallelism().map_or(16, |v| v.get()) * 8) as u32) .set_max_dbs(4) .set_map_size(20 * 1024 * 1024 * 1024) From bf6e895cebb3f40be6abdd38a60ce21018604473 Mon Sep 17 00:00:00 2001 From: Tobias Koppers Date: Fri, 16 Aug 2024 08:16:25 +0200 Subject: [PATCH 13/46] print lookup error --- .../src/lmdb_backing_storage.rs | 30 ++++++++++++++----- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs b/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs index c0d2bf0a4b58a..9c21a517a1064 100644 --- a/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs +++ b/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs @@ -284,14 +284,28 @@ impl BackingStorage for LmdbBackingStorage { } fn reverse_lookup_task_cache(&self, task_id: TaskId) -> Option> { - let tx = self.env.begin_ro_txn().ok()?; - let result = tx - .get(self.reverse_task_cache_db, &(*task_id).to_be_bytes()) - .ok() - .and_then(|v| v.try_into().ok()) - .and_then(|v: [u8; 4]| bincode::deserialize(&v).ok()); - tx.commit().ok()?; - result + fn lookup( + this: &LmdbBackingStorage, + task_id: TaskId, + ) -> Result>> { + let tx = this.env.begin_ro_txn()?; + let bytes = match tx.get(this.reverse_task_cache_db, &IntKey::new(*task_id)) { + Ok(bytes) => bytes, + Err(err) => { + if err == lmdb::Error::NotFound { + return Ok(None); + } else { + return Err(err.into()); + } + } + }; + let result = bincode::deserialize(bytes)?; + tx.commit()?; + Ok(result) + } + lookup(self, task_id) + .inspect_err(|err| println!("Looking up task type for {task_id} failed: {err:?}")) + .ok()? } fn lookup_data(&self, task_id: TaskId) -> Vec { From b77626bc4c09e9e00ad201c3e96d4c07782be52d Mon Sep 17 00:00:00 2001 From: Tobias Koppers Date: Fri, 16 Aug 2024 08:17:13 +0200 Subject: [PATCH 14/46] verify serialization --- .../crates/turbo-tasks-backend/Cargo.toml | 4 ++ .../src/lmdb_backing_storage.rs | 57 ++++++++++++------- 2 files changed, 42 insertions(+), 19 deletions(-) diff --git a/turbopack/crates/turbo-tasks-backend/Cargo.toml b/turbopack/crates/turbo-tasks-backend/Cargo.toml index c1e6386853b96..7708d0929ea77 100644 --- a/turbopack/crates/turbo-tasks-backend/Cargo.toml +++ b/turbopack/crates/turbo-tasks-backend/Cargo.toml @@ -12,6 +12,10 @@ bench = false [lints] workspace = true +[features] +default = ["verify_serialization"] +verify_serialization = [] + [dependencies] anyhow = { workspace = true } async-trait = { workspace = true } diff --git a/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs b/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs index 9c21a517a1064..b7c5408185bdb 100644 --- a/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs +++ b/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs @@ -126,6 +126,22 @@ impl BackingStorage for LmdbBackingStorage { let task_id = **task_id; let task_type_bytes = bincode::serialize(&task_type) .with_context(|| anyhow!("Unable to serialize task cache key {task_type:?}"))?; + #[cfg(feature = "verify_serialization")] + { + let deserialize: Result = + serde_path_to_error::deserialize(&mut bincode::Deserializer::from_slice( + &task_type_bytes, + bincode::DefaultOptions::new() + .with_fixint_encoding() + .allow_trailing_bytes(), + )); + if let Err(err) = deserialize { + println!( + "Task type would not be deserializable {task_id}: {err:?}\n{task_type:#?}" + ); + panic!("Task type would not be deserializable {task_id}: {err:?}"); + } + } extended_key::put( &mut tx, self.forward_task_cache_db, @@ -204,8 +220,9 @@ impl BackingStorage for LmdbBackingStorage { .map(|(key, value)| CachedDataItem::from_key_and_value(key, value)) .collect(); let value = match bincode::serialize(&vec) { - // Ok(value) => value, - Ok(_) | Err(_) => { + #[cfg(not(feature = "verify_serialization"))] + Ok(value) => value, + _ => { let mut error = Ok(()); vec.retain(|item| { let mut buf = Vec::::new(); @@ -227,24 +244,26 @@ impl BackingStorage for LmdbBackingStorage { } false } else { - let deserialize: Result = - serde_path_to_error::deserialize( - &mut bincode::Deserializer::from_slice( - &buf, - bincode::DefaultOptions::new() - .with_fixint_encoding() - .allow_trailing_bytes(), - ), - ); - if let Err(err) = deserialize { - println!( - "Data item would not be deserializable {task_id}: \ - {err:?}\n{item:#?}" - ); - false - } else { - true + #[cfg(feature = "verify_serialization")] + { + let deserialize: Result = + serde_path_to_error::deserialize( + &mut bincode::Deserializer::from_slice( + &buf, + bincode::DefaultOptions::new() + .with_fixint_encoding() + .allow_trailing_bytes(), + ), + ); + if let Err(err) = deserialize { + println!( + "Data item would not be deserializable {task_id}: \ + {err:?}\n{item:#?}" + ); + return false; + } } + true } }); error?; From 6fa201deaeb2403a68dac2da1141e5909d0b39ad Mon Sep 17 00:00:00 2001 From: Tobias Koppers Date: Fri, 16 Aug 2024 08:47:30 +0200 Subject: [PATCH 15/46] fix lookup deserialization --- .../crates/turbo-tasks-backend/src/lmdb_backing_storage.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs b/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs index b7c5408185bdb..d55a21c9b2968 100644 --- a/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs +++ b/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs @@ -124,7 +124,7 @@ impl BackingStorage for LmdbBackingStorage { as_u32(tx.get(self.meta_db, &IntKey::new(META_KEY_NEXT_FREE_TASK_ID))).unwrap_or(1); for (task_type, task_id) in task_cache_updates.iter() { let task_id = **task_id; - let task_type_bytes = bincode::serialize(&task_type) + let task_type_bytes = bincode::serialize(&**task_type) .with_context(|| anyhow!("Unable to serialize task cache key {task_type:?}"))?; #[cfg(feature = "verify_serialization")] { @@ -320,7 +320,7 @@ impl BackingStorage for LmdbBackingStorage { }; let result = bincode::deserialize(bytes)?; tx.commit()?; - Ok(result) + Ok(Some(result)) } lookup(self, task_id) .inspect_err(|err| println!("Looking up task type for {task_id} failed: {err:?}")) From de99d8b97a4f2009863e350be614f9e0aa117420 Mon Sep 17 00:00:00 2001 From: Tobias Koppers Date: Fri, 16 Aug 2024 10:18:05 +0200 Subject: [PATCH 16/46] replace bincode with pot --- Cargo.lock | 30 +++++++--- .../crates/turbo-tasks-backend/Cargo.toml | 4 +- .../src/lmdb_backing_storage.rs | 55 +++++++------------ 3 files changed, 42 insertions(+), 47 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 56457fb45ec1d..ef715c704c6e0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3610,21 +3610,22 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b4ce301924b7887e9d637144fdade93f9dfff9b60981d4ac161db09720d39aa5" [[package]] -name = "lmdb" -version = "0.8.0" +name = "lmdb-rkv" +version = "0.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b0908efb5d6496aa977d96f91413da2635a902e5e31dbef0bfb88986c248539" +checksum = "447a296f7aca299cfbb50f4e4f3d49451549af655fb7215d7f8c0c3d64bad42b" dependencies = [ "bitflags 1.3.2", + "byteorder", "libc", - "lmdb-sys", + "lmdb-rkv-sys", ] [[package]] -name = "lmdb-sys" -version = "0.8.0" +name = "lmdb-rkv-sys" +version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d5b392838cfe8858e86fac37cf97a0e8c55cc60ba0a18365cadc33092f128ce9" +checksum = "61b9ce6b3be08acefa3003c57b7565377432a89ec24476bbe72e11d101f852fe" dependencies = [ "cc", "libc", @@ -5013,6 +5014,17 @@ dependencies = [ "serde", ] +[[package]] +name = "pot" +version = "3.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df842bdb3b0553a411589e64aaa1a7d0c0259f72fabcedfaa841683ae3019d80" +dependencies = [ + "byteorder", + "half 2.4.1", + "serde", +] + [[package]] name = "powerfmt" version = "0.2.0" @@ -8596,15 +8608,15 @@ dependencies = [ "anyhow", "async-trait", "auto-hash-map", - "bincode", "byteorder", "dashmap", "either", "hashbrown 0.14.5", "indexmap 1.9.3", - "lmdb", + "lmdb-rkv", "once_cell", "parking_lot", + "pot", "rand", "rustc-hash", "serde", diff --git a/turbopack/crates/turbo-tasks-backend/Cargo.toml b/turbopack/crates/turbo-tasks-backend/Cargo.toml index 7708d0929ea77..d7b5ed72cd636 100644 --- a/turbopack/crates/turbo-tasks-backend/Cargo.toml +++ b/turbopack/crates/turbo-tasks-backend/Cargo.toml @@ -20,15 +20,15 @@ verify_serialization = [] anyhow = { workspace = true } async-trait = { workspace = true } auto-hash-map = { workspace = true } -bincode = "1.3.3" byteorder = "1.5.0" dashmap = { workspace = true, features = ["raw-api"]} either = { workspace = true } hashbrown = { workspace = true } indexmap = { workspace = true } -lmdb = "0.8.0" +lmdb-rkv = "0.14.0" once_cell = { workspace = true } parking_lot = { workspace = true } +pot = "3.0.0" rand = { workspace = true } rustc-hash = { workspace = true } serde = { workspace = true } diff --git a/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs b/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs index d55a21c9b2968..898346d9ed59f 100644 --- a/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs +++ b/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs @@ -11,7 +11,6 @@ use std::{ }; use anyhow::{anyhow, Context, Result}; -use bincode::Options; use lmdb::{Database, DatabaseFlags, Environment, EnvironmentFlags, Transaction, WriteFlags}; use tracing::Span; use turbo_tasks::{backend::CachedTaskType, KeyValuePair, TaskId}; @@ -99,7 +98,7 @@ impl BackingStorage for LmdbBackingStorage { fn get(this: &LmdbBackingStorage) -> Result> { let tx = this.env.begin_ro_txn()?; let operations = tx.get(this.meta_db, &IntKey::new(META_KEY_OPERATIONS))?; - let operations = bincode::deserialize(operations)?; + let operations = pot::from_slice(operations)?; Ok(operations) } get(self).unwrap_or_default() @@ -124,17 +123,13 @@ impl BackingStorage for LmdbBackingStorage { as_u32(tx.get(self.meta_db, &IntKey::new(META_KEY_NEXT_FREE_TASK_ID))).unwrap_or(1); for (task_type, task_id) in task_cache_updates.iter() { let task_id = **task_id; - let task_type_bytes = bincode::serialize(&**task_type) + let task_type_bytes = pot::to_vec(&**task_type) .with_context(|| anyhow!("Unable to serialize task cache key {task_type:?}"))?; #[cfg(feature = "verify_serialization")] { - let deserialize: Result = - serde_path_to_error::deserialize(&mut bincode::Deserializer::from_slice( - &task_type_bytes, - bincode::DefaultOptions::new() - .with_fixint_encoding() - .allow_trailing_bytes(), - )); + let deserialize: Result = serde_path_to_error::deserialize( + &mut pot::de::SymbolList::new().deserializer_for_slice(&task_type_bytes)?, + ); if let Err(err) = deserialize { println!( "Task type would not be deserializable {task_id}: {err:?}\n{task_type:#?}" @@ -167,8 +162,8 @@ impl BackingStorage for LmdbBackingStorage { WriteFlags::empty(), ) .with_context(|| anyhow!("Unable to write next free task id"))?; - let operations = bincode::serialize(&operations) - .with_context(|| anyhow!("Unable to serialize operations"))?; + let operations = + pot::to_vec(&operations).with_context(|| anyhow!("Unable to serialize operations"))?; tx.put( self.meta_db, &IntKey::new(META_KEY_OPERATIONS), @@ -186,15 +181,10 @@ impl BackingStorage for LmdbBackingStorage { Entry::Vacant(entry) => { let mut map = HashMap::new(); if let Ok(old_data) = tx.get(self.data_db, &IntKey::new(*task)) { - let old_data: Vec = match bincode::deserialize(old_data) { + let old_data: Vec = match pot::from_slice(old_data) { Ok(d) => d, Err(_) => serde_path_to_error::deserialize( - &mut bincode::Deserializer::from_slice( - old_data, - bincode::DefaultOptions::new() - .with_fixint_encoding() - .allow_trailing_bytes(), - ), + &mut pot::de::SymbolList::new().deserializer_for_slice(old_data)?, ) .with_context(|| { anyhow!("Unable to deserialize old value of {task}: {old_data:?}") @@ -219,19 +209,15 @@ impl BackingStorage for LmdbBackingStorage { .into_iter() .map(|(key, value)| CachedDataItem::from_key_and_value(key, value)) .collect(); - let value = match bincode::serialize(&vec) { + let value = match pot::to_vec(&vec) { #[cfg(not(feature = "verify_serialization"))] Ok(value) => value, _ => { let mut error = Ok(()); vec.retain(|item| { let mut buf = Vec::::new(); - let mut serializer = bincode::Serializer::new( - &mut buf, - bincode::DefaultOptions::new() - .with_fixint_encoding() - .allow_trailing_bytes(), - ); + let mut symbol_map = pot::ser::SymbolMap::new(); + let mut serializer = symbol_map.serializer_for(&mut buf).unwrap(); if let Err(err) = serde_path_to_error::serialize(item, &mut serializer) { if item.is_optional() { println!("Skipping non-serializable optional item: {item:?}"); @@ -248,12 +234,9 @@ impl BackingStorage for LmdbBackingStorage { { let deserialize: Result = serde_path_to_error::deserialize( - &mut bincode::Deserializer::from_slice( - &buf, - bincode::DefaultOptions::new() - .with_fixint_encoding() - .allow_trailing_bytes(), - ), + &mut pot::de::SymbolList::new() + .deserializer_for_slice(&buf) + .unwrap(), ); if let Err(err) = deserialize { println!( @@ -268,7 +251,7 @@ impl BackingStorage for LmdbBackingStorage { }); error?; - bincode::serialize(&vec).with_context(|| { + pot::to_vec(&vec).with_context(|| { anyhow!("Unable to serialize data items for {task_id}: {vec:#?}") })? } @@ -293,7 +276,7 @@ impl BackingStorage for LmdbBackingStorage { fn forward_lookup_task_cache(&self, task_type: &CachedTaskType) -> Option { let tx = self.env.begin_ro_txn().ok()?; - let task_type = bincode::serialize(task_type).ok()?; + let task_type = pot::to_vec(task_type).ok()?; let result = extended_key::get(&tx, self.forward_task_cache_db, &task_type) .ok() .and_then(|v| v.try_into().ok()) @@ -318,7 +301,7 @@ impl BackingStorage for LmdbBackingStorage { } } }; - let result = bincode::deserialize(bytes)?; + let result = pot::from_slice(bytes)?; tx.commit()?; Ok(Some(result)) } @@ -346,7 +329,7 @@ impl BackingStorage for LmdbBackingStorage { } }; span.record("bytes", bytes.len()); - let result: Vec = bincode::deserialize(bytes)?; + let result: Vec = pot::from_slice(bytes)?; span.record("items", result.len()); tx.commit()?; Ok(result) From 31d3f0709fd7449154df99a21452f2147e5d748a Mon Sep 17 00:00:00 2001 From: Tobias Koppers Date: Sat, 17 Aug 2024 11:03:24 +0200 Subject: [PATCH 17/46] fix restore data trace --- .../crates/turbo-tasks-backend/src/lmdb_backing_storage.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs b/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs index 898346d9ed59f..f24bdd4dc2ef9 100644 --- a/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs +++ b/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs @@ -311,7 +311,7 @@ impl BackingStorage for LmdbBackingStorage { } fn lookup_data(&self, task_id: TaskId) -> Vec { - let span = tracing::trace_span!("restore data", bytes = 0usize, items = 0usize); + let span = tracing::trace_span!("restore data", bytes = 0usize, items = 0usize).entered(); fn lookup( this: &LmdbBackingStorage, task_id: TaskId, From 0e016fb6ddbc37ed9390ea1700e12f47c16c93cf Mon Sep 17 00:00:00 2001 From: Tobias Koppers Date: Sat, 17 Aug 2024 12:19:06 +0200 Subject: [PATCH 18/46] more tracing in db --- .../src/lmdb_backing_storage.rs | 44 ++++++++++++++----- 1 file changed, 34 insertions(+), 10 deletions(-) diff --git a/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs b/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs index f24bdd4dc2ef9..0e713cb649f4b 100644 --- a/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs +++ b/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs @@ -275,20 +275,42 @@ impl BackingStorage for LmdbBackingStorage { } fn forward_lookup_task_cache(&self, task_type: &CachedTaskType) -> Option { - let tx = self.env.begin_ro_txn().ok()?; - let task_type = pot::to_vec(task_type).ok()?; - let result = extended_key::get(&tx, self.forward_task_cache_db, &task_type) - .ok() - .and_then(|v| v.try_into().ok()) - .map(|v| TaskId::from(u32::from_be_bytes(v))); - tx.commit().ok()?; - result + let span = tracing::trace_span!("forward lookup task cache", key_bytes = 0usize).entered(); + fn lookup( + this: &LmdbBackingStorage, + task_type: &CachedTaskType, + span: &Span, + ) -> Result> { + let tx = this.env.begin_ro_txn()?; + let task_type = pot::to_vec(task_type)?; + span.record("key_bytes", task_type.len()); + let bytes = match extended_key::get(&tx, this.forward_task_cache_db, &task_type) { + Ok(result) => result, + Err(err) => { + if err == lmdb::Error::NotFound { + return Ok(None); + } else { + return Err(err.into()); + } + } + }; + let bytes = bytes.try_into()?; + let id = TaskId::from(u32::from_be_bytes(bytes)); + tx.commit()?; + Ok(Some(id)) + } + let id = lookup(self, task_type, &span) + .inspect_err(|err| println!("Looking up task id for {task_type:?} failed: {err:?}")) + .ok()??; + Some(id) } fn reverse_lookup_task_cache(&self, task_id: TaskId) -> Option> { + let span = tracing::trace_span!("reverse lookup task cache", bytes = 0usize).entered(); fn lookup( this: &LmdbBackingStorage, task_id: TaskId, + span: &Span, ) -> Result>> { let tx = this.env.begin_ro_txn()?; let bytes = match tx.get(this.reverse_task_cache_db, &IntKey::new(*task_id)) { @@ -301,13 +323,15 @@ impl BackingStorage for LmdbBackingStorage { } } }; + span.record("bytes", bytes.len()); let result = pot::from_slice(bytes)?; tx.commit()?; Ok(Some(result)) } - lookup(self, task_id) + let result = lookup(self, task_id, &span) .inspect_err(|err| println!("Looking up task type for {task_id} failed: {err:?}")) - .ok()? + .ok()??; + Some(result) } fn lookup_data(&self, task_id: TaskId) -> Vec { From d26c6c82ec5222bbddac8c890ce2497a523f8858 Mon Sep 17 00:00:00 2001 From: Tobias Koppers Date: Mon, 19 Aug 2024 16:41:04 +0200 Subject: [PATCH 19/46] remove verify_serialization --- turbopack/crates/turbo-tasks-backend/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/turbopack/crates/turbo-tasks-backend/Cargo.toml b/turbopack/crates/turbo-tasks-backend/Cargo.toml index d7b5ed72cd636..e7fd19a9323b3 100644 --- a/turbopack/crates/turbo-tasks-backend/Cargo.toml +++ b/turbopack/crates/turbo-tasks-backend/Cargo.toml @@ -13,7 +13,7 @@ bench = false workspace = true [features] -default = ["verify_serialization"] +default = [] verify_serialization = [] [dependencies] From 02eed51aeec395d12c03da5252ed0b39db67c73e Mon Sep 17 00:00:00 2001 From: Tobias Koppers Date: Sat, 17 Aug 2024 12:44:25 +0200 Subject: [PATCH 20/46] fix race condition --- turbopack/crates/turbo-tasks-backend/src/backend/mod.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/turbopack/crates/turbo-tasks-backend/src/backend/mod.rs b/turbopack/crates/turbo-tasks-backend/src/backend/mod.rs index 7b56bb1f761a5..d9470e60a16c9 100644 --- a/turbopack/crates/turbo-tasks-backend/src/backend/mod.rs +++ b/turbopack/crates/turbo-tasks-backend/src/backend/mod.rs @@ -571,6 +571,9 @@ impl Backend for TurboTasksBackend { unsafe { self.persisted_task_id_factory.reuse(task_id); } + self.persisted_task_cache_log + .lock() + .push((task_type, existing_task_id)); self.connect_child(parent_task, existing_task_id, turbo_tasks); return existing_task_id; } From 0ddd6904ebba80db9b4366bf6a35ced80b3d3ec8 Mon Sep 17 00:00:00 2001 From: Tobias Koppers Date: Mon, 19 Aug 2024 13:03:38 +0200 Subject: [PATCH 21/46] do not interrupt persisting while there is data --- .../turbo-tasks-backend/src/backend/mod.rs | 38 ++++++++++++------- 1 file changed, 24 insertions(+), 14 deletions(-) diff --git a/turbopack/crates/turbo-tasks-backend/src/backend/mod.rs b/turbopack/crates/turbo-tasks-backend/src/backend/mod.rs index d9470e60a16c9..c5a897254ec53 100644 --- a/turbopack/crates/turbo-tasks-backend/src/backend/mod.rs +++ b/turbopack/crates/turbo-tasks-backend/src/backend/mod.rs @@ -474,7 +474,7 @@ impl TurboTasksBackend { } } - fn snapshot(&self) -> Option { + fn snapshot(&self) -> Option<(Instant, bool)> { let mut snapshot_request = self.snapshot_request.lock(); snapshot_request.snapshot_requested = true; let active_operations = self @@ -509,7 +509,10 @@ impl TurboTasksBackend { *counts.entry(*task).or_default() += 1; } + let mut new_items = false; + if !persisted_task_cache_log.is_empty() || !persisted_storage_log.is_empty() { + new_items = true; if let Err(err) = self.backing_storage.save_snapshot( suspended_operations, persisted_task_cache_log, @@ -528,7 +531,7 @@ impl TurboTasksBackend { .finish_persisting_items(count); } - Some(snapshot_time) + Some((snapshot_time, new_items)) } } @@ -1054,21 +1057,28 @@ impl Backend for TurboTasksBackend { ) -> Pin + Send + 'a>> { Box::pin(async move { if *id == 1 { - const SNAPSHOT_INTERVAL: Duration = Duration::from_secs(1); - let last_snapshot = self.last_snapshot.load(Ordering::Relaxed); - let last_snapshot = self.start_time + Duration::from_millis(last_snapshot); - let elapsed = last_snapshot.elapsed(); - if elapsed < SNAPSHOT_INTERVAL { - tokio::time::sleep(SNAPSHOT_INTERVAL - elapsed).await; - } + let mut last_snapshot = self.start_time + Duration::from_millis(last_snapshot); + loop { + const SNAPSHOT_INTERVAL: Duration = Duration::from_secs(1); - if let Some(last_snapshot) = self.snapshot() { - let last_snapshot = last_snapshot.duration_since(self.start_time); - self.last_snapshot - .store(last_snapshot.as_millis() as u64, Ordering::Relaxed); + let elapsed = last_snapshot.elapsed(); + if elapsed < SNAPSHOT_INTERVAL { + tokio::time::sleep(SNAPSHOT_INTERVAL - elapsed).await; + } - turbo_tasks.schedule_backend_background_job(id); + if let Some((snapshot_start, new_data)) = self.snapshot() { + last_snapshot = snapshot_start; + if new_data { + continue; + } + let last_snapshot = last_snapshot.duration_since(self.start_time); + self.last_snapshot + .store(last_snapshot.as_millis() as u64, Ordering::Relaxed); + + turbo_tasks.schedule_backend_background_job(id); + return; + } } } }) From ebab3100e0f26128491ae3dbf34574b68a5fed73 Mon Sep 17 00:00:00 2001 From: Tobias Koppers Date: Mon, 19 Aug 2024 16:40:56 +0200 Subject: [PATCH 22/46] add persist trace --- turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs b/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs index 0e713cb649f4b..f7c7fdd8dd285 100644 --- a/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs +++ b/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs @@ -104,6 +104,7 @@ impl BackingStorage for LmdbBackingStorage { get(self).unwrap_or_default() } + #[tracing::instrument(level = "trace", skip_all, fields(operations = operations.len(), task_cache_updates = task_cache_updates.len(), data_updates = data_updates.len()))] fn save_snapshot( &self, operations: Vec>, From d98055086e4439c43c4e3b8c6e51995794649122 Mon Sep 17 00:00:00 2001 From: Tobias Koppers Date: Tue, 3 Sep 2024 16:03:24 +0200 Subject: [PATCH 23/46] improve task aggregation --- .../turbo-tasks-backend/src/backend/mod.rs | 21 ++++----------- .../src/backend/operation/mod.rs | 26 +++++++++++++++++++ .../src/backend/storage.rs | 4 +++ 3 files changed, 35 insertions(+), 16 deletions(-) diff --git a/turbopack/crates/turbo-tasks-backend/src/backend/mod.rs b/turbopack/crates/turbo-tasks-backend/src/backend/mod.rs index c5a897254ec53..9b6a09082b181 100644 --- a/turbopack/crates/turbo-tasks-backend/src/backend/mod.rs +++ b/turbopack/crates/turbo-tasks-backend/src/backend/mod.rs @@ -643,23 +643,12 @@ impl Backend for TurboTasksBackend { task_id: TaskId, turbo_tasks: &dyn TurboTasksBackendApi, ) { + if task_id.is_transient() { + return; + } let ctx = self.execute_context(turbo_tasks); - let task = ctx.task(task_id); - let cell_data = task - .iter(CachedDataItemIndex::CellData) - .filter_map(|(key, value)| match (key, value) { - (CachedDataItemKey::CellData { cell }, CachedDataItemValue::CellData { value }) => { - Some(CachedDataUpdate { - task: task_id, - key: CachedDataItemKey::CellData { cell: *cell }, - value: Some(CachedDataItemValue::CellData { - value: value.clone(), - }), - }) - } - _ => None, - }); - self.persisted_storage_log.lock().extend(cell_data); + let mut task = ctx.task(task_id); + task.invalidate_serialization(); } fn get_task_description(&self, task: TaskId) -> std::string::String { diff --git a/turbopack/crates/turbo-tasks-backend/src/backend/operation/mod.rs b/turbopack/crates/turbo-tasks-backend/src/backend/operation/mod.rs index 9ef44dc9f19e1..d874c83e97a33 100644 --- a/turbopack/crates/turbo-tasks-backend/src/backend/operation/mod.rs +++ b/turbopack/crates/turbo-tasks-backend/src/backend/operation/mod.rs @@ -328,6 +328,32 @@ impl TaskGuard<'_> { pub fn iter_all(&self) -> impl Iterator { self.task.iter_all() } + + pub fn invalidate_serialization(&mut self) { + let mut count = 0; + let cell_data = self + .iter(CachedDataItemIndex::CellData) + .filter_map(|(key, value)| match (key, value) { + (CachedDataItemKey::CellData { cell }, CachedDataItemValue::CellData { value }) => { + count += 1; + Some(CachedDataUpdate { + task: self.task_id, + key: CachedDataItemKey::CellData { cell: *cell }, + value: Some(CachedDataItemValue::CellData { + value: value.clone(), + }), + }) + } + _ => None, + }); + { + let mut guard = self.backend.persisted_storage_log.lock(); + guard.extend(cell_data); + self.task + .persistance_state_mut() + .add_persisting_items(count); + } + } } macro_rules! impl_operation { diff --git a/turbopack/crates/turbo-tasks-backend/src/backend/storage.rs b/turbopack/crates/turbo-tasks-backend/src/backend/storage.rs index 4b455b0c8ec15..82db7c81f5c86 100644 --- a/turbopack/crates/turbo-tasks-backend/src/backend/storage.rs +++ b/turbopack/crates/turbo-tasks-backend/src/backend/storage.rs @@ -37,6 +37,10 @@ impl PersistanceState { self.value += 1; } + pub fn add_persisting_items(&mut self, count: u32) { + self.value += count; + } + pub fn finish_persisting_items(&mut self, count: u32) { self.value -= count; } From 23ebb6ef829f9eba7dfd136f8a6c7c08576da13d Mon Sep 17 00:00:00 2001 From: Tobias Koppers Date: Thu, 5 Sep 2024 01:27:11 +0200 Subject: [PATCH 24/46] restore task_pair --- .../src/backend/operation/mod.rs | 30 ++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/turbopack/crates/turbo-tasks-backend/src/backend/operation/mod.rs b/turbopack/crates/turbo-tasks-backend/src/backend/operation/mod.rs index d874c83e97a33..6ec3e823c4275 100644 --- a/turbopack/crates/turbo-tasks-backend/src/backend/operation/mod.rs +++ b/turbopack/crates/turbo-tasks-backend/src/backend/operation/mod.rs @@ -88,7 +88,35 @@ impl<'a> ExecuteContext<'a> { } pub fn task_pair(&self, task_id1: TaskId, task_id2: TaskId) -> (TaskGuard<'a>, TaskGuard<'a>) { - let (task1, task2) = self.backend.storage.access_pair_mut(task_id1, task_id2); + let (mut task1, mut task2) = self.backend.storage.access_pair_mut(task_id1, task_id2); + let is_restored1 = task1.persistance_state().is_restored(); + let is_restored2 = task2.persistance_state().is_restored(); + if !is_restored1 || !is_restored2 { + // Avoid holding the lock too long since this can also affect other tasks + drop(task1); + drop(task2); + + let items1 = + (!is_restored1).then(|| self.backend.backing_storage.lookup_data(task_id1)); + let items2 = + (!is_restored2).then(|| self.backend.backing_storage.lookup_data(task_id2)); + + let (t1, t2) = self.backend.storage.access_pair_mut(task_id1, task_id2); + task1 = t1; + task2 = t2; + if !task1.persistance_state().is_restored() { + for item in items1.unwrap() { + task1.add(item); + } + task1.persistance_state_mut().set_restored(); + } + if !task2.persistance_state().is_restored() { + for item in items2.unwrap() { + task2.add(item); + } + task2.persistance_state_mut().set_restored(); + } + } ( TaskGuard { task: task1, From 2c51283ec31b2eef1b52cfd7c270f229974b7104 Mon Sep 17 00:00:00 2001 From: Tobias Koppers Date: Mon, 19 Aug 2024 10:24:31 +0200 Subject: [PATCH 25/46] verify persistent function only calls persistent functions --- turbopack/crates/turbo-tasks-backend/src/backend/mod.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/turbopack/crates/turbo-tasks-backend/src/backend/mod.rs b/turbopack/crates/turbo-tasks-backend/src/backend/mod.rs index 9b6a09082b181..62611dd7d9fc3 100644 --- a/turbopack/crates/turbo-tasks-backend/src/backend/mod.rs +++ b/turbopack/crates/turbo-tasks-backend/src/backend/mod.rs @@ -595,6 +595,14 @@ impl Backend for TurboTasksBackend { parent_task: TaskId, turbo_tasks: &dyn TurboTasksBackendApi, ) -> TaskId { + if !parent_task.is_transient() { + let parent_task_type = self.lookup_task_type(parent_task); + panic!( + "Calling transient function {} from persistent function function {} is not allowed", + task_type.get_name(), + parent_task_type.map_or_else(|| "unknown".into(), |t| t.get_name()) + ); + } if let Some(task_id) = self.task_cache.lookup_forward(&task_type) { self.connect_child(parent_task, task_id, turbo_tasks); return task_id; From 8fa030fe0d3a280cd7bacb52abb2a2decf008903 Mon Sep 17 00:00:00 2001 From: Tobias Koppers Date: Fri, 6 Sep 2024 15:30:59 +0200 Subject: [PATCH 26/46] clippy --- turbopack/crates/turbo-tasks-backend/src/data.rs | 5 +---- .../turbo-tasks-backend/src/lmdb_backing_storage.rs | 5 ++--- .../src/lmdb_backing_storage/extended_key.rs | 8 ++++---- .../crates/turbo-tasks-backend/tests/test_config.trs | 2 +- 4 files changed, 8 insertions(+), 12 deletions(-) diff --git a/turbopack/crates/turbo-tasks-backend/src/data.rs b/turbopack/crates/turbo-tasks-backend/src/data.rs index 66eba2948e401..2b49683805756 100644 --- a/turbopack/crates/turbo-tasks-backend/src/data.rs +++ b/turbopack/crates/turbo-tasks-backend/src/data.rs @@ -283,10 +283,7 @@ impl CachedDataItem { } pub fn is_optional(&self) -> bool { - match self { - CachedDataItem::CellData { .. } => true, - _ => false, - } + matches!(self, CachedDataItem::CellData { .. }) } pub fn new_scheduled(description: impl Fn() -> String + Sync + Send + 'static) -> Self { diff --git a/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs b/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs index f7c7fdd8dd285..33837138bc343 100644 --- a/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs +++ b/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs @@ -359,9 +359,8 @@ impl BackingStorage for LmdbBackingStorage { tx.commit()?; Ok(result) } - let result = lookup(self, task_id, &span) + lookup(self, task_id, &span) .inspect_err(|err| println!("Looking up data for {task_id} failed: {err:?}")) - .unwrap_or_default(); - result + .unwrap_or_default() } } diff --git a/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage/extended_key.rs b/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage/extended_key.rs index 3e87669c2563c..1a45fff59475d 100644 --- a/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage/extended_key.rs +++ b/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage/extended_key.rs @@ -15,8 +15,8 @@ pub fn get<'tx>( if key.len() > MAX_KEY_SIZE - 1 { let hashed_key = hashed_key(key); let data = tx.get(database, &hashed_key)?; - let mut iter = ExtendedValueIter::new(data); - while let Some((k, v)) = iter.next() { + let iter = ExtendedValueIter::new(data); + for (k, v) in iter { if k == key { return Ok(v); } @@ -46,8 +46,8 @@ pub fn put( data.extend_from_slice(&key[SHARED_KEY..]); data.extend_from_slice(value); if let Ok(old) = old { - let mut iter = ExtendedValueIter::new(old); - while let Some((k, v)) = iter.next() { + let iter = ExtendedValueIter::new(old); + for (k, v) in iter { if k != &key[SHARED_KEY..] { data.extend_from_slice(&(k.len() as u32).to_be_bytes()); data.extend_from_slice(&(v.len() as u32).to_be_bytes()); diff --git a/turbopack/crates/turbo-tasks-backend/tests/test_config.trs b/turbopack/crates/turbo-tasks-backend/tests/test_config.trs index 2f4c2421a61e7..d995fb71c0e6e 100644 --- a/turbopack/crates/turbo-tasks-backend/tests/test_config.trs +++ b/turbopack/crates/turbo-tasks-backend/tests/test_config.trs @@ -10,7 +10,7 @@ turbo_tasks::TurboTasks::new( turbo_tasks_backend::TurboTasksBackend::new( Arc::new(turbo_tasks_backend::LmdbBackingStorage::new( - &path.as_path() + path.as_path() ).unwrap()) ) ) From 6cc4c27b05dbe24bd2904afe61b096d8ab288788 Mon Sep 17 00:00:00 2001 From: Tobias Koppers Date: Fri, 20 Sep 2024 11:16:45 +0200 Subject: [PATCH 27/46] add more details to save_snapshot tracing --- .../src/lmdb_backing_storage.rs | 276 ++++++++++-------- 1 file changed, 153 insertions(+), 123 deletions(-) diff --git a/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs b/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs index 33837138bc343..4a81c117bc6a6 100644 --- a/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs +++ b/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs @@ -122,152 +122,182 @@ impl BackingStorage for LmdbBackingStorage { let mut tx = self.env.begin_rw_txn()?; let mut next_task_id = as_u32(tx.get(self.meta_db, &IntKey::new(META_KEY_NEXT_FREE_TASK_ID))).unwrap_or(1); - for (task_type, task_id) in task_cache_updates.iter() { - let task_id = **task_id; - let task_type_bytes = pot::to_vec(&**task_type) - .with_context(|| anyhow!("Unable to serialize task cache key {task_type:?}"))?; - #[cfg(feature = "verify_serialization")] - { - let deserialize: Result = serde_path_to_error::deserialize( - &mut pot::de::SymbolList::new().deserializer_for_slice(&task_type_bytes)?, - ); - if let Err(err) = deserialize { - println!( - "Task type would not be deserializable {task_id}: {err:?}\n{task_type:#?}" + { + let _span = tracing::trace_span!("update task cache", items = task_cache_updates.len()) + .entered(); + for (task_type, task_id) in task_cache_updates.iter() { + let task_id = **task_id; + let task_type_bytes = pot::to_vec(&**task_type) + .with_context(|| anyhow!("Unable to serialize task cache key {task_type:?}"))?; + #[cfg(feature = "verify_serialization")] + { + let deserialize: Result = serde_path_to_error::deserialize( + &mut pot::de::SymbolList::new().deserializer_for_slice(&task_type_bytes)?, ); - panic!("Task type would not be deserializable {task_id}: {err:?}"); + if let Err(err) = deserialize { + println!( + "Task type would not be deserializable {task_id}: \ + {err:?}\n{task_type:#?}" + ); + panic!("Task type would not be deserializable {task_id}: {err:?}"); + } } + extended_key::put( + &mut tx, + self.forward_task_cache_db, + &task_type_bytes, + &task_id.to_be_bytes(), + WriteFlags::empty(), + ) + .with_context(|| { + anyhow!("Unable to write task cache {task_type:?} => {task_id}") + })?; + tx.put( + self.reverse_task_cache_db, + &IntKey::new(task_id), + &task_type_bytes, + WriteFlags::empty(), + ) + .with_context(|| { + anyhow!("Unable to write task cache {task_id} => {task_type:?}") + })?; + op_count += 2; + next_task_id = next_task_id.max(task_id + 1); } - extended_key::put( - &mut tx, - self.forward_task_cache_db, - &task_type_bytes, - &task_id.to_be_bytes(), + tx.put( + self.meta_db, + &IntKey::new(META_KEY_NEXT_FREE_TASK_ID), + &next_task_id.to_be_bytes(), WriteFlags::empty(), ) - .with_context(|| anyhow!("Unable to write task cache {task_type:?} => {task_id}"))?; + .with_context(|| anyhow!("Unable to write next free task id"))?; + } + { + let _span = + tracing::trace_span!("update operations", operations = operations.len()).entered(); + let operations = pot::to_vec(&operations) + .with_context(|| anyhow!("Unable to serialize operations"))?; tx.put( - self.reverse_task_cache_db, - &IntKey::new(task_id), - &task_type_bytes, + self.meta_db, + &IntKey::new(META_KEY_OPERATIONS), + &operations, WriteFlags::empty(), ) - .with_context(|| anyhow!("Unable to write task cache {task_id} => {task_type:?}"))?; + .with_context(|| anyhow!("Unable to write operations"))?; op_count += 2; - next_task_id = next_task_id.max(task_id + 1); } - tx.put( - self.meta_db, - &IntKey::new(META_KEY_NEXT_FREE_TASK_ID), - &next_task_id.to_be_bytes(), - WriteFlags::empty(), - ) - .with_context(|| anyhow!("Unable to write next free task id"))?; - let operations = - pot::to_vec(&operations).with_context(|| anyhow!("Unable to serialize operations"))?; - tx.put( - self.meta_db, - &IntKey::new(META_KEY_OPERATIONS), - &operations, - WriteFlags::empty(), - ) - .with_context(|| anyhow!("Unable to write operations"))?; - op_count += 2; let mut updated_items: HashMap> = HashMap::new(); - for CachedDataUpdate { task, key, value } in data_updates.into_iter() { - let data = match updated_items.entry(task) { - Entry::Occupied(entry) => entry.into_mut(), - Entry::Vacant(entry) => { - let mut map = HashMap::new(); - if let Ok(old_data) = tx.get(self.data_db, &IntKey::new(*task)) { - let old_data: Vec = match pot::from_slice(old_data) { - Ok(d) => d, - Err(_) => serde_path_to_error::deserialize( - &mut pot::de::SymbolList::new().deserializer_for_slice(old_data)?, - ) - .with_context(|| { - anyhow!("Unable to deserialize old value of {task}: {old_data:?}") - })?, - }; - for item in old_data { - let (key, value) = item.into_key_and_value(); - map.insert(key, value); + { + let _span = + tracing::trace_span!("sort and restore task data", updates = data_updates.len()) + .entered(); + for CachedDataUpdate { task, key, value } in data_updates.into_iter() { + let data = match updated_items.entry(task) { + Entry::Occupied(entry) => entry.into_mut(), + Entry::Vacant(entry) => { + let mut map = HashMap::new(); + if let Ok(old_data) = tx.get(self.data_db, &IntKey::new(*task)) { + let old_data: Vec = match pot::from_slice(old_data) { + Ok(d) => d, + Err(_) => serde_path_to_error::deserialize( + &mut pot::de::SymbolList::new() + .deserializer_for_slice(old_data)?, + ) + .with_context(|| { + anyhow!( + "Unable to deserialize old value of {task}: {old_data:?}" + ) + })?, + }; + for item in old_data { + let (key, value) = item.into_key_and_value(); + map.insert(key, value); + } } + entry.insert(map) } - entry.insert(map) + }; + if let Some(value) = value { + data.insert(key, value); + } else { + data.remove(&key); } - }; - if let Some(value) = value { - data.insert(key, value); - } else { - data.remove(&key); } } - for (task_id, data) in updated_items { - let mut vec: Vec = data - .into_iter() - .map(|(key, value)| CachedDataItem::from_key_and_value(key, value)) - .collect(); - let value = match pot::to_vec(&vec) { - #[cfg(not(feature = "verify_serialization"))] - Ok(value) => value, - _ => { - let mut error = Ok(()); - vec.retain(|item| { - let mut buf = Vec::::new(); - let mut symbol_map = pot::ser::SymbolMap::new(); - let mut serializer = symbol_map.serializer_for(&mut buf).unwrap(); - if let Err(err) = serde_path_to_error::serialize(item, &mut serializer) { - if item.is_optional() { - println!("Skipping non-serializable optional item: {item:?}"); - } else { - error = Err(err).context({ - anyhow!( - "Unable to serialize data item for {task_id}: {item:#?}" - ) - }); - } - false - } else { - #[cfg(feature = "verify_serialization")] + { + let _span = + tracing::trace_span!("update task data", tasks = updated_items.len()).entered(); + for (task_id, data) in updated_items { + let mut vec: Vec = data + .into_iter() + .map(|(key, value)| CachedDataItem::from_key_and_value(key, value)) + .collect(); + let value = match pot::to_vec(&vec) { + #[cfg(not(feature = "verify_serialization"))] + Ok(value) => value, + _ => { + let mut error = Ok(()); + vec.retain(|item| { + let mut buf = Vec::::new(); + let mut symbol_map = pot::ser::SymbolMap::new(); + let mut serializer = symbol_map.serializer_for(&mut buf).unwrap(); + if let Err(err) = serde_path_to_error::serialize(item, &mut serializer) { - let deserialize: Result = - serde_path_to_error::deserialize( - &mut pot::de::SymbolList::new() - .deserializer_for_slice(&buf) - .unwrap(), - ); - if let Err(err) = deserialize { - println!( - "Data item would not be deserializable {task_id}: \ - {err:?}\n{item:#?}" - ); - return false; + if item.is_optional() { + println!("Skipping non-serializable optional item: {item:?}"); + } else { + error = Err(err).context({ + anyhow!( + "Unable to serialize data item for {task_id}: \ + {item:#?}" + ) + }); } + false + } else { + #[cfg(feature = "verify_serialization")] + { + let deserialize: Result = + serde_path_to_error::deserialize( + &mut pot::de::SymbolList::new() + .deserializer_for_slice(&buf) + .unwrap(), + ); + if let Err(err) = deserialize { + println!( + "Data item would not be deserializable {task_id}: \ + {err:?}\n{item:#?}" + ); + return false; + } + } + true } - true - } - }); - error?; + }); + error?; - pot::to_vec(&vec).with_context(|| { - anyhow!("Unable to serialize data items for {task_id}: {vec:#?}") - })? - } - }; - tx.put( - self.data_db, - &IntKey::new(*task_id), - &value, - WriteFlags::empty(), - ) - .with_context(|| anyhow!("Unable to write data items for {task_id}"))?; - op_count += 1; + pot::to_vec(&vec).with_context(|| { + anyhow!("Unable to serialize data items for {task_id}: {vec:#?}") + })? + } + }; + tx.put( + self.data_db, + &IntKey::new(*task_id), + &value, + WriteFlags::empty(), + ) + .with_context(|| anyhow!("Unable to write data items for {task_id}"))?; + op_count += 1; + } + } + { + let _span = tracing::trace_span!("commit").entered(); + tx.commit() + .with_context(|| anyhow!("Unable to commit operations"))?; } - tx.commit() - .with_context(|| anyhow!("Unable to commit operations"))?; println!( "Persisted {op_count} db entries after {:?}", start.elapsed() From ca084fc33cabe2b17dbb5fd4e6fd7808f55bfd0a Mon Sep 17 00:00:00 2001 From: Tobias Koppers Date: Wed, 25 Sep 2024 15:21:55 +0200 Subject: [PATCH 28/46] store task data and aggregation separately --- .../turbo-tasks-backend/src/backend/mod.rs | 52 ++-- .../backend/operation/aggregation_update.rs | 35 +-- .../backend/operation/cleanup_old_edges.rs | 25 +- .../src/backend/operation/connect_child.rs | 5 +- .../src/backend/operation/invalidate.rs | 3 +- .../src/backend/operation/mod.rs | 125 ++++++---- .../src/backend/operation/update_cell.rs | 5 +- .../src/backend/operation/update_output.rs | 3 +- .../src/backend/storage.rs | 77 +++++- .../src/backing_storage.rs | 5 +- .../crates/turbo-tasks-backend/src/data.rs | 35 ++- .../src/lmdb_backing_storage.rs | 230 ++++++++++-------- 12 files changed, 384 insertions(+), 216 deletions(-) diff --git a/turbopack/crates/turbo-tasks-backend/src/backend/mod.rs b/turbopack/crates/turbo-tasks-backend/src/backend/mod.rs index 62611dd7d9fc3..ee3ba1f11527b 100644 --- a/turbopack/crates/turbo-tasks-backend/src/backend/mod.rs +++ b/turbopack/crates/turbo-tasks-backend/src/backend/mod.rs @@ -22,6 +22,7 @@ use dashmap::DashMap; use parking_lot::{Condvar, Mutex}; use rustc_hash::FxHasher; use smallvec::smallvec; +pub use storage::TaskDataCategory; use turbo_tasks::{ backend::{ Backend, BackendJobId, CachedTaskType, CellContent, TaskExecutionSpec, TransientTaskRoot, @@ -101,7 +102,8 @@ pub struct TurboTasksBackend { task_cache: BiMap, TaskId>, transient_tasks: DashMap>, - persisted_storage_log: Mutex>, + persisted_storage_data_log: Mutex>, + persisted_storage_meta_log: Mutex>, storage: Storage, /// Number of executing operations + Highest bit is set when snapshot is @@ -140,7 +142,8 @@ impl TurboTasksBackend { persisted_task_cache_log: Mutex::new(ChunkedVec::new()), task_cache: BiMap::new(), transient_tasks: DashMap::new(), - persisted_storage_log: Mutex::new(ChunkedVec::new()), + persisted_storage_data_log: Mutex::new(ChunkedVec::new()), + persisted_storage_meta_log: Mutex::new(ChunkedVec::new()), storage: Storage::new(), in_progress_operations: AtomicUsize::new(0), snapshot_request: Mutex::new(SnapshotRequest::new()), @@ -205,6 +208,17 @@ impl TurboTasksBackend { } OperationGuard { backend: self } } + + fn persisted_storage_log( + &self, + category: TaskDataCategory, + ) -> &Mutex> { + match category { + TaskDataCategory::Data => &self.persisted_storage_data_log, + TaskDataCategory::Meta => &self.persisted_storage_meta_log, + TaskDataCategory::All => unreachable!(), + } + } } pub(crate) struct OperationGuard<'a> { @@ -246,7 +260,7 @@ impl TurboTasksBackend { turbo_tasks: &dyn TurboTasksBackendApi, ) -> Result> { let ctx = self.execute_context(turbo_tasks); - let mut task = ctx.task(task_id); + let mut task = ctx.task(task_id, TaskDataCategory::All); if let Some(in_progress) = get!(task, InProgress) { match in_progress { @@ -281,7 +295,7 @@ impl TurboTasksBackend { }, &ctx, ); - task = ctx.task(task_id); + task = ctx.task(task_id, TaskDataCategory::All); } // Check the dirty count of the root node @@ -328,7 +342,7 @@ impl TurboTasksBackend { }); drop(task); - let mut reader_task = ctx.task(reader); + let mut reader_task = ctx.task(reader, TaskDataCategory::Data); if reader_task .remove(&CachedDataItemKey::OutdatedOutputDependency { target: task_id }) .is_none() @@ -370,7 +384,7 @@ impl TurboTasksBackend { turbo_tasks: &dyn TurboTasksBackendApi, ) -> Result> { let ctx = self.execute_context(turbo_tasks); - let mut task = ctx.task(task_id); + let mut task = ctx.task(task_id, TaskDataCategory::Data); if let Some(content) = get!(task, CellData { cell }) { let content = content.clone(); if let Some(reader) = reader { @@ -381,7 +395,7 @@ impl TurboTasksBackend { }); drop(task); - let mut reader_task = ctx.task(reader); + let mut reader_task = ctx.task(reader, TaskDataCategory::Data); let target = CellRef { task: task_id, cell, @@ -494,7 +508,8 @@ impl TurboTasksBackend { .map(|op| op.arc().clone()) .collect::>(); drop(snapshot_request); - let persisted_storage_log = take(&mut *self.persisted_storage_log.lock()); + let persisted_storage_meta_log = take(&mut *self.persisted_storage_meta_log.lock()); + let persisted_storage_data_log = take(&mut *self.persisted_storage_data_log.lock()); let persisted_task_cache_log = take(&mut *self.persisted_task_cache_log.lock()); let mut snapshot_request = self.snapshot_request.lock(); snapshot_request.snapshot_requested = false; @@ -505,18 +520,25 @@ impl TurboTasksBackend { drop(snapshot_request); let mut counts: HashMap = HashMap::new(); - for CachedDataUpdate { task, .. } in persisted_storage_log.iter() { + for CachedDataUpdate { task, .. } in persisted_storage_data_log + .iter() + .chain(persisted_storage_meta_log.iter()) + { *counts.entry(*task).or_default() += 1; } let mut new_items = false; - if !persisted_task_cache_log.is_empty() || !persisted_storage_log.is_empty() { + if !persisted_task_cache_log.is_empty() + || !persisted_storage_meta_log.is_empty() + || !persisted_storage_data_log.is_empty() + { new_items = true; if let Err(err) = self.backing_storage.save_snapshot( suspended_operations, persisted_task_cache_log, - persisted_storage_log, + persisted_storage_meta_log, + persisted_storage_data_log, ) { println!("Persising failed: {:#?}", err); return None; @@ -655,7 +677,7 @@ impl Backend for TurboTasksBackend { return; } let ctx = self.execute_context(turbo_tasks); - let mut task = ctx.task(task_id); + let mut task = ctx.task(task_id, TaskDataCategory::Data); task.invalidate_serialization(); } @@ -696,7 +718,7 @@ impl Backend for TurboTasksBackend { }; { let ctx = self.execute_context(turbo_tasks); - let mut task = ctx.task(task_id); + let mut task = ctx.task(task_id, TaskDataCategory::Data); let in_progress = remove!(task, InProgress)?; let InProgressState::Scheduled { done_event } = in_progress else { task.add_new(CachedDataItem::InProgress { value: in_progress }); @@ -887,7 +909,7 @@ impl Backend for TurboTasksBackend { turbo_tasks: &dyn TurboTasksBackendApi, ) -> bool { let ctx = self.execute_context(turbo_tasks); - let mut task = ctx.task(task_id); + let mut task = ctx.task(task_id, TaskDataCategory::All); let Some(CachedDataItemValue::InProgress { value: in_progress }) = task.remove(&CachedDataItemKey::InProgress {}) else { @@ -1126,7 +1148,7 @@ impl Backend for TurboTasksBackend { turbo_tasks: &dyn TurboTasksBackendApi, ) -> Result { let ctx = self.execute_context(turbo_tasks); - let task = ctx.task(task_id); + let task = ctx.task(task_id, TaskDataCategory::Data); if let Some(content) = get!(task, CellData { cell }) { Ok(CellContent(Some(content.1.clone())).into_typed(cell.type_id)) } else { diff --git a/turbopack/crates/turbo-tasks-backend/src/backend/operation/aggregation_update.rs b/turbopack/crates/turbo-tasks-backend/src/backend/operation/aggregation_update.rs index b019caf3090e8..48ef106dcab3a 100644 --- a/turbopack/crates/turbo-tasks-backend/src/backend/operation/aggregation_update.rs +++ b/turbopack/crates/turbo-tasks-backend/src/backend/operation/aggregation_update.rs @@ -7,6 +7,7 @@ use crate::{ backend::{ operation::{ExecuteContext, Operation, TaskGuard}, storage::{get, get_many, iter_many, remove, update, update_count}, + TaskDataCategory, }, data::{ActiveType, AggregationNumber, CachedDataItem, CachedDataItemKey, RootState}, }; @@ -385,7 +386,7 @@ impl AggregationUpdateQueue { } fn balance_edge(&mut self, ctx: &ExecuteContext, upper_id: TaskId, task_id: TaskId) { - let (mut upper, mut task) = ctx.task_pair(upper_id, task_id); + let (mut upper, mut task) = ctx.task_pair(upper_id, task_id, TaskDataCategory::Meta); let upper_aggregation_number = get_aggregation_number(&upper); let task_aggregation_number = get_aggregation_number(&task); @@ -490,7 +491,7 @@ impl AggregationUpdateQueue { self.push(AggregationUpdateJob::FindAndScheduleDirty { task_ids }); } if let Some(task_id) = popped { - let mut task = ctx.task(task_id); + let mut task = ctx.task(task_id, TaskDataCategory::Meta); #[allow(clippy::collapsible_if, reason = "readablility")] if task.has_key(&CachedDataItemKey::Dirty {}) { let description = ctx.backend.get_task_desc_fn(task_id); @@ -522,7 +523,7 @@ impl AggregationUpdateQueue { update: AggregatedDataUpdate, ) { for upper_id in upper_ids { - let mut upper = ctx.task(upper_id); + let mut upper = ctx.task(upper_id, TaskDataCategory::Meta); let diff = update.apply(&mut upper, self); if !diff.is_empty() { let upper_ids = get_uppers(&upper); @@ -542,7 +543,7 @@ impl AggregationUpdateQueue { lost_follower_id: TaskId, mut upper_ids: Vec, ) { - let mut follower = ctx.task(lost_follower_id); + let mut follower = ctx.task(lost_follower_id, TaskDataCategory::Meta); let mut follower_in_upper_ids = Vec::new(); upper_ids.retain(|&upper_id| { let mut keep_upper = false; @@ -571,7 +572,7 @@ impl AggregationUpdateQueue { if !data.is_empty() { for upper_id in upper_ids.iter() { // remove data from upper - let mut upper = ctx.task(*upper_id); + let mut upper = ctx.task(*upper_id, TaskDataCategory::Meta); let diff = data.apply(&mut upper, self); if !diff.is_empty() { let upper_ids = get_uppers(&upper); @@ -593,7 +594,7 @@ impl AggregationUpdateQueue { } for upper_id in follower_in_upper_ids { - let mut upper = ctx.task(upper_id); + let mut upper = ctx.task(upper_id, TaskDataCategory::Meta); if update_count!( upper, Follower { @@ -617,12 +618,12 @@ impl AggregationUpdateQueue { mut upper_ids: Vec, ) { let follower_aggregation_number = { - let follower = ctx.task(new_follower_id); + let follower = ctx.task(new_follower_id, TaskDataCategory::Meta); get_aggregation_number(&follower) }; let mut upper_ids_as_follower = Vec::new(); upper_ids.retain(|&upper_id| { - let upper = ctx.task(upper_id); + let upper = ctx.task(upper_id, TaskDataCategory::Meta); // decide if it should be an inner or follower let upper_aggregation_number = get_aggregation_number(&upper); @@ -638,7 +639,7 @@ impl AggregationUpdateQueue { } }); if !upper_ids.is_empty() { - let mut follower = ctx.task(new_follower_id); + let mut follower = ctx.task(new_follower_id, TaskDataCategory::Meta); upper_ids.retain(|&upper_id| { if update_count!(follower, Upper { task: upper_id }, 1) { // It's a new upper @@ -656,7 +657,7 @@ impl AggregationUpdateQueue { if !data.is_empty() { for upper_id in upper_ids.iter() { // add data to upper - let mut upper = ctx.task(*upper_id); + let mut upper = ctx.task(*upper_id, TaskDataCategory::Meta); let diff = data.apply(&mut upper, self); if !diff.is_empty() { let upper_ids = get_uppers(&upper); @@ -678,7 +679,7 @@ impl AggregationUpdateQueue { } } upper_ids_as_follower.retain(|&upper_id| { - let mut upper = ctx.task(upper_id); + let mut upper = ctx.task(upper_id, TaskDataCategory::Meta); update_count!( upper, Follower { @@ -704,14 +705,14 @@ impl AggregationUpdateQueue { let mut followers_with_aggregation_number = new_follower_ids .into_iter() .map(|new_follower_id| { - let follower = ctx.task(new_follower_id); + let follower = ctx.task(new_follower_id, TaskDataCategory::Meta); (new_follower_id, get_aggregation_number(&follower)) }) .collect::>(); let mut followers_of_upper = Vec::new(); { - let upper = ctx.task(upper_id); + let upper = ctx.task(upper_id, TaskDataCategory::Meta); // decide if it should be an inner or follower let upper_aggregation_number = get_aggregation_number(&upper); @@ -734,7 +735,7 @@ impl AggregationUpdateQueue { let mut upper_data_updates = Vec::new(); let mut upper_new_followers = Vec::new(); for (follower_id, _) in followers_with_aggregation_number { - let mut follower = ctx.task(follower_id); + let mut follower = ctx.task(follower_id, TaskDataCategory::Meta); if update_count!(follower, Upper { task: upper_id }, 1) { // It's a new upper let data = AggregatedDataUpdate::from_task(&mut follower); @@ -755,7 +756,7 @@ impl AggregationUpdateQueue { } if !upper_data_updates.is_empty() { // add data to upper - let mut upper = ctx.task(upper_id); + let mut upper = ctx.task(upper_id, TaskDataCategory::Meta); let diffs = upper_data_updates .into_iter() .filter_map(|data| { @@ -782,7 +783,7 @@ impl AggregationUpdateQueue { } } if !followers_of_upper.is_empty() { - let mut upper = ctx.task(upper_id); + let mut upper = ctx.task(upper_id, TaskDataCategory::Meta); followers_of_upper .retain(|follower_id| update_count!(upper, Follower { task: *follower_id }, 1)); if !followers_of_upper.is_empty() { @@ -801,7 +802,7 @@ impl AggregationUpdateQueue { base_effective_distance: Option>, base_aggregation_number: u32, ) { - let mut task = ctx.task(task_id); + let mut task = ctx.task(task_id, TaskDataCategory::Meta); let current = get!(task, AggregationNumber).copied().unwrap_or_default(); // The wanted new distance is either the provided one or the old distance let distance = base_effective_distance.map_or(current.distance, |d| d.get()); diff --git a/turbopack/crates/turbo-tasks-backend/src/backend/operation/cleanup_old_edges.rs b/turbopack/crates/turbo-tasks-backend/src/backend/operation/cleanup_old_edges.rs index 66a65ea95acb4..46b7e1ec66cc4 100644 --- a/turbopack/crates/turbo-tasks-backend/src/backend/operation/cleanup_old_edges.rs +++ b/turbopack/crates/turbo-tasks-backend/src/backend/operation/cleanup_old_edges.rs @@ -4,13 +4,16 @@ use serde::{Deserialize, Serialize}; use turbo_tasks::TaskId; use crate::{ - backend::operation::{ - aggregation_update::{ - get_aggregation_number, get_uppers, is_aggregating_node, AggregationUpdateJob, - AggregationUpdateQueue, + backend::{ + operation::{ + aggregation_update::{ + get_aggregation_number, get_uppers, is_aggregating_node, AggregationUpdateJob, + AggregationUpdateQueue, + }, + invalidate::make_task_dirty, + ExecuteContext, Operation, }, - invalidate::make_task_dirty, - ExecuteContext, Operation, + TaskDataCategory, }, data::{CachedDataItemKey, CellRef}, }; @@ -69,7 +72,7 @@ impl Operation for CleanupOldEdgesOperation { if let Some(edge) = outdated.pop() { match edge { OutdatedEdge::Child(child_id) => { - let mut task = ctx.task(task_id); + let mut task = ctx.task(task_id, TaskDataCategory::All); task.remove(&CachedDataItemKey::Child { task: child_id }); if is_aggregating_node(get_aggregation_number(&task)) { queue.push(AggregationUpdateJob::InnerLostFollower { @@ -89,14 +92,14 @@ impl Operation for CleanupOldEdgesOperation { cell, }) => { { - let mut task = ctx.task(cell_task_id); + let mut task = ctx.task(cell_task_id, TaskDataCategory::Data); task.remove(&CachedDataItemKey::CellDependent { cell, task: task_id, }); } { - let mut task = ctx.task(task_id); + let mut task = ctx.task(task_id, TaskDataCategory::Data); task.remove(&CachedDataItemKey::CellDependency { target: CellRef { task: cell_task_id, @@ -107,13 +110,13 @@ impl Operation for CleanupOldEdgesOperation { } OutdatedEdge::OutputDependency(output_task_id) => { { - let mut task = ctx.task(output_task_id); + let mut task = ctx.task(output_task_id, TaskDataCategory::Data); task.remove(&CachedDataItemKey::OutputDependent { task: task_id, }); } { - let mut task = ctx.task(task_id); + let mut task = ctx.task(task_id, TaskDataCategory::Data); task.remove(&CachedDataItemKey::OutputDependency { target: output_task_id, }); diff --git a/turbopack/crates/turbo-tasks-backend/src/backend/operation/connect_child.rs b/turbopack/crates/turbo-tasks-backend/src/backend/operation/connect_child.rs index 98b07e9978cdf..e5d99de0106e0 100644 --- a/turbopack/crates/turbo-tasks-backend/src/backend/operation/connect_child.rs +++ b/turbopack/crates/turbo-tasks-backend/src/backend/operation/connect_child.rs @@ -12,6 +12,7 @@ use crate::{ is_root_node, ExecuteContext, Operation, }, storage::get, + TaskDataCategory, }, data::{CachedDataItem, CachedDataItemIndex, CachedDataItemKey}, }; @@ -29,7 +30,7 @@ pub enum ConnectChildOperation { impl ConnectChildOperation { pub fn run(parent_task_id: TaskId, child_task_id: TaskId, ctx: ExecuteContext<'_>) { - let mut parent_task = ctx.task(parent_task_id); + let mut parent_task = ctx.task(parent_task_id, TaskDataCategory::All); parent_task.remove(&CachedDataItemKey::OutdatedChild { task: child_task_id, }); @@ -108,7 +109,7 @@ impl ConnectChildOperation { drop(parent_task); { - let mut task = ctx.task(child_task_id); + let mut task = ctx.task(child_task_id, TaskDataCategory::Data); should_schedule = should_schedule || !task.has_key(&CachedDataItemKey::Output {}); if should_schedule { let description = ctx.backend.get_task_desc_fn(child_task_id); diff --git a/turbopack/crates/turbo-tasks-backend/src/backend/operation/invalidate.rs b/turbopack/crates/turbo-tasks-backend/src/backend/operation/invalidate.rs index 71c58a97ec92d..7131f9fe10026 100644 --- a/turbopack/crates/turbo-tasks-backend/src/backend/operation/invalidate.rs +++ b/turbopack/crates/turbo-tasks-backend/src/backend/operation/invalidate.rs @@ -11,6 +11,7 @@ use crate::{ ExecuteContext, Operation, }, storage::get, + TaskDataCategory, }, data::{CachedDataItem, CachedDataItemKey}, }; @@ -70,7 +71,7 @@ pub fn make_task_dirty(task_id: TaskId, queue: &mut AggregationUpdateQueue, ctx: return; } - let mut task = ctx.task(task_id); + let mut task = ctx.task(task_id, TaskDataCategory::All); if task.add(CachedDataItem::Dirty { value: () }) { let dirty_container = get!(task, AggregatedDirtyContainerCount) diff --git a/turbopack/crates/turbo-tasks-backend/src/backend/operation/mod.rs b/turbopack/crates/turbo-tasks-backend/src/backend/operation/mod.rs index 6ec3e823c4275..b2d4490853794 100644 --- a/turbopack/crates/turbo-tasks-backend/src/backend/operation/mod.rs +++ b/turbopack/crates/turbo-tasks-backend/src/backend/operation/mod.rs @@ -14,7 +14,10 @@ use serde::{Deserialize, Serialize}; use turbo_tasks::{KeyValuePair, TaskId, TurboTasksBackendApi}; use crate::{ - backend::{storage::StorageWriteGuard, OperationGuard, TransientTask, TurboTasksBackend}, + backend::{ + storage::StorageWriteGuard, OperationGuard, TaskDataCategory, TransientTask, + TurboTasksBackend, + }, data::{ CachedDataItem, CachedDataItemIndex, CachedDataItemKey, CachedDataItemValue, CachedDataUpdate, @@ -51,21 +54,25 @@ impl<'a> ExecuteContext<'a> { } } - pub fn task(&self, task_id: TaskId) -> TaskGuard<'a> { + pub fn task(&self, task_id: TaskId, category: TaskDataCategory) -> TaskGuard<'a> { let mut task = self.backend.storage.access_mut(task_id); - if !task.persistance_state().is_restored() { + if !task.persistance_state().is_restored(category) { if task_id.is_transient() { - task.persistance_state_mut().set_restored(); + task.persistance_state_mut() + .set_restored(TaskDataCategory::All); } else { - // Avoid holding the lock too long since this can also affect other tasks - drop(task); - let items = self.backend.backing_storage.lookup_data(task_id); - task = self.backend.storage.access_mut(task_id); - if !task.persistance_state().is_restored() { - for item in items { - task.add(item); + for category in category { + // Avoid holding the lock too long since this can also affect other tasks + drop(task); + + let items = self.backend.backing_storage.lookup_data(task_id, category); + task = self.backend.storage.access_mut(task_id); + if !task.persistance_state().is_restored(category) { + for item in items { + task.add(item); + } + task.persistance_state_mut().set_restored(category); } - task.persistance_state_mut().set_restored(); } } } @@ -87,34 +94,41 @@ impl<'a> ExecuteContext<'a> { } } - pub fn task_pair(&self, task_id1: TaskId, task_id2: TaskId) -> (TaskGuard<'a>, TaskGuard<'a>) { + pub fn task_pair( + &self, + task_id1: TaskId, + task_id2: TaskId, + category: TaskDataCategory, + ) -> (TaskGuard<'a>, TaskGuard<'a>) { let (mut task1, mut task2) = self.backend.storage.access_pair_mut(task_id1, task_id2); - let is_restored1 = task1.persistance_state().is_restored(); - let is_restored2 = task2.persistance_state().is_restored(); + let is_restored1 = task1.persistance_state().is_restored(category); + let is_restored2 = task2.persistance_state().is_restored(category); if !is_restored1 || !is_restored2 { - // Avoid holding the lock too long since this can also affect other tasks - drop(task1); - drop(task2); - - let items1 = - (!is_restored1).then(|| self.backend.backing_storage.lookup_data(task_id1)); - let items2 = - (!is_restored2).then(|| self.backend.backing_storage.lookup_data(task_id2)); - - let (t1, t2) = self.backend.storage.access_pair_mut(task_id1, task_id2); - task1 = t1; - task2 = t2; - if !task1.persistance_state().is_restored() { - for item in items1.unwrap() { - task1.add(item); + for category in category { + // Avoid holding the lock too long since this can also affect other tasks + drop(task1); + drop(task2); + + let items1 = (!is_restored1) + .then(|| self.backend.backing_storage.lookup_data(task_id1, category)); + let items2 = (!is_restored2) + .then(|| self.backend.backing_storage.lookup_data(task_id2, category)); + + let (t1, t2) = self.backend.storage.access_pair_mut(task_id1, task_id2); + task1 = t1; + task2 = t2; + if !task1.persistance_state().is_restored(category) { + for item in items1.unwrap() { + task1.add(item); + } + task1.persistance_state_mut().set_restored(category); } - task1.persistance_state_mut().set_restored(); - } - if !task2.persistance_state().is_restored() { - for item in items2.unwrap() { - task2.add(item); + if !task2.persistance_state().is_restored(category) { + for item in items2.unwrap() { + task2.add(item); + } + task2.persistance_state_mut().set_restored(category); } - task2.persistance_state_mut().set_restored(); } } ( @@ -207,7 +221,7 @@ impl TaskGuard<'_> { let (key, value) = item.into_key_and_value(); self.task.persistance_state_mut().add_persisting_item(); self.backend - .persisted_storage_log + .persisted_storage_log(key.category()) .lock() .push(CachedDataUpdate { key, @@ -237,7 +251,7 @@ impl TaskGuard<'_> { )); self.task.persistance_state_mut().add_persisting_item(); self.backend - .persisted_storage_log + .persisted_storage_log(key.category()) .lock() .push(CachedDataUpdate { key, @@ -251,7 +265,7 @@ impl TaskGuard<'_> { if old.is_persistent() { self.task.persistance_state_mut().add_persisting_item(); self.backend - .persisted_storage_log + .persisted_storage_log(key.category()) .lock() .push(CachedDataUpdate { key, @@ -290,19 +304,25 @@ impl TaskGuard<'_> { (false, false) => {} (true, false) => { add_persisting_item = true; - backend.persisted_storage_log.lock().push(CachedDataUpdate { - key: key.clone(), - task: *task_id, - value: None, - }); + backend + .persisted_storage_log(key.category()) + .lock() + .push(CachedDataUpdate { + key: key.clone(), + task: *task_id, + value: None, + }); } (_, true) => { add_persisting_item = true; - backend.persisted_storage_log.lock().push(CachedDataUpdate { - key: key.clone(), - task: *task_id, - value: new.clone(), - }); + backend + .persisted_storage_log(key.category()) + .lock() + .push(CachedDataUpdate { + key: key.clone(), + task: *task_id, + value: new.clone(), + }); } } @@ -320,7 +340,7 @@ impl TaskGuard<'_> { let key = key.clone(); self.task.persistance_state_mut().add_persisting_item(); self.backend - .persisted_storage_log + .persisted_storage_log(key.category()) .lock() .push(CachedDataUpdate { key, @@ -375,7 +395,10 @@ impl TaskGuard<'_> { _ => None, }); { - let mut guard = self.backend.persisted_storage_log.lock(); + let mut guard = self + .backend + .persisted_storage_log(TaskDataCategory::Data) + .lock(); guard.extend(cell_data); self.task .persistance_state_mut() diff --git a/turbopack/crates/turbo-tasks-backend/src/backend/operation/update_cell.rs b/turbopack/crates/turbo-tasks-backend/src/backend/operation/update_cell.rs index 705ab5660f3e1..90fb7e8daf3d1 100644 --- a/turbopack/crates/turbo-tasks-backend/src/backend/operation/update_cell.rs +++ b/turbopack/crates/turbo-tasks-backend/src/backend/operation/update_cell.rs @@ -4,6 +4,7 @@ use crate::{ backend::{ operation::{ExecuteContext, InvalidateOperation}, storage::{get_many, remove}, + TaskDataCategory, }, data::{CachedDataItem, CachedDataItemKey}, }; @@ -11,8 +12,8 @@ use crate::{ pub struct UpdateCellOperation; impl UpdateCellOperation { - pub fn run(task: TaskId, cell: CellId, content: CellContent, ctx: ExecuteContext<'_>) { - let mut task = ctx.task(task); + pub fn run(task_id: TaskId, cell: CellId, content: CellContent, ctx: ExecuteContext<'_>) { + let mut task = ctx.task(task_id, TaskDataCategory::All); let old_content = if let CellContent(Some(new_content)) = content { task.insert(CachedDataItem::CellData { cell, diff --git a/turbopack/crates/turbo-tasks-backend/src/backend/operation/update_output.rs b/turbopack/crates/turbo-tasks-backend/src/backend/operation/update_output.rs index efcee8546d8f5..be107e17fbbd0 100644 --- a/turbopack/crates/turbo-tasks-backend/src/backend/operation/update_output.rs +++ b/turbopack/crates/turbo-tasks-backend/src/backend/operation/update_output.rs @@ -7,6 +7,7 @@ use crate::{ backend::{ operation::{ExecuteContext, InvalidateOperation}, storage::get_many, + TaskDataCategory, }, data::{CachedDataItem, CachedDataItemKey, CachedDataItemValue, CellRef, OutputValue}, }; @@ -19,7 +20,7 @@ impl UpdateOutputOperation { output: Result, Option>>, ctx: ExecuteContext<'_>, ) { - let mut task = ctx.task(task_id); + let mut task = ctx.task(task_id, TaskDataCategory::Data); let old_error = task.remove(&CachedDataItemKey::Error {}); let current_output = task.get(&CachedDataItemKey::Output {}); let output_value = match output { diff --git a/turbopack/crates/turbo-tasks-backend/src/backend/storage.rs b/turbopack/crates/turbo-tasks-backend/src/backend/storage.rs index 82db7c81f5c86..1ad5898002cb5 100644 --- a/turbopack/crates/turbo-tasks-backend/src/backend/storage.rs +++ b/turbopack/crates/turbo-tasks-backend/src/backend/storage.rs @@ -16,7 +16,68 @@ use crate::{ utils::dash_map_multi::{get_multiple_mut, RefMut}, }; -const UNRESTORED: u32 = u32::MAX; +const META_UNRESTORED: u32 = 1 << 31; +const DATA_UNRESTORED: u32 = 1 << 30; + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub enum TaskDataCategory { + Meta, + Data, + All, +} + +impl TaskDataCategory { + pub fn flag(&self) -> u32 { + match self { + TaskDataCategory::Meta => META_UNRESTORED, + TaskDataCategory::Data => DATA_UNRESTORED, + TaskDataCategory::All => META_UNRESTORED | DATA_UNRESTORED, + } + } +} + +impl IntoIterator for TaskDataCategory { + type Item = TaskDataCategory; + + type IntoIter = TaskDataCategoryIterator; + + fn into_iter(self) -> Self::IntoIter { + match self { + TaskDataCategory::Meta => TaskDataCategoryIterator::Meta, + TaskDataCategory::Data => TaskDataCategoryIterator::Data, + TaskDataCategory::All => TaskDataCategoryIterator::All, + } + } +} + +pub enum TaskDataCategoryIterator { + All, + Meta, + Data, + None, +} + +impl Iterator for TaskDataCategoryIterator { + type Item = TaskDataCategory; + + fn next(&mut self) -> Option { + match self { + TaskDataCategoryIterator::All => { + *self = TaskDataCategoryIterator::Data; + Some(TaskDataCategory::Meta) + } + TaskDataCategoryIterator::Meta => { + *self = TaskDataCategoryIterator::None; + Some(TaskDataCategory::Meta) + } + TaskDataCategoryIterator::Data => { + *self = TaskDataCategoryIterator::None; + Some(TaskDataCategory::Data) + } + TaskDataCategoryIterator::None => None, + } + } +} pub struct PersistanceState { value: u32, @@ -24,13 +85,15 @@ pub struct PersistanceState { impl Default for PersistanceState { fn default() -> Self { - Self { value: UNRESTORED } + Self { + value: META_UNRESTORED | DATA_UNRESTORED, + } } } impl PersistanceState { - pub fn set_restored(&mut self) { - self.value = 0; + pub fn set_restored(&mut self, category: TaskDataCategory) { + self.value &= !category.flag(); } pub fn add_persisting_item(&mut self) { @@ -45,12 +108,12 @@ impl PersistanceState { self.value -= count; } - pub fn is_restored(&self) -> bool { - self.value != UNRESTORED + pub fn is_restored(&self, category: TaskDataCategory) -> bool { + self.value & category.flag() == 0 } pub fn is_fully_persisted(&self) -> bool { - self.value == 0 + self.value & !TaskDataCategory::All.flag() == 0 } } diff --git a/turbopack/crates/turbo-tasks-backend/src/backing_storage.rs b/turbopack/crates/turbo-tasks-backend/src/backing_storage.rs index cb1b2da8a4129..3082785f3ad8d 100644 --- a/turbopack/crates/turbo-tasks-backend/src/backing_storage.rs +++ b/turbopack/crates/turbo-tasks-backend/src/backing_storage.rs @@ -4,7 +4,7 @@ use anyhow::Result; use turbo_tasks::{backend::CachedTaskType, TaskId}; use crate::{ - backend::AnyOperation, + backend::{AnyOperation, TaskDataCategory}, data::{CachedDataItem, CachedDataUpdate}, utils::chunked_vec::ChunkedVec, }; @@ -16,9 +16,10 @@ pub trait BackingStorage { &self, operations: Vec>, task_cache_updates: ChunkedVec<(Arc, TaskId)>, + meta_updates: ChunkedVec, data_updates: ChunkedVec, ) -> Result<()>; fn forward_lookup_task_cache(&self, key: &CachedTaskType) -> Option; fn reverse_lookup_task_cache(&self, task_id: TaskId) -> Option>; - fn lookup_data(&self, task_id: TaskId) -> Vec; + fn lookup_data(&self, task_id: TaskId, category: TaskDataCategory) -> Vec; } diff --git a/turbopack/crates/turbo-tasks-backend/src/data.rs b/turbopack/crates/turbo-tasks-backend/src/data.rs index 2b49683805756..15b64eb8ca995 100644 --- a/turbopack/crates/turbo-tasks-backend/src/data.rs +++ b/turbopack/crates/turbo-tasks-backend/src/data.rs @@ -6,7 +6,7 @@ use turbo_tasks::{ CellId, KeyValuePair, TaskId, TypedSharedReference, ValueTypeId, }; -use crate::backend::indexed::Indexed; +use crate::backend::{indexed::Indexed, TaskDataCategory}; #[derive(Debug, Copy, Clone, Hash, PartialEq, Eq, Serialize, Deserialize)] pub struct CellRef { @@ -343,6 +343,39 @@ impl CachedDataItemKey { CachedDataItemKey::Error { .. } => false, } } + + pub fn category(&self) -> TaskDataCategory { + match self { + CachedDataItemKey::Output { .. } + | CachedDataItemKey::Collectible { .. } + | CachedDataItemKey::Child { .. } + | CachedDataItemKey::CellData { .. } + | CachedDataItemKey::CellTypeMaxIndex { .. } + | CachedDataItemKey::OutputDependency { .. } + | CachedDataItemKey::CellDependency { .. } + | CachedDataItemKey::CollectiblesDependency { .. } + | CachedDataItemKey::OutputDependent { .. } + | CachedDataItemKey::CellDependent { .. } + | CachedDataItemKey::CollectiblesDependent { .. } + | CachedDataItemKey::InProgress { .. } + | CachedDataItemKey::InProgressCell { .. } + | CachedDataItemKey::OutdatedCollectible { .. } + | CachedDataItemKey::OutdatedOutputDependency { .. } + | CachedDataItemKey::OutdatedCellDependency { .. } + | CachedDataItemKey::OutdatedChild { .. } + | CachedDataItemKey::Error { .. } => TaskDataCategory::Data, + + CachedDataItemKey::AggregationNumber { .. } + | CachedDataItemKey::Dirty { .. } + | CachedDataItemKey::DirtyWhenPersisted { .. } + | CachedDataItemKey::Follower { .. } + | CachedDataItemKey::Upper { .. } + | CachedDataItemKey::AggregatedDirtyContainer { .. } + | CachedDataItemKey::AggregatedCollectible { .. } + | CachedDataItemKey::AggregatedDirtyContainerCount { .. } + | CachedDataItemKey::AggregateRoot { .. } => TaskDataCategory::Meta, + } + } } #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] diff --git a/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs b/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs index 4a81c117bc6a6..508c1da87e9a7 100644 --- a/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs +++ b/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs @@ -16,7 +16,7 @@ use tracing::Span; use turbo_tasks::{backend::CachedTaskType, KeyValuePair, TaskId}; use crate::{ - backend::AnyOperation, + backend::{AnyOperation, TaskDataCategory}, backing_storage::BackingStorage, data::{CachedDataItem, CachedDataItemKey, CachedDataItemValue, CachedDataUpdate}, utils::chunked_vec::ChunkedVec, @@ -47,8 +47,9 @@ fn as_u32(result: Result<&[u8], E>) -> Result< pub struct LmdbBackingStorage { env: Environment, - meta_db: Database, + infra_db: Database, data_db: Database, + meta_db: Database, forward_task_cache_db: Database, reverse_task_cache_db: Database, } @@ -64,23 +65,33 @@ impl LmdbBackingStorage { | EnvironmentFlags::NO_TLS, ) .set_max_readers((available_parallelism().map_or(16, |v| v.get()) * 8) as u32) - .set_max_dbs(4) + .set_max_dbs(5) .set_map_size(20 * 1024 * 1024 * 1024) .open(path)?; - let meta_db = env.create_db(Some("meta"), DatabaseFlags::INTEGER_KEY)?; + let infra_db = env.create_db(Some("infra"), DatabaseFlags::INTEGER_KEY)?; let data_db = env.create_db(Some("data"), DatabaseFlags::INTEGER_KEY)?; + let meta_db = env.create_db(Some("meta"), DatabaseFlags::INTEGER_KEY)?; let forward_task_cache_db = env.create_db(Some("forward_task_cache"), DatabaseFlags::empty())?; let reverse_task_cache_db = env.create_db(Some("reverse_task_cache"), DatabaseFlags::INTEGER_KEY)?; Ok(Self { env, - meta_db, + infra_db, data_db, + meta_db, forward_task_cache_db, reverse_task_cache_db, }) } + + fn db(&self, category: TaskDataCategory) -> Database { + match category { + TaskDataCategory::Meta => self.meta_db, + TaskDataCategory::Data => self.data_db, + _ => unreachable!(), + } + } } impl BackingStorage for LmdbBackingStorage { @@ -88,7 +99,7 @@ impl BackingStorage for LmdbBackingStorage { fn get(this: &LmdbBackingStorage) -> Result { let tx = this.env.begin_rw_txn()?; let next_free_task_id = - as_u32(tx.get(this.meta_db, &IntKey::new(META_KEY_NEXT_FREE_TASK_ID)))?; + as_u32(tx.get(this.infra_db, &IntKey::new(META_KEY_NEXT_FREE_TASK_ID)))?; Ok(next_free_task_id) } TaskId::from(get(self).unwrap_or(1)) @@ -97,7 +108,7 @@ impl BackingStorage for LmdbBackingStorage { fn uncompleted_operations(&self) -> Vec { fn get(this: &LmdbBackingStorage) -> Result> { let tx = this.env.begin_ro_txn()?; - let operations = tx.get(this.meta_db, &IntKey::new(META_KEY_OPERATIONS))?; + let operations = tx.get(this.infra_db, &IntKey::new(META_KEY_OPERATIONS))?; let operations = pot::from_slice(operations)?; Ok(operations) } @@ -109,19 +120,21 @@ impl BackingStorage for LmdbBackingStorage { &self, operations: Vec>, task_cache_updates: ChunkedVec<(Arc, TaskId)>, + meta_updates: ChunkedVec, data_updates: ChunkedVec, ) -> Result<()> { println!( - "Persisting {} operations, {} task cache updates, {} data updates...", + "Persisting {} operations, {} task cache updates, {} meta updates, {} data updates...", operations.len(), task_cache_updates.len(), + meta_updates.len(), data_updates.len() ); let start = Instant::now(); let mut op_count = 0; let mut tx = self.env.begin_rw_txn()?; let mut next_task_id = - as_u32(tx.get(self.meta_db, &IntKey::new(META_KEY_NEXT_FREE_TASK_ID))).unwrap_or(1); + as_u32(tx.get(self.infra_db, &IntKey::new(META_KEY_NEXT_FREE_TASK_ID))).unwrap_or(1); { let _span = tracing::trace_span!("update task cache", items = task_cache_updates.len()) .entered(); @@ -165,7 +178,7 @@ impl BackingStorage for LmdbBackingStorage { next_task_id = next_task_id.max(task_id + 1); } tx.put( - self.meta_db, + self.infra_db, &IntKey::new(META_KEY_NEXT_FREE_TASK_ID), &next_task_id.to_be_bytes(), WriteFlags::empty(), @@ -178,7 +191,7 @@ impl BackingStorage for LmdbBackingStorage { let operations = pot::to_vec(&operations) .with_context(|| anyhow!("Unable to serialize operations"))?; tx.put( - self.meta_db, + self.infra_db, &IntKey::new(META_KEY_OPERATIONS), &operations, WriteFlags::empty(), @@ -187,110 +200,114 @@ impl BackingStorage for LmdbBackingStorage { op_count += 2; } - let mut updated_items: HashMap> = - HashMap::new(); - { - let _span = - tracing::trace_span!("sort and restore task data", updates = data_updates.len()) - .entered(); - for CachedDataUpdate { task, key, value } in data_updates.into_iter() { - let data = match updated_items.entry(task) { - Entry::Occupied(entry) => entry.into_mut(), - Entry::Vacant(entry) => { - let mut map = HashMap::new(); - if let Ok(old_data) = tx.get(self.data_db, &IntKey::new(*task)) { - let old_data: Vec = match pot::from_slice(old_data) { - Ok(d) => d, - Err(_) => serde_path_to_error::deserialize( - &mut pot::de::SymbolList::new() - .deserializer_for_slice(old_data)?, - ) - .with_context(|| { - anyhow!( - "Unable to deserialize old value of {task}: {old_data:?}" + for (db, updates) in [(self.meta_db, meta_updates), (self.data_db, data_updates)] { + let mut updated_items: HashMap< + TaskId, + HashMap, + > = HashMap::new(); + { + let _span = + tracing::trace_span!("sort and restore task data", updates = updates.len()) + .entered(); + for CachedDataUpdate { task, key, value } in updates.into_iter() { + let data = match updated_items.entry(task) { + Entry::Occupied(entry) => entry.into_mut(), + Entry::Vacant(entry) => { + let mut map = HashMap::new(); + if let Ok(old_data) = tx.get(db, &IntKey::new(*task)) { + let old_data: Vec = match pot::from_slice(old_data) + { + Ok(d) => d, + Err(_) => serde_path_to_error::deserialize( + &mut pot::de::SymbolList::new() + .deserializer_for_slice(old_data)?, ) - })?, - }; - for item in old_data { - let (key, value) = item.into_key_and_value(); - map.insert(key, value); + .with_context(|| { + anyhow!( + "Unable to deserialize old value of {task}: \ + {old_data:?}" + ) + })?, + }; + for item in old_data { + let (key, value) = item.into_key_and_value(); + map.insert(key, value); + } } + entry.insert(map) } - entry.insert(map) + }; + if let Some(value) = value { + data.insert(key, value); + } else { + data.remove(&key); } - }; - if let Some(value) = value { - data.insert(key, value); - } else { - data.remove(&key); } } - } - { - let _span = - tracing::trace_span!("update task data", tasks = updated_items.len()).entered(); - for (task_id, data) in updated_items { - let mut vec: Vec = data - .into_iter() - .map(|(key, value)| CachedDataItem::from_key_and_value(key, value)) - .collect(); - let value = match pot::to_vec(&vec) { - #[cfg(not(feature = "verify_serialization"))] - Ok(value) => value, - _ => { - let mut error = Ok(()); - vec.retain(|item| { - let mut buf = Vec::::new(); - let mut symbol_map = pot::ser::SymbolMap::new(); - let mut serializer = symbol_map.serializer_for(&mut buf).unwrap(); - if let Err(err) = serde_path_to_error::serialize(item, &mut serializer) - { - if item.is_optional() { - println!("Skipping non-serializable optional item: {item:?}"); - } else { - error = Err(err).context({ - anyhow!( - "Unable to serialize data item for {task_id}: \ - {item:#?}" - ) - }); - } - false - } else { - #[cfg(feature = "verify_serialization")] + { + let _span = + tracing::trace_span!("update task data", tasks = updated_items.len()).entered(); + for (task_id, data) in updated_items { + let mut vec: Vec = data + .into_iter() + .map(|(key, value)| CachedDataItem::from_key_and_value(key, value)) + .collect(); + let value = match pot::to_vec(&vec) { + #[cfg(not(feature = "verify_serialization"))] + Ok(value) => value, + _ => { + let mut error = Ok(()); + vec.retain(|item| { + let mut buf = Vec::::new(); + let mut symbol_map = pot::ser::SymbolMap::new(); + let mut serializer = symbol_map.serializer_for(&mut buf).unwrap(); + if let Err(err) = + serde_path_to_error::serialize(item, &mut serializer) { - let deserialize: Result = - serde_path_to_error::deserialize( - &mut pot::de::SymbolList::new() - .deserializer_for_slice(&buf) - .unwrap(), - ); - if let Err(err) = deserialize { + if item.is_optional() { println!( - "Data item would not be deserializable {task_id}: \ - {err:?}\n{item:#?}" + "Skipping non-serializable optional item: {item:?}" ); - return false; + } else { + error = Err(err).context({ + anyhow!( + "Unable to serialize data item for {task_id}: \ + {item:#?}" + ) + }); + } + false + } else { + #[cfg(feature = "verify_serialization")] + { + let deserialize: Result = + serde_path_to_error::deserialize( + &mut pot::de::SymbolList::new() + .deserializer_for_slice(&buf) + .unwrap(), + ); + if let Err(err) = deserialize { + println!( + "Data item would not be deserializable {task_id}: \ + {err:?}\n{item:#?}" + ); + return false; + } } + true } - true - } - }); - error?; + }); + error?; - pot::to_vec(&vec).with_context(|| { - anyhow!("Unable to serialize data items for {task_id}: {vec:#?}") - })? - } - }; - tx.put( - self.data_db, - &IntKey::new(*task_id), - &value, - WriteFlags::empty(), - ) - .with_context(|| anyhow!("Unable to write data items for {task_id}"))?; - op_count += 1; + pot::to_vec(&vec).with_context(|| { + anyhow!("Unable to serialize data items for {task_id}: {vec:#?}") + })? + } + }; + tx.put(db, &IntKey::new(*task_id), &value, WriteFlags::empty()) + .with_context(|| anyhow!("Unable to write data items for {task_id}"))?; + op_count += 1; + } } } { @@ -365,15 +382,16 @@ impl BackingStorage for LmdbBackingStorage { Some(result) } - fn lookup_data(&self, task_id: TaskId) -> Vec { + fn lookup_data(&self, task_id: TaskId, category: TaskDataCategory) -> Vec { let span = tracing::trace_span!("restore data", bytes = 0usize, items = 0usize).entered(); fn lookup( this: &LmdbBackingStorage, task_id: TaskId, + category: TaskDataCategory, span: &Span, ) -> Result> { let tx = this.env.begin_ro_txn()?; - let bytes = match tx.get(this.data_db, &IntKey::new(*task_id)) { + let bytes = match tx.get(this.db(category), &IntKey::new(*task_id)) { Ok(bytes) => bytes, Err(err) => { if err == lmdb::Error::NotFound { @@ -389,7 +407,7 @@ impl BackingStorage for LmdbBackingStorage { tx.commit()?; Ok(result) } - lookup(self, task_id, &span) + lookup(self, task_id, category, &span) .inspect_err(|err| println!("Looking up data for {task_id} failed: {err:?}")) .unwrap_or_default() } From a50a27450643c629f10a5a5daf554b84dab1f96f Mon Sep 17 00:00:00 2001 From: Tobias Koppers Date: Tue, 24 Sep 2024 16:04:51 +0200 Subject: [PATCH 29/46] compare with old value before storing to db --- .../src/backend/operation/mod.rs | 22 ++++- .../crates/turbo-tasks-backend/src/data.rs | 39 ++++++-- .../src/lmdb_backing_storage.rs | 92 ++++++++++++------- .../src/derive/key_value_pair_macro.rs | 2 +- 4 files changed, 108 insertions(+), 47 deletions(-) diff --git a/turbopack/crates/turbo-tasks-backend/src/backend/operation/mod.rs b/turbopack/crates/turbo-tasks-backend/src/backend/operation/mod.rs index b2d4490853794..da7832d0b3b0c 100644 --- a/turbopack/crates/turbo-tasks-backend/src/backend/operation/mod.rs +++ b/turbopack/crates/turbo-tasks-backend/src/backend/operation/mod.rs @@ -227,6 +227,7 @@ impl TaskGuard<'_> { key, task: self.task_id, value: Some(value), + old_value: None, }); true } else { @@ -257,6 +258,9 @@ impl TaskGuard<'_> { key, task: self.task_id, value: Some(value), + old_value: old + .as_ref() + .and_then(|old| old.is_persistent().then(|| old.clone())), }); old } else { @@ -271,6 +275,7 @@ impl TaskGuard<'_> { key, task: self.task_id, value: None, + old_value: Some(old.clone()), }); } Some(old) @@ -296,13 +301,16 @@ impl TaskGuard<'_> { } = self; let mut add_persisting_item = false; task.update(key, |old| { - let old_persistent = old.as_ref().map(|old| old.is_persistent()).unwrap_or(false); + let old_value_when_persistent = old + .as_ref() + .map(|old| old.is_persistent().then(|| old.clone())) + .flatten(); let new = update(old); let new_persistent = new.as_ref().map(|new| new.is_persistent()).unwrap_or(false); - match (old_persistent, new_persistent) { - (false, false) => {} - (true, false) => { + match (old_value_when_persistent, new_persistent) { + (None, false) => {} + (Some(old_value), false) => { add_persisting_item = true; backend .persisted_storage_log(key.category()) @@ -311,9 +319,10 @@ impl TaskGuard<'_> { key: key.clone(), task: *task_id, value: None, + old_value: Some(old_value), }); } - (_, true) => { + (old_value, true) => { add_persisting_item = true; backend .persisted_storage_log(key.category()) @@ -322,6 +331,7 @@ impl TaskGuard<'_> { key: key.clone(), task: *task_id, value: new.clone(), + old_value, }); } } @@ -346,6 +356,7 @@ impl TaskGuard<'_> { key, task: self.task_id, value: None, + old_value: value.is_persistent().then(|| value.clone()), }); } Some(value) @@ -390,6 +401,7 @@ impl TaskGuard<'_> { value: Some(CachedDataItemValue::CellData { value: value.clone(), }), + old_value: None, }) } _ => None, diff --git a/turbopack/crates/turbo-tasks-backend/src/data.rs b/turbopack/crates/turbo-tasks-backend/src/data.rs index 15b64eb8ca995..62285cb04436f 100644 --- a/turbopack/crates/turbo-tasks-backend/src/data.rs +++ b/turbopack/crates/turbo-tasks-backend/src/data.rs @@ -53,6 +53,20 @@ impl RootState { } } +impl Clone for RootState { + fn clone(&self) -> Self { + panic!("RootState cannot be cloned"); + } +} + +impl PartialEq for RootState { + fn eq(&self, _other: &Self) -> bool { + panic!("RootState cannot be compared"); + } +} + +impl Eq for RootState {} + #[derive(Debug, Clone, Copy)] pub enum ActiveType { RootTask, @@ -63,12 +77,6 @@ pub enum ActiveType { CachedActiveUntilClean, } -impl Clone for RootState { - fn clone(&self) -> Self { - panic!("RootState cannot be cloned"); - } -} - #[derive(Debug)] pub enum InProgressState { Scheduled { @@ -88,6 +96,14 @@ impl Clone for InProgressState { } } +impl PartialEq for InProgressState { + fn eq(&self, _other: &Self) -> bool { + panic!("InProgressState cannot be compared"); + } +} + +impl Eq for InProgressState {} + #[derive(Debug)] pub struct InProgressCellState { pub event: Event, @@ -99,6 +115,14 @@ impl Clone for InProgressCellState { } } +impl PartialEq for InProgressCellState { + fn eq(&self, _other: &Self) -> bool { + panic!("InProgressCell cannot be compared"); + } +} + +impl Eq for InProgressCellState {} + impl InProgressCellState { pub fn new(task_id: TaskId, cell: CellId) -> Self { InProgressCellState { @@ -109,7 +133,7 @@ impl InProgressCellState { } } -#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize)] +#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)] pub struct AggregationNumber { pub base: u32, pub distance: u32, @@ -459,4 +483,5 @@ pub struct CachedDataUpdate { pub task: TaskId, pub key: CachedDataItemKey, pub value: Option, + pub old_value: Option, } diff --git a/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs b/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs index 508c1da87e9a7..8f87391839809 100644 --- a/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs +++ b/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs @@ -201,47 +201,71 @@ impl BackingStorage for LmdbBackingStorage { } for (db, updates) in [(self.meta_db, meta_updates), (self.data_db, data_updates)] { - let mut updated_items: HashMap< + let mut task_updates: HashMap< TaskId, - HashMap, + HashMap< + CachedDataItemKey, + (Option, Option), + >, > = HashMap::new(); { let _span = - tracing::trace_span!("sort and restore task data", updates = updates.len()) - .entered(); - for CachedDataUpdate { task, key, value } in updates.into_iter() { - let data = match updated_items.entry(task) { - Entry::Occupied(entry) => entry.into_mut(), + tracing::trace_span!("organize task data", updates = updates.len()).entered(); + for CachedDataUpdate { + task, + key, + value, + old_value, + } in updates.into_iter() + { + let data = task_updates.entry(task).or_default(); + match data.entry(key) { + Entry::Occupied(mut entry) => { + entry.get_mut().1 = value; + } Entry::Vacant(entry) => { - let mut map = HashMap::new(); - if let Ok(old_data) = tx.get(db, &IntKey::new(*task)) { - let old_data: Vec = match pot::from_slice(old_data) - { - Ok(d) => d, - Err(_) => serde_path_to_error::deserialize( - &mut pot::de::SymbolList::new() - .deserializer_for_slice(old_data)?, - ) - .with_context(|| { - anyhow!( - "Unable to deserialize old value of {task}: \ - {old_data:?}" - ) - })?, - }; - for item in old_data { - let (key, value) = item.into_key_and_value(); - map.insert(key, value); - } - } - entry.insert(map) + entry.insert((old_value, value)); + } + } + } + task_updates.retain(|_, data| { + data.retain(|_, (old_value, value)| *old_value != *value); + !data.is_empty() + }); + } + let mut updated_items: HashMap< + TaskId, + HashMap, + >; + { + let _span = + tracing::trace_span!("restore task data", tasks = task_updates.len()).entered(); + updated_items = HashMap::with_capacity(task_updates.len()); + for (task, updates) in task_updates.into_iter() { + let mut map = HashMap::new(); + if let Ok(old_data) = tx.get(db, &IntKey::new(*task)) { + let old_data: Vec = match pot::from_slice(old_data) { + Ok(d) => d, + Err(_) => serde_path_to_error::deserialize( + &mut pot::de::SymbolList::new().deserializer_for_slice(old_data)?, + ) + .with_context(|| { + anyhow!("Unable to deserialize old value of {task}: {old_data:?}") + })?, + }; + for item in old_data { + let (key, value) = item.into_key_and_value(); + map.insert(key, value); + } + } + for (key, (_, value)) in updates { + if let Some(value) = value { + map.insert(key, value); + } else { + map.remove(&key); } - }; - if let Some(value) = value { - data.insert(key, value); - } else { - data.remove(&key); } + updated_items.insert(task, map); } } { diff --git a/turbopack/crates/turbo-tasks-macros/src/derive/key_value_pair_macro.rs b/turbopack/crates/turbo-tasks-macros/src/derive/key_value_pair_macro.rs index a3a0d192ec9a2..3753a920a79ff 100644 --- a/turbopack/crates/turbo-tasks-macros/src/derive/key_value_pair_macro.rs +++ b/turbopack/crates/turbo-tasks-macros/src/derive/key_value_pair_macro.rs @@ -106,7 +106,7 @@ pub fn derive_key_value_pair(input: TokenStream) -> TokenStream { )* } - #[derive(Debug, Clone, Default)] + #[derive(Debug, Clone, Default, PartialEq, Eq)] #vis enum #value_name { #( #variant_names { From bdd8f8f6f6d25327032fa00bfe1f322aecee4f98 Mon Sep 17 00:00:00 2001 From: Tobias Koppers Date: Tue, 24 Sep 2024 16:25:02 +0200 Subject: [PATCH 30/46] Early stop snapshot interval when stopping Wait 30s for the initial snapshot, 15s interval --- .../turbo-tasks-backend/src/backend/mod.rs | 35 +++++++++++++++---- turbopack/crates/turbo-tasks/src/backend.rs | 2 ++ turbopack/crates/turbo-tasks/src/manager.rs | 1 + 3 files changed, 31 insertions(+), 7 deletions(-) diff --git a/turbopack/crates/turbo-tasks-backend/src/backend/mod.rs b/turbopack/crates/turbo-tasks-backend/src/backend/mod.rs index ee3ba1f11527b..0af99b07ada6f 100644 --- a/turbopack/crates/turbo-tasks-backend/src/backend/mod.rs +++ b/turbopack/crates/turbo-tasks-backend/src/backend/mod.rs @@ -10,7 +10,7 @@ use std::{ mem::take, pin::Pin, sync::{ - atomic::{AtomicU64, AtomicUsize, Ordering}, + atomic::{AtomicBool, AtomicU64, AtomicUsize, Ordering}, Arc, }, time::{Duration, Instant}, @@ -23,12 +23,13 @@ use parking_lot::{Condvar, Mutex}; use rustc_hash::FxHasher; use smallvec::smallvec; pub use storage::TaskDataCategory; +use tokio::time::timeout; use turbo_tasks::{ backend::{ Backend, BackendJobId, CachedTaskType, CellContent, TaskExecutionSpec, TransientTaskRoot, TransientTaskType, TypedCellContent, }, - event::EventListener, + event::{Event, EventListener}, registry, util::IdFactoryWithReuse, CellId, FunctionId, RawVc, ReadConsistency, TaskId, TraitTypeId, TurboTasksBackendApi, @@ -124,6 +125,9 @@ pub struct TurboTasksBackend { /// The timestamp of the last started snapshot. last_snapshot: AtomicU64, + stopping: AtomicBool, + stopping_event: Event, + backing_storage: Arc, } @@ -150,6 +154,8 @@ impl TurboTasksBackend { operations_suspended: Condvar::new(), snapshot_completed: Condvar::new(), last_snapshot: AtomicU64::new(0), + stopping: AtomicBool::new(false), + stopping_event: Event::new(|| "TurboTasksBackend::stopping_event".to_string()), backing_storage, } } @@ -572,6 +578,11 @@ impl Backend for TurboTasksBackend { turbo_tasks.schedule_backend_background_job(BackendJobId::from(1)); } + fn stopping(&self, _turbo_tasks: &dyn TurboTasksBackendApi) { + self.stopping.store(true, Ordering::Release); + self.stopping_event.notify(usize::MAX); + } + fn get_or_create_persistent_task( &self, task_type: CachedTaskType, @@ -1075,15 +1086,25 @@ impl Backend for TurboTasksBackend { turbo_tasks: &'a dyn TurboTasksBackendApi, ) -> Pin + Send + 'a>> { Box::pin(async move { - if *id == 1 { + if *id == 1 || *id == 2 { let last_snapshot = self.last_snapshot.load(Ordering::Relaxed); let mut last_snapshot = self.start_time + Duration::from_millis(last_snapshot); loop { - const SNAPSHOT_INTERVAL: Duration = Duration::from_secs(1); + const FIRST_SNAPSHOT_WAIT: Duration = Duration::from_secs(30); + const SNAPSHOT_INTERVAL: Duration = Duration::from_secs(15); + + let time = if *id == 1 { + FIRST_SNAPSHOT_WAIT + } else { + SNAPSHOT_INTERVAL + }; let elapsed = last_snapshot.elapsed(); - if elapsed < SNAPSHOT_INTERVAL { - tokio::time::sleep(SNAPSHOT_INTERVAL - elapsed).await; + if elapsed < time { + let listener = self.stopping_event.listen(); + if !self.stopping.load(Ordering::Acquire) { + let _ = timeout(time - elapsed, listener).await; + } } if let Some((snapshot_start, new_data)) = self.snapshot() { @@ -1095,7 +1116,7 @@ impl Backend for TurboTasksBackend { self.last_snapshot .store(last_snapshot.as_millis() as u64, Ordering::Relaxed); - turbo_tasks.schedule_backend_background_job(id); + turbo_tasks.schedule_backend_background_job(BackendJobId::from(2)); return; } } diff --git a/turbopack/crates/turbo-tasks/src/backend.rs b/turbopack/crates/turbo-tasks/src/backend.rs index 2ee1f810109a1..a7096f9634474 100644 --- a/turbopack/crates/turbo-tasks/src/backend.rs +++ b/turbopack/crates/turbo-tasks/src/backend.rs @@ -529,6 +529,8 @@ pub trait Backend: Sync + Send { #[allow(unused_variables)] fn stop(&self, turbo_tasks: &dyn TurboTasksBackendApi) {} + #[allow(unused_variables)] + fn stopping(&self, turbo_tasks: &dyn TurboTasksBackendApi) {} #[allow(unused_variables)] fn idle_start(&self, turbo_tasks: &dyn TurboTasksBackendApi) {} diff --git a/turbopack/crates/turbo-tasks/src/manager.rs b/turbopack/crates/turbo-tasks/src/manager.rs index ebb063a91212a..3186a6dc41f31 100644 --- a/turbopack/crates/turbo-tasks/src/manager.rs +++ b/turbopack/crates/turbo-tasks/src/manager.rs @@ -1052,6 +1052,7 @@ impl TurboTasks { } pub async fn stop_and_wait(&self) { + self.backend.stopping(self); self.stopped.store(true, Ordering::Release); { let listener = self.event.listen_with_note(|| "wait for stop".to_string()); From 9ef72f75e10d7992ce05defea3392847a8ad8678 Mon Sep 17 00:00:00 2001 From: Tobias Koppers Date: Tue, 24 Sep 2024 17:25:44 +0200 Subject: [PATCH 31/46] parallel save_snapshot --- Cargo.lock | 13 + .../crates/turbo-tasks-backend/Cargo.toml | 2 + .../src/lmdb_backing_storage.rs | 436 ++++++++++-------- turbopack/crates/turbo-tasks/Cargo.toml | 1 + turbopack/crates/turbo-tasks/src/lib.rs | 7 +- turbopack/crates/turbo-tasks/src/manager.rs | 4 + turbopack/crates/turbo-tasks/src/scope.rs | 50 ++ 7 files changed, 328 insertions(+), 185 deletions(-) create mode 100644 turbopack/crates/turbo-tasks/src/scope.rs diff --git a/Cargo.lock b/Cargo.lock index ef715c704c6e0..0ce961a6814f7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8182,6 +8182,16 @@ dependencies = [ "webpki", ] +[[package]] +name = "tokio-scoped" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e4beb8ba13bc53ac53ce1d52b42f02e5d8060f0f42138862869beb769722b256" +dependencies = [ + "tokio", + "tokio-stream", +] + [[package]] name = "tokio-stream" version = "0.1.15" @@ -8584,6 +8594,7 @@ dependencies = [ "once_cell", "parking_lot", "pin-project-lite", + "rayon", "regex", "rustc-hash", "serde", @@ -8618,11 +8629,13 @@ dependencies = [ "parking_lot", "pot", "rand", + "rayon", "rustc-hash", "serde", "serde_path_to_error", "smallvec", "tokio", + "tokio-scoped", "tracing", "turbo-prehash", "turbo-tasks", diff --git a/turbopack/crates/turbo-tasks-backend/Cargo.toml b/turbopack/crates/turbo-tasks-backend/Cargo.toml index e7fd19a9323b3..9d9de44226457 100644 --- a/turbopack/crates/turbo-tasks-backend/Cargo.toml +++ b/turbopack/crates/turbo-tasks-backend/Cargo.toml @@ -30,11 +30,13 @@ once_cell = { workspace = true } parking_lot = { workspace = true } pot = "3.0.0" rand = { workspace = true } +rayon = { workspace = true } rustc-hash = { workspace = true } serde = { workspace = true } serde_path_to_error = { workspace = true } smallvec = { workspace = true } tokio = { workspace = true } +tokio-scoped = "0.2.0" tracing = { workspace = true } turbo-prehash = { workspace = true } turbo-tasks = { workspace = true } diff --git a/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs b/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs index 8f87391839809..8219ff589d13f 100644 --- a/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs +++ b/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs @@ -133,201 +133,132 @@ impl BackingStorage for LmdbBackingStorage { let start = Instant::now(); let mut op_count = 0; let mut tx = self.env.begin_rw_txn()?; - let mut next_task_id = - as_u32(tx.get(self.infra_db, &IntKey::new(META_KEY_NEXT_FREE_TASK_ID))).unwrap_or(1); - { - let _span = tracing::trace_span!("update task cache", items = task_cache_updates.len()) - .entered(); - for (task_type, task_id) in task_cache_updates.iter() { - let task_id = **task_id; - let task_type_bytes = pot::to_vec(&**task_type) - .with_context(|| anyhow!("Unable to serialize task cache key {task_type:?}"))?; - #[cfg(feature = "verify_serialization")] - { - let deserialize: Result = serde_path_to_error::deserialize( - &mut pot::de::SymbolList::new().deserializer_for_slice(&task_type_bytes)?, - ); - if let Err(err) = deserialize { - println!( - "Task type would not be deserializable {task_id}: \ - {err:?}\n{task_type:#?}" - ); - panic!("Task type would not be deserializable {task_id}: {err:?}"); - } - } - extended_key::put( - &mut tx, - self.forward_task_cache_db, - &task_type_bytes, - &task_id.to_be_bytes(), - WriteFlags::empty(), - ) - .with_context(|| { - anyhow!("Unable to write task cache {task_type:?} => {task_id}") - })?; - tx.put( - self.reverse_task_cache_db, - &IntKey::new(task_id), - &task_type_bytes, - WriteFlags::empty(), - ) - .with_context(|| { - anyhow!("Unable to write task cache {task_id} => {task_type:?}") - })?; - op_count += 2; - next_task_id = next_task_id.max(task_id + 1); - } - tx.put( - self.infra_db, - &IntKey::new(META_KEY_NEXT_FREE_TASK_ID), - &next_task_id.to_be_bytes(), - WriteFlags::empty(), - ) - .with_context(|| anyhow!("Unable to write next free task id"))?; - } - { - let _span = - tracing::trace_span!("update operations", operations = operations.len()).entered(); - let operations = pot::to_vec(&operations) - .with_context(|| anyhow!("Unable to serialize operations"))?; - tx.put( - self.infra_db, - &IntKey::new(META_KEY_OPERATIONS), - &operations, - WriteFlags::empty(), - ) - .with_context(|| anyhow!("Unable to write operations"))?; - op_count += 2; - } + let mut task_meta_items_result = Ok(Vec::new()); + let mut task_data_items_result = Ok(Vec::new()); - for (db, updates) in [(self.meta_db, meta_updates), (self.data_db, data_updates)] { - let mut task_updates: HashMap< - TaskId, - HashMap< - CachedDataItemKey, - (Option, Option), - >, - > = HashMap::new(); + turbo_tasks::scope(|s| { + // Start organizing the updates in parallel + s.spawn(|_| { + let task_meta_updates = { + let _span = + tracing::trace_span!("organize task meta", updates = meta_updates.len()) + .entered(); + organize_task_data(meta_updates) + }; + let items_result = { + let _span = + tracing::trace_span!("restore task meta", tasks = task_meta_updates.len()) + .entered(); + restore_task_data(self, self.meta_db, task_meta_updates) + }; + task_meta_items_result = items_result.and_then(|items| { + let _span = tracing::trace_span!("serialize task meta").entered(); + serialize_task_data(items) + }); + }); + s.spawn(|_| { + let task_data_updates = { + let _span = + tracing::trace_span!("organize task data", updates = data_updates.len()) + .entered(); + organize_task_data(data_updates) + }; + let items_result = { + let _span = + tracing::trace_span!("restore task data", tasks = task_data_updates.len()) + .entered(); + restore_task_data(self, self.data_db, task_data_updates) + }; + task_data_items_result = items_result.and_then(|items| { + let _span = tracing::trace_span!("serialize task data").entered(); + serialize_task_data(items) + }); + }); + + let mut next_task_id = + as_u32(tx.get(self.infra_db, &IntKey::new(META_KEY_NEXT_FREE_TASK_ID))) + .unwrap_or(1); { let _span = - tracing::trace_span!("organize task data", updates = updates.len()).entered(); - for CachedDataUpdate { - task, - key, - value, - old_value, - } in updates.into_iter() - { - let data = task_updates.entry(task).or_default(); - match data.entry(key) { - Entry::Occupied(mut entry) => { - entry.get_mut().1 = value; - } - Entry::Vacant(entry) => { - entry.insert((old_value, value)); + tracing::trace_span!("update task cache", items = task_cache_updates.len()) + .entered(); + for (task_type, task_id) in task_cache_updates.iter() { + let task_id = **task_id; + let task_type_bytes = pot::to_vec(&**task_type).with_context(|| { + anyhow!("Unable to serialize task cache key {task_type:?}") + })?; + #[cfg(feature = "verify_serialization")] + { + let deserialize: Result = + serde_path_to_error::deserialize( + &mut pot::de::SymbolList::new() + .deserializer_for_slice(&task_type_bytes)?, + ); + if let Err(err) = deserialize { + println!( + "Task type would not be deserializable {task_id}: \ + {err:?}\n{task_type:#?}" + ); + panic!("Task type would not be deserializable {task_id}: {err:?}"); } } + extended_key::put( + &mut tx, + self.forward_task_cache_db, + &task_type_bytes, + &task_id.to_be_bytes(), + WriteFlags::empty(), + ) + .with_context(|| { + anyhow!("Unable to write task cache {task_type:?} => {task_id}") + })?; + tx.put( + self.reverse_task_cache_db, + &IntKey::new(task_id), + &task_type_bytes, + WriteFlags::empty(), + ) + .with_context(|| { + anyhow!("Unable to write task cache {task_id} => {task_type:?}") + })?; + op_count += 2; + next_task_id = next_task_id.max(task_id + 1); } - task_updates.retain(|_, data| { - data.retain(|_, (old_value, value)| *old_value != *value); - !data.is_empty() - }); + tx.put( + self.infra_db, + &IntKey::new(META_KEY_NEXT_FREE_TASK_ID), + &next_task_id.to_be_bytes(), + WriteFlags::empty(), + ) + .with_context(|| anyhow!("Unable to write next free task id"))?; } - let mut updated_items: HashMap< - TaskId, - HashMap, - >; { let _span = - tracing::trace_span!("restore task data", tasks = task_updates.len()).entered(); - updated_items = HashMap::with_capacity(task_updates.len()); - for (task, updates) in task_updates.into_iter() { - let mut map = HashMap::new(); - if let Ok(old_data) = tx.get(db, &IntKey::new(*task)) { - let old_data: Vec = match pot::from_slice(old_data) { - Ok(d) => d, - Err(_) => serde_path_to_error::deserialize( - &mut pot::de::SymbolList::new().deserializer_for_slice(old_data)?, - ) - .with_context(|| { - anyhow!("Unable to deserialize old value of {task}: {old_data:?}") - })?, - }; - for item in old_data { - let (key, value) = item.into_key_and_value(); - map.insert(key, value); - } - } - for (key, (_, value)) in updates { - if let Some(value) = value { - map.insert(key, value); - } else { - map.remove(&key); - } - } - updated_items.insert(task, map); - } + tracing::trace_span!("update operations", operations = operations.len()) + .entered(); + let operations = pot::to_vec(&operations) + .with_context(|| anyhow!("Unable to serialize operations"))?; + tx.put( + self.infra_db, + &IntKey::new(META_KEY_OPERATIONS), + &operations, + WriteFlags::empty(), + ) + .with_context(|| anyhow!("Unable to write operations"))?; + op_count += 2; } + + anyhow::Ok(()) + })?; + + for (db, task_items) in [ + (self.meta_db, task_meta_items_result?), + (self.data_db, task_data_items_result?), + ] { { let _span = - tracing::trace_span!("update task data", tasks = updated_items.len()).entered(); - for (task_id, data) in updated_items { - let mut vec: Vec = data - .into_iter() - .map(|(key, value)| CachedDataItem::from_key_and_value(key, value)) - .collect(); - let value = match pot::to_vec(&vec) { - #[cfg(not(feature = "verify_serialization"))] - Ok(value) => value, - _ => { - let mut error = Ok(()); - vec.retain(|item| { - let mut buf = Vec::::new(); - let mut symbol_map = pot::ser::SymbolMap::new(); - let mut serializer = symbol_map.serializer_for(&mut buf).unwrap(); - if let Err(err) = - serde_path_to_error::serialize(item, &mut serializer) - { - if item.is_optional() { - println!( - "Skipping non-serializable optional item: {item:?}" - ); - } else { - error = Err(err).context({ - anyhow!( - "Unable to serialize data item for {task_id}: \ - {item:#?}" - ) - }); - } - false - } else { - #[cfg(feature = "verify_serialization")] - { - let deserialize: Result = - serde_path_to_error::deserialize( - &mut pot::de::SymbolList::new() - .deserializer_for_slice(&buf) - .unwrap(), - ); - if let Err(err) = deserialize { - println!( - "Data item would not be deserializable {task_id}: \ - {err:?}\n{item:#?}" - ); - return false; - } - } - true - } - }); - error?; - - pot::to_vec(&vec).with_context(|| { - anyhow!("Unable to serialize data items for {task_id}: {vec:#?}") - })? - } - }; + tracing::trace_span!("update task data", tasks = task_items.len()).entered(); + for (task_id, value) in task_items { tx.put(db, &IntKey::new(*task_id), &value, WriteFlags::empty()) .with_context(|| anyhow!("Unable to write data items for {task_id}"))?; op_count += 1; @@ -436,3 +367,142 @@ impl BackingStorage for LmdbBackingStorage { .unwrap_or_default() } } + +fn organize_task_data( + updates: ChunkedVec, +) -> HashMap< + TaskId, + HashMap, Option)>, +> { + let mut task_updates: HashMap< + TaskId, + HashMap, Option)>, + > = HashMap::new(); + for CachedDataUpdate { + task, + key, + value, + old_value, + } in updates.into_iter() + { + let data = task_updates.entry(task).or_default(); + match data.entry(key) { + Entry::Occupied(mut entry) => { + entry.get_mut().1 = value; + } + Entry::Vacant(entry) => { + entry.insert((old_value, value)); + } + } + } + task_updates.retain(|_, data| { + data.retain(|_, (old_value, value)| *old_value != *value); + !data.is_empty() + }); + task_updates +} + +fn restore_task_data( + this: &LmdbBackingStorage, + db: Database, + task_updates: HashMap< + TaskId, + HashMap, Option)>, + >, +) -> Result)>> { + let mut result = Vec::with_capacity(task_updates.len()); + + let tx = this.env.begin_ro_txn()?; + for (task, updates) in task_updates.into_iter() { + let mut map; + if let Ok(old_data) = tx.get(db, &IntKey::new(*task)) { + let old_data: Vec = match pot::from_slice(old_data) { + Ok(d) => d, + Err(_) => serde_path_to_error::deserialize( + &mut pot::de::SymbolList::new().deserializer_for_slice(old_data)?, + ) + .with_context(|| { + anyhow!("Unable to deserialize old value of {task}: {old_data:?}") + })?, + }; + map = old_data + .into_iter() + .map(|item| item.into_key_and_value()) + .collect(); + } else { + map = HashMap::new(); + } + for (key, (_, value)) in updates { + if let Some(value) = value { + map.insert(key, value); + } else { + map.remove(&key); + } + } + let vec = map + .into_iter() + .map(|(key, value)| CachedDataItem::from_key_and_value(key, value)) + .collect(); + result.push((task, vec)); + } + + Ok(result) +} + +fn serialize_task_data( + tasks: Vec<(TaskId, Vec)>, +) -> Result)>> { + tasks + .into_iter() + .map(|(task_id, mut data)| { + let value = match pot::to_vec(&data) { + #[cfg(not(feature = "verify_serialization"))] + Ok(value) => value, + _ => { + let mut error = Ok(()); + data.retain(|item| { + let mut buf = Vec::::new(); + let mut symbol_map = pot::ser::SymbolMap::new(); + let mut serializer = symbol_map.serializer_for(&mut buf).unwrap(); + if let Err(err) = serde_path_to_error::serialize(item, &mut serializer) { + if item.is_optional() { + println!("Skipping non-serializable optional item: {item:?}"); + } else { + error = Err(err).context({ + anyhow!( + "Unable to serialize data item for {task_id}: {item:#?}" + ) + }); + } + false + } else { + #[cfg(feature = "verify_serialization")] + { + let deserialize: Result = + serde_path_to_error::deserialize( + &mut pot::de::SymbolList::new() + .deserializer_for_slice(&buf) + .unwrap(), + ); + if let Err(err) = deserialize { + println!( + "Data item would not be deserializable {task_id}: \ + {err:?}\n{item:#?}" + ); + return false; + } + } + true + } + }); + error?; + + pot::to_vec(&data).with_context(|| { + anyhow!("Unable to serialize data items for {task_id}: {data:#?}") + })? + } + }; + Ok((task_id, value)) + }) + .collect() +} diff --git a/turbopack/crates/turbo-tasks/Cargo.toml b/turbopack/crates/turbo-tasks/Cargo.toml index 8bf86edb4d0db..63ba0dedb9023 100644 --- a/turbopack/crates/turbo-tasks/Cargo.toml +++ b/turbopack/crates/turbo-tasks/Cargo.toml @@ -30,6 +30,7 @@ mopa = "0.2.0" once_cell = { workspace = true } parking_lot = { workspace = true, features = ["serde"]} pin-project-lite = { workspace = true } +rayon = { workspace = true } regex = { workspace = true } rustc-hash = { workspace = true } serde = { workspace = true, features = ["rc", "derive"] } diff --git a/turbopack/crates/turbo-tasks/src/lib.rs b/turbopack/crates/turbo-tasks/src/lib.rs index f8163e7ad5c24..507af11ee1b3e 100644 --- a/turbopack/crates/turbo-tasks/src/lib.rs +++ b/turbopack/crates/turbo-tasks/src/lib.rs @@ -65,6 +65,7 @@ mod raw_vc; mod rcstr; mod read_ref; pub mod registry; +mod scope; mod serialization_invalidation; pub mod small_duration; mod state; @@ -98,14 +99,16 @@ pub use magic_any::MagicAny; pub use manager::{ dynamic_call, dynamic_this_call, emit, mark_dirty_when_persisted, mark_finished, mark_stateful, prevent_gc, run_once, run_once_with_reason, spawn_blocking, spawn_thread, trait_call, - turbo_tasks, CurrentCellRef, ReadConsistency, TaskPersistence, TurboTasks, TurboTasksApi, - TurboTasksBackendApi, TurboTasksBackendApiExt, TurboTasksCallApi, Unused, UpdateInfo, + turbo_tasks, turbo_tasks_scope, CurrentCellRef, ReadConsistency, TaskPersistence, TurboTasks, + TurboTasksApi, TurboTasksBackendApi, TurboTasksBackendApiExt, TurboTasksCallApi, Unused, + UpdateInfo, }; pub use native_function::{FunctionMeta, NativeFunction}; pub use output::OutputContent; pub use raw_vc::{CellId, RawVc, ReadRawVcFuture, ResolveTypeError}; pub use read_ref::ReadRef; use rustc_hash::FxHasher; +pub use scope::scope; pub use serialization_invalidation::SerializationInvalidator; pub use state::{State, TransientState}; pub use task::{task_input::TaskInput, SharedReference, TypedSharedReference}; diff --git a/turbopack/crates/turbo-tasks/src/manager.rs b/turbopack/crates/turbo-tasks/src/manager.rs index 3186a6dc41f31..eff6f2f2fd126 100644 --- a/turbopack/crates/turbo-tasks/src/manager.rs +++ b/turbopack/crates/turbo-tasks/src/manager.rs @@ -1661,6 +1661,10 @@ pub fn with_turbo_tasks(func: impl FnOnce(&Arc) -> T) -> T TURBO_TASKS.with(|arc| func(arc)) } +pub fn turbo_tasks_scope(tt: Arc, f: impl FnOnce() -> T) -> T { + TURBO_TASKS.sync_scope(tt, f) +} + pub fn with_turbo_tasks_for_testing( tt: Arc, current_task: TaskId, diff --git a/turbopack/crates/turbo-tasks/src/scope.rs b/turbopack/crates/turbo-tasks/src/scope.rs new file mode 100644 index 0000000000000..e7bb8713bd4cb --- /dev/null +++ b/turbopack/crates/turbo-tasks/src/scope.rs @@ -0,0 +1,50 @@ +use std::sync::Arc; + +use crate::{turbo_tasks, turbo_tasks_scope, TurboTasksApi}; + +pub struct Scope<'scope, 'a> { + scope: &'a rayon::Scope<'scope>, + handle: tokio::runtime::Handle, + turbo_tasks: Arc, + span: tracing::Span, +} + +impl<'scope, 'a> Scope<'scope, 'a> { + pub fn spawn(&self, body: BODY) + where + BODY: FnOnce(&Scope<'scope, '_>) + Send + 'scope, + { + let span = self.span.clone(); + let handle = self.handle.clone(); + let turbo_tasks = self.turbo_tasks.clone(); + self.scope.spawn(|scope| { + let _span = span.clone().entered(); + let _guard = handle.enter(); + turbo_tasks_scope(turbo_tasks.clone(), || { + body(&Scope { + scope, + span, + handle, + turbo_tasks, + }) + }) + }); + } +} + +pub fn scope<'scope, OP, R>(op: OP) -> R +where + OP: FnOnce(&Scope<'scope, '_>) -> R, +{ + let span = tracing::Span::current(); + let handle = tokio::runtime::Handle::current(); + let turbo_tasks = turbo_tasks(); + rayon::in_place_scope(|scope| { + op(&Scope { + scope, + span, + handle, + turbo_tasks, + }) + }) +} From 9ec165c27f7be6ef06d0c0cc8e42cc4120274560 Mon Sep 17 00:00:00 2001 From: Tobias Koppers Date: Wed, 25 Sep 2024 09:02:06 +0200 Subject: [PATCH 32/46] run snapshot in spawn_blocking --- .../turbo-tasks-backend/src/backend/mod.rs | 308 +++++++++++++----- .../src/backend/operation/mod.rs | 10 +- .../crates/turbo-tasks/src/capture_future.rs | 4 +- turbopack/crates/turbo-tasks/src/manager.rs | 3 +- 4 files changed, 242 insertions(+), 83 deletions(-) diff --git a/turbopack/crates/turbo-tasks-backend/src/backend/mod.rs b/turbopack/crates/turbo-tasks-backend/src/backend/mod.rs index 0af99b07ada6f..bdd9a65f1ed77 100644 --- a/turbopack/crates/turbo-tasks-backend/src/backend/mod.rs +++ b/turbopack/crates/turbo-tasks-backend/src/backend/mod.rs @@ -93,7 +93,9 @@ pub enum TransientTask { Once(TransientTaskOnce), } -pub struct TurboTasksBackend { +pub struct TurboTasksBackend(Arc); + +struct TurboTasksBackendInner { start_time: Instant, persisted_task_id_factory: IdFactoryWithReuse, @@ -132,6 +134,12 @@ pub struct TurboTasksBackend { } impl TurboTasksBackend { + pub fn new(backing_storage: Arc) -> Self { + Self(Arc::new(TurboTasksBackendInner::new(backing_storage))) + } +} + +impl TurboTasksBackendInner { pub fn new(backing_storage: Arc) -> Self { Self { start_time: Instant::now(), @@ -162,7 +170,7 @@ impl TurboTasksBackend { fn execute_context<'a>( &'a self, - turbo_tasks: &'a dyn TurboTasksBackendApi, + turbo_tasks: &'a dyn TurboTasksBackendApi, ) -> ExecuteContext<'a> { ExecuteContext::new(self, turbo_tasks) } @@ -228,7 +236,7 @@ impl TurboTasksBackend { } pub(crate) struct OperationGuard<'a> { - backend: &'a TurboTasksBackend, + backend: &'a TurboTasksBackendInner, } impl Drop for OperationGuard<'_> { @@ -244,12 +252,12 @@ impl Drop for OperationGuard<'_> { } // Operations -impl TurboTasksBackend { +impl TurboTasksBackendInner { fn connect_child( &self, parent_task: TaskId, child_task: TaskId, - turbo_tasks: &dyn TurboTasksBackendApi, + turbo_tasks: &dyn TurboTasksBackendApi, ) { operation::ConnectChildOperation::run( parent_task, @@ -263,7 +271,7 @@ impl TurboTasksBackend { task_id: TaskId, reader: Option, consistency: ReadConsistency, - turbo_tasks: &dyn TurboTasksBackendApi, + turbo_tasks: &dyn TurboTasksBackendApi, ) -> Result> { let ctx = self.execute_context(turbo_tasks); let mut task = ctx.task(task_id, TaskDataCategory::All); @@ -387,7 +395,7 @@ impl TurboTasksBackend { task_id: TaskId, reader: Option, cell: CellId, - turbo_tasks: &dyn TurboTasksBackendApi, + turbo_tasks: &dyn TurboTasksBackendApi, ) -> Result> { let ctx = self.execute_context(turbo_tasks); let mut task = ctx.task(task_id, TaskDataCategory::Data); @@ -561,10 +569,8 @@ impl TurboTasksBackend { Some((snapshot_time, new_items)) } -} -impl Backend for TurboTasksBackend { - fn startup(&self, turbo_tasks: &dyn TurboTasksBackendApi) { + fn startup(&self, turbo_tasks: &dyn TurboTasksBackendApi) { // Continue all uncompleted operations // They can't be interrupted by a snapshot since the snapshotting job has not been scheduled // yet. @@ -578,7 +584,7 @@ impl Backend for TurboTasksBackend { turbo_tasks.schedule_backend_background_job(BackendJobId::from(1)); } - fn stopping(&self, _turbo_tasks: &dyn TurboTasksBackendApi) { + fn stopping(&self) { self.stopping.store(true, Ordering::Release); self.stopping_event.notify(usize::MAX); } @@ -587,7 +593,7 @@ impl Backend for TurboTasksBackend { &self, task_type: CachedTaskType, parent_task: TaskId, - turbo_tasks: &dyn TurboTasksBackendApi, + turbo_tasks: &dyn TurboTasksBackendApi, ) -> TaskId { if let Some(task_id) = self.task_cache.lookup_forward(&task_type) { self.connect_child(parent_task, task_id, turbo_tasks); @@ -626,7 +632,7 @@ impl Backend for TurboTasksBackend { &self, task_type: CachedTaskType, parent_task: TaskId, - turbo_tasks: &dyn TurboTasksBackendApi, + turbo_tasks: &dyn TurboTasksBackendApi, ) -> TaskId { if !parent_task.is_transient() { let parent_task_type = self.lookup_task_type(parent_task); @@ -657,11 +663,19 @@ impl Backend for TurboTasksBackend { task_id } - fn invalidate_task(&self, task_id: TaskId, turbo_tasks: &dyn TurboTasksBackendApi) { + fn invalidate_task( + &self, + task_id: TaskId, + turbo_tasks: &dyn TurboTasksBackendApi, + ) { operation::InvalidateOperation::run(smallvec![task_id], self.execute_context(turbo_tasks)); } - fn invalidate_tasks(&self, tasks: &[TaskId], turbo_tasks: &dyn TurboTasksBackendApi) { + fn invalidate_tasks( + &self, + tasks: &[TaskId], + turbo_tasks: &dyn TurboTasksBackendApi, + ) { operation::InvalidateOperation::run( tasks.iter().copied().collect(), self.execute_context(turbo_tasks), @@ -671,7 +685,7 @@ impl Backend for TurboTasksBackend { fn invalidate_tasks_set( &self, tasks: &AutoSet, 2>, - turbo_tasks: &dyn TurboTasksBackendApi, + turbo_tasks: &dyn TurboTasksBackendApi, ) { operation::InvalidateOperation::run( tasks.iter().copied().collect(), @@ -682,7 +696,7 @@ impl Backend for TurboTasksBackend { fn invalidate_serialization( &self, task_id: TaskId, - turbo_tasks: &dyn TurboTasksBackendApi, + turbo_tasks: &dyn TurboTasksBackendApi, ) { if task_id.is_transient() { return; @@ -705,13 +719,10 @@ impl Backend for TurboTasksBackend { }) } - type TaskState = (); - fn new_task_state(&self, _task: TaskId) -> Self::TaskState {} - fn try_start_task_execution( &self, task_id: TaskId, - turbo_tasks: &dyn TurboTasksBackendApi, + turbo_tasks: &dyn TurboTasksBackendApi, ) -> Option> { enum TaskType { Cached(Arc), @@ -905,7 +916,7 @@ impl Backend for TurboTasksBackend { &self, task_id: TaskId, result: Result, Option>>, - turbo_tasks: &dyn TurboTasksBackendApi, + turbo_tasks: &dyn TurboTasksBackendApi, ) { operation::UpdateOutputOperation::run(task_id, result, self.execute_context(turbo_tasks)); } @@ -917,7 +928,7 @@ impl Backend for TurboTasksBackend { _memory_usage: usize, cell_counters: &AutoMap, 8>, stateful: bool, - turbo_tasks: &dyn TurboTasksBackendApi, + turbo_tasks: &dyn TurboTasksBackendApi, ) -> bool { let ctx = self.execute_context(turbo_tasks); let mut task = ctx.task(task_id, TaskDataCategory::All); @@ -1081,9 +1092,9 @@ impl Backend for TurboTasksBackend { } fn run_backend_job<'a>( - &'a self, + self: &'a Arc, id: BackendJobId, - turbo_tasks: &'a dyn TurboTasksBackendApi, + turbo_tasks: &'a dyn TurboTasksBackendApi, ) -> Pin + Send + 'a>> { Box::pin(async move { if *id == 1 || *id == 2 { @@ -1107,7 +1118,9 @@ impl Backend for TurboTasksBackend { } } - if let Some((snapshot_start, new_data)) = self.snapshot() { + let this = self.clone(); + let snapshot = turbo_tasks::spawn_blocking(move || this.snapshot()).await; + if let Some((snapshot_start, new_data)) = snapshot { last_snapshot = snapshot_start; if new_data { continue; @@ -1124,6 +1137,188 @@ impl Backend for TurboTasksBackend { }) } + fn try_read_own_task_cell_untracked( + &self, + task_id: TaskId, + cell: CellId, + turbo_tasks: &dyn TurboTasksBackendApi, + ) -> Result { + let ctx = self.execute_context(turbo_tasks); + let task = ctx.task(task_id, TaskDataCategory::Data); + if let Some(content) = get!(task, CellData { cell }) { + Ok(CellContent(Some(content.1.clone())).into_typed(cell.type_id)) + } else { + Ok(CellContent(None).into_typed(cell.type_id)) + } + } + + fn update_task_cell( + &self, + task_id: TaskId, + cell: CellId, + content: CellContent, + turbo_tasks: &dyn TurboTasksBackendApi, + ) { + operation::UpdateCellOperation::run( + task_id, + cell, + content, + self.execute_context(turbo_tasks), + ); + } + + fn connect_task( + &self, + task: TaskId, + parent_task: TaskId, + turbo_tasks: &dyn TurboTasksBackendApi, + ) { + ConnectChildOperation::run(parent_task, task, self.execute_context(turbo_tasks)); + } + + fn create_transient_task(&self, task_type: TransientTaskType) -> TaskId { + let task_id = self.transient_task_id_factory.get(); + let root_type = match task_type { + TransientTaskType::Root(_) => ActiveType::RootTask, + TransientTaskType::Once(_) => ActiveType::OnceTask, + }; + self.transient_tasks.insert( + task_id, + Arc::new(match task_type { + TransientTaskType::Root(f) => TransientTask::Root(f), + TransientTaskType::Once(f) => TransientTask::Once(Mutex::new(Some(f))), + }), + ); + { + let mut task = self.storage.access_mut(task_id); + task.add(CachedDataItem::AggregationNumber { + value: AggregationNumber { + base: u32::MAX, + distance: 0, + effective: u32::MAX, + }, + }); + task.add(CachedDataItem::AggregateRoot { + value: RootState::new(root_type), + }); + task.add(CachedDataItem::new_scheduled(move || match root_type { + ActiveType::RootTask => "Root Task".to_string(), + ActiveType::OnceTask => "Once Task".to_string(), + _ => unreachable!(), + })); + } + task_id + } +} + +impl Backend for TurboTasksBackend { + fn startup(&self, turbo_tasks: &dyn TurboTasksBackendApi) { + self.0.startup(turbo_tasks); + } + + fn stopping(&self, _turbo_tasks: &dyn TurboTasksBackendApi) { + self.0.stopping(); + } + + fn get_or_create_persistent_task( + &self, + task_type: CachedTaskType, + parent_task: TaskId, + turbo_tasks: &dyn TurboTasksBackendApi, + ) -> TaskId { + self.0 + .get_or_create_persistent_task(task_type, parent_task, turbo_tasks) + } + + fn get_or_create_transient_task( + &self, + task_type: CachedTaskType, + parent_task: TaskId, + turbo_tasks: &dyn TurboTasksBackendApi, + ) -> TaskId { + self.0 + .get_or_create_transient_task(task_type, parent_task, turbo_tasks) + } + + fn invalidate_task(&self, task_id: TaskId, turbo_tasks: &dyn TurboTasksBackendApi) { + self.0.invalidate_task(task_id, turbo_tasks); + } + + fn invalidate_tasks(&self, tasks: &[TaskId], turbo_tasks: &dyn TurboTasksBackendApi) { + self.0.invalidate_tasks(tasks, turbo_tasks); + } + + fn invalidate_tasks_set( + &self, + tasks: &AutoSet, 2>, + turbo_tasks: &dyn TurboTasksBackendApi, + ) { + self.0.invalidate_tasks_set(tasks, turbo_tasks); + } + + fn invalidate_serialization( + &self, + task_id: TaskId, + turbo_tasks: &dyn TurboTasksBackendApi, + ) { + self.0.invalidate_serialization(task_id, turbo_tasks); + } + + fn get_task_description(&self, task: TaskId) -> std::string::String { + self.0.get_task_description(task) + } + + fn try_get_function_id(&self, task_id: TaskId) -> Option { + self.0.try_get_function_id(task_id) + } + + type TaskState = (); + fn new_task_state(&self, _task: TaskId) -> Self::TaskState {} + + fn try_start_task_execution( + &self, + task_id: TaskId, + turbo_tasks: &dyn TurboTasksBackendApi, + ) -> Option> { + self.0.try_start_task_execution(task_id, turbo_tasks) + } + + fn task_execution_result( + &self, + task_id: TaskId, + result: Result, Option>>, + turbo_tasks: &dyn TurboTasksBackendApi, + ) { + self.0.task_execution_result(task_id, result, turbo_tasks); + } + + fn task_execution_completed( + &self, + task_id: TaskId, + _duration: Duration, + _memory_usage: usize, + cell_counters: &AutoMap, 8>, + stateful: bool, + turbo_tasks: &dyn TurboTasksBackendApi, + ) -> bool { + self.0.task_execution_completed( + task_id, + _duration, + _memory_usage, + cell_counters, + stateful, + turbo_tasks, + ) + } + + fn run_backend_job<'a>( + &'a self, + id: BackendJobId, + turbo_tasks: &'a dyn TurboTasksBackendApi, + ) -> Pin + Send + 'a>> { + self.0.run_backend_job(id, turbo_tasks) + } + fn try_read_task_output( &self, task_id: TaskId, @@ -1131,7 +1326,8 @@ impl Backend for TurboTasksBackend { consistency: ReadConsistency, turbo_tasks: &dyn TurboTasksBackendApi, ) -> Result> { - self.try_read_task_output(task_id, Some(reader), consistency, turbo_tasks) + self.0 + .try_read_task_output(task_id, Some(reader), consistency, turbo_tasks) } fn try_read_task_output_untracked( @@ -1140,7 +1336,8 @@ impl Backend for TurboTasksBackend { consistency: ReadConsistency, turbo_tasks: &dyn TurboTasksBackendApi, ) -> Result> { - self.try_read_task_output(task_id, None, consistency, turbo_tasks) + self.0 + .try_read_task_output(task_id, None, consistency, turbo_tasks) } fn try_read_task_cell( @@ -1150,7 +1347,8 @@ impl Backend for TurboTasksBackend { reader: TaskId, turbo_tasks: &dyn TurboTasksBackendApi, ) -> Result> { - self.try_read_task_cell(task_id, Some(reader), cell, turbo_tasks) + self.0 + .try_read_task_cell(task_id, Some(reader), cell, turbo_tasks) } fn try_read_task_cell_untracked( @@ -1159,7 +1357,7 @@ impl Backend for TurboTasksBackend { cell: CellId, turbo_tasks: &dyn TurboTasksBackendApi, ) -> Result> { - self.try_read_task_cell(task_id, None, cell, turbo_tasks) + self.0.try_read_task_cell(task_id, None, cell, turbo_tasks) } fn try_read_own_task_cell_untracked( @@ -1168,13 +1366,8 @@ impl Backend for TurboTasksBackend { cell: CellId, turbo_tasks: &dyn TurboTasksBackendApi, ) -> Result { - let ctx = self.execute_context(turbo_tasks); - let task = ctx.task(task_id, TaskDataCategory::Data); - if let Some(content) = get!(task, CellData { cell }) { - Ok(CellContent(Some(content.1.clone())).into_typed(cell.type_id)) - } else { - Ok(CellContent(None).into_typed(cell.type_id)) - } + self.0 + .try_read_own_task_cell_untracked(task_id, cell, turbo_tasks) } fn read_task_collectibles( @@ -1215,12 +1408,7 @@ impl Backend for TurboTasksBackend { content: CellContent, turbo_tasks: &dyn TurboTasksBackendApi, ) { - operation::UpdateCellOperation::run( - task_id, - cell, - content, - self.execute_context(turbo_tasks), - ); + self.0.update_task_cell(task_id, cell, content, turbo_tasks); } fn connect_task( @@ -1229,7 +1417,7 @@ impl Backend for TurboTasksBackend { parent_task: TaskId, turbo_tasks: &dyn TurboTasksBackendApi, ) { - ConnectChildOperation::run(parent_task, task, self.execute_context(turbo_tasks)); + self.0.connect_task(task, parent_task, turbo_tasks); } fn create_transient_task( @@ -1237,37 +1425,7 @@ impl Backend for TurboTasksBackend { task_type: TransientTaskType, _turbo_tasks: &dyn TurboTasksBackendApi, ) -> TaskId { - let task_id = self.transient_task_id_factory.get(); - let root_type = match task_type { - TransientTaskType::Root(_) => ActiveType::RootTask, - TransientTaskType::Once(_) => ActiveType::OnceTask, - }; - self.transient_tasks.insert( - task_id, - Arc::new(match task_type { - TransientTaskType::Root(f) => TransientTask::Root(f), - TransientTaskType::Once(f) => TransientTask::Once(Mutex::new(Some(f))), - }), - ); - { - let mut task = self.storage.access_mut(task_id); - task.add(CachedDataItem::AggregationNumber { - value: AggregationNumber { - base: u32::MAX, - distance: 0, - effective: u32::MAX, - }, - }); - task.add(CachedDataItem::AggregateRoot { - value: RootState::new(root_type), - }); - task.add(CachedDataItem::new_scheduled(move || match root_type { - ActiveType::RootTask => "Root Task".to_string(), - ActiveType::OnceTask => "Once Task".to_string(), - _ => unreachable!(), - })); - } - task_id + self.0.create_transient_task(task_type) } fn dispose_root_task(&self, _: TaskId, _: &dyn TurboTasksBackendApi) { todo!() diff --git a/turbopack/crates/turbo-tasks-backend/src/backend/operation/mod.rs b/turbopack/crates/turbo-tasks-backend/src/backend/operation/mod.rs index da7832d0b3b0c..ca80da978a134 100644 --- a/turbopack/crates/turbo-tasks-backend/src/backend/operation/mod.rs +++ b/turbopack/crates/turbo-tasks-backend/src/backend/operation/mod.rs @@ -16,7 +16,7 @@ use turbo_tasks::{KeyValuePair, TaskId, TurboTasksBackendApi}; use crate::{ backend::{ storage::StorageWriteGuard, OperationGuard, TaskDataCategory, TransientTask, - TurboTasksBackend, + TurboTasksBackend, TurboTasksBackendInner, }, data::{ CachedDataItem, CachedDataItemIndex, CachedDataItemKey, CachedDataItemValue, @@ -35,15 +35,15 @@ pub trait Operation: } pub struct ExecuteContext<'a> { - backend: &'a TurboTasksBackend, + backend: &'a TurboTasksBackendInner, turbo_tasks: &'a dyn TurboTasksBackendApi, _operation_guard: Option>, parent: Option<(&'a AnyOperation, &'a ExecuteContext<'a>)>, } impl<'a> ExecuteContext<'a> { - pub fn new( - backend: &'a TurboTasksBackend, + pub(super) fn new( + backend: &'a TurboTasksBackendInner, turbo_tasks: &'a dyn TurboTasksBackendApi, ) -> Self { Self { @@ -191,7 +191,7 @@ impl<'a> ExecuteContext<'a> { pub struct TaskGuard<'a> { task_id: TaskId, task: StorageWriteGuard<'a, TaskId, CachedDataItem>, - backend: &'a TurboTasksBackend, + backend: &'a TurboTasksBackendInner, } impl Debug for TaskGuard<'_> { diff --git a/turbopack/crates/turbo-tasks/src/capture_future.rs b/turbopack/crates/turbo-tasks/src/capture_future.rs index 8a7b5d621b2e4..1478e00af78d3 100644 --- a/turbopack/crates/turbo-tasks/src/capture_future.rs +++ b/turbopack/crates/turbo-tasks/src/capture_future.rs @@ -39,11 +39,11 @@ impl> CaptureFuture { } pub fn add_duration(duration: Duration) { - EXTRA.with(|cell| cell.lock().unwrap().0 += duration); + let _ = EXTRA.try_with(|cell| cell.lock().unwrap().0 += duration); } pub fn add_allocation_info(alloc_info: AllocationInfo) { - EXTRA.with(|cell| { + let _ = EXTRA.try_with(|cell| { let mut guard = cell.lock().unwrap(); guard.1 += alloc_info.allocations; guard.2 += alloc_info.deallocations; diff --git a/turbopack/crates/turbo-tasks/src/manager.rs b/turbopack/crates/turbo-tasks/src/manager.rs index eff6f2f2fd126..e6f81c3ae43c5 100644 --- a/turbopack/crates/turbo-tasks/src/manager.rs +++ b/turbopack/crates/turbo-tasks/src/manager.rs @@ -1741,12 +1741,13 @@ pub fn emit(collectible: Vc) { } pub async fn spawn_blocking(func: impl FnOnce() -> T + Send + 'static) -> T { + let turbo_tasks = turbo_tasks(); let span = trace_span!("blocking operation").or_current(); let (result, duration, alloc_info) = tokio::task::spawn_blocking(|| { let _guard = span.entered(); let start = Instant::now(); let start_allocations = TurboMalloc::allocation_counters(); - let r = func(); + let r = turbo_tasks_scope(turbo_tasks, func); (r, start.elapsed(), start_allocations.until_now()) }) .await From 4a9e70c14527342fc8082cdbdfaa80cf23b8db68 Mon Sep 17 00:00:00 2001 From: Tobias Koppers Date: Wed, 25 Sep 2024 16:56:31 +0200 Subject: [PATCH 33/46] save snapshot when starting idle --- .../turbo-tasks-backend/src/backend/mod.rs | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/turbopack/crates/turbo-tasks-backend/src/backend/mod.rs b/turbopack/crates/turbo-tasks-backend/src/backend/mod.rs index bdd9a65f1ed77..680ce5ff499af 100644 --- a/turbopack/crates/turbo-tasks-backend/src/backend/mod.rs +++ b/turbopack/crates/turbo-tasks-backend/src/backend/mod.rs @@ -23,7 +23,6 @@ use parking_lot::{Condvar, Mutex}; use rustc_hash::FxHasher; use smallvec::smallvec; pub use storage::TaskDataCategory; -use tokio::time::timeout; use turbo_tasks::{ backend::{ Backend, BackendJobId, CachedTaskType, CellContent, TaskExecutionSpec, TransientTaskRoot, @@ -129,6 +128,7 @@ struct TurboTasksBackendInner { stopping: AtomicBool, stopping_event: Event, + idle_event: Event, backing_storage: Arc, } @@ -164,6 +164,7 @@ impl TurboTasksBackendInner { last_snapshot: AtomicU64::new(0), stopping: AtomicBool::new(false), stopping_event: Event::new(|| "TurboTasksBackend::stopping_event".to_string()), + idle_event: Event::new(|| "TurboTasksBackend::idle_event".to_string()), backing_storage, } } @@ -589,6 +590,10 @@ impl TurboTasksBackendInner { self.stopping_event.notify(usize::MAX); } + fn idle_start(&self) { + self.idle_event.notify(usize::MAX); + } + fn get_or_create_persistent_task( &self, task_type: CachedTaskType, @@ -1113,8 +1118,13 @@ impl TurboTasksBackendInner { let elapsed = last_snapshot.elapsed(); if elapsed < time { let listener = self.stopping_event.listen(); + let listener2 = self.idle_event.listen(); if !self.stopping.load(Ordering::Acquire) { - let _ = timeout(time - elapsed, listener).await; + tokio::select! { + _ = listener => {}, + _ = listener2 => {}, + _ = tokio::time::sleep(time - elapsed) => {}, + } } } @@ -1220,6 +1230,10 @@ impl Backend for TurboTasksBackend { self.0.stopping(); } + fn idle_start(&self, _turbo_tasks: &dyn TurboTasksBackendApi) { + self.0.idle_start(); + } + fn get_or_create_persistent_task( &self, task_type: CachedTaskType, From 1624294d913d70b47454399d1c73f533af69122c Mon Sep 17 00:00:00 2001 From: Tobias Koppers Date: Wed, 25 Sep 2024 17:35:19 +0200 Subject: [PATCH 34/46] wait until idle for a second before persisting --- .../turbo-tasks-backend/src/backend/mod.rs | 32 ++++++++++++++----- turbopack/crates/turbo-tasks/src/manager.rs | 7 ++++ 2 files changed, 31 insertions(+), 8 deletions(-) diff --git a/turbopack/crates/turbo-tasks-backend/src/backend/mod.rs b/turbopack/crates/turbo-tasks-backend/src/backend/mod.rs index 680ce5ff499af..5aaff28abb866 100644 --- a/turbopack/crates/turbo-tasks-backend/src/backend/mod.rs +++ b/turbopack/crates/turbo-tasks-backend/src/backend/mod.rs @@ -1108,6 +1108,7 @@ impl TurboTasksBackendInner { loop { const FIRST_SNAPSHOT_WAIT: Duration = Duration::from_secs(30); const SNAPSHOT_INTERVAL: Duration = Duration::from_secs(15); + const IDLE_TIMEOUT: Duration = Duration::from_secs(1); let time = if *id == 1 { FIRST_SNAPSHOT_WAIT @@ -1115,15 +1116,30 @@ impl TurboTasksBackendInner { SNAPSHOT_INTERVAL }; - let elapsed = last_snapshot.elapsed(); - if elapsed < time { - let listener = self.stopping_event.listen(); - let listener2 = self.idle_event.listen(); + let until = last_snapshot + time; + if until > Instant::now() { + let mut stop_listener = self.stopping_event.listen(); if !self.stopping.load(Ordering::Acquire) { - tokio::select! { - _ = listener => {}, - _ = listener2 => {}, - _ = tokio::time::sleep(time - elapsed) => {}, + let mut idle_listener = self.idle_event.listen(); + let mut idle_time = until + IDLE_TIMEOUT; + loop { + tokio::select! { + _ = &mut stop_listener => { + break; + }, + _ = &mut idle_listener => { + idle_time = Instant::now() + IDLE_TIMEOUT; + idle_listener = self.idle_event.listen() + }, + _ = tokio::time::sleep_until(until.into()) => { + break; + }, + _ = tokio::time::sleep_until(idle_time.into()) => { + if turbo_tasks.is_idle() { + break; + } + }, + } } } } diff --git a/turbopack/crates/turbo-tasks/src/manager.rs b/turbopack/crates/turbo-tasks/src/manager.rs index e6f81c3ae43c5..60a69b4f642f8 100644 --- a/turbopack/crates/turbo-tasks/src/manager.rs +++ b/turbopack/crates/turbo-tasks/src/manager.rs @@ -271,6 +271,9 @@ pub trait TurboTasksBackendApi: TurboTasksCallApi + Sync + /// should prefer the extension trait's version of this method. fn write_task_state_dyn(&self, func: &mut dyn FnMut(&mut B::TaskState)); + /// Returns true if the system is idle. + fn is_idle(&self) -> bool; + /// Returns a reference to the backend. fn backend(&self) -> &B; } @@ -1564,6 +1567,10 @@ impl TurboTasksBackendApi for TurboTasks { CURRENT_GLOBAL_TASK_STATE .with(move |ts| func(ts.write().unwrap().backend_state.downcast_mut().unwrap())) } + + fn is_idle(&self) -> bool { + self.currently_scheduled_tasks.load(Ordering::Acquire) == 0 + } } pub(crate) fn current_task(from: &str) -> TaskId { From 2f3b5fb25404f273d753b2754267297bc4fc34f4 Mon Sep 17 00:00:00 2001 From: Tobias Koppers Date: Thu, 26 Sep 2024 02:14:03 +0200 Subject: [PATCH 35/46] increase max DB size --- .../crates/turbo-tasks-backend/src/lmdb_backing_storage.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs b/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs index 8219ff589d13f..e953c853be74b 100644 --- a/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs +++ b/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs @@ -66,7 +66,7 @@ impl LmdbBackingStorage { ) .set_max_readers((available_parallelism().map_or(16, |v| v.get()) * 8) as u32) .set_max_dbs(5) - .set_map_size(20 * 1024 * 1024 * 1024) + .set_map_size(40 * 1024 * 1024 * 1024) .open(path)?; let infra_db = env.create_db(Some("infra"), DatabaseFlags::INTEGER_KEY)?; let data_db = env.create_db(Some("data"), DatabaseFlags::INTEGER_KEY)?; From e49274165fc0200390b3e6891696373a43cbe780 Mon Sep 17 00:00:00 2001 From: Tobias Koppers Date: Thu, 26 Sep 2024 10:52:08 +0200 Subject: [PATCH 36/46] remove db lookup tracing --- .../src/lmdb_backing_storage.rs | 22 ++++--------------- 1 file changed, 4 insertions(+), 18 deletions(-) diff --git a/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs b/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs index e953c853be74b..8d28beefebf24 100644 --- a/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs +++ b/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs @@ -12,7 +12,6 @@ use std::{ use anyhow::{anyhow, Context, Result}; use lmdb::{Database, DatabaseFlags, Environment, EnvironmentFlags, Transaction, WriteFlags}; -use tracing::Span; use turbo_tasks::{backend::CachedTaskType, KeyValuePair, TaskId}; use crate::{ @@ -278,15 +277,9 @@ impl BackingStorage for LmdbBackingStorage { } fn forward_lookup_task_cache(&self, task_type: &CachedTaskType) -> Option { - let span = tracing::trace_span!("forward lookup task cache", key_bytes = 0usize).entered(); - fn lookup( - this: &LmdbBackingStorage, - task_type: &CachedTaskType, - span: &Span, - ) -> Result> { + fn lookup(this: &LmdbBackingStorage, task_type: &CachedTaskType) -> Result> { let tx = this.env.begin_ro_txn()?; let task_type = pot::to_vec(task_type)?; - span.record("key_bytes", task_type.len()); let bytes = match extended_key::get(&tx, this.forward_task_cache_db, &task_type) { Ok(result) => result, Err(err) => { @@ -302,18 +295,16 @@ impl BackingStorage for LmdbBackingStorage { tx.commit()?; Ok(Some(id)) } - let id = lookup(self, task_type, &span) + let id = lookup(self, task_type) .inspect_err(|err| println!("Looking up task id for {task_type:?} failed: {err:?}")) .ok()??; Some(id) } fn reverse_lookup_task_cache(&self, task_id: TaskId) -> Option> { - let span = tracing::trace_span!("reverse lookup task cache", bytes = 0usize).entered(); fn lookup( this: &LmdbBackingStorage, task_id: TaskId, - span: &Span, ) -> Result>> { let tx = this.env.begin_ro_txn()?; let bytes = match tx.get(this.reverse_task_cache_db, &IntKey::new(*task_id)) { @@ -326,24 +317,21 @@ impl BackingStorage for LmdbBackingStorage { } } }; - span.record("bytes", bytes.len()); let result = pot::from_slice(bytes)?; tx.commit()?; Ok(Some(result)) } - let result = lookup(self, task_id, &span) + let result = lookup(self, task_id) .inspect_err(|err| println!("Looking up task type for {task_id} failed: {err:?}")) .ok()??; Some(result) } fn lookup_data(&self, task_id: TaskId, category: TaskDataCategory) -> Vec { - let span = tracing::trace_span!("restore data", bytes = 0usize, items = 0usize).entered(); fn lookup( this: &LmdbBackingStorage, task_id: TaskId, category: TaskDataCategory, - span: &Span, ) -> Result> { let tx = this.env.begin_ro_txn()?; let bytes = match tx.get(this.db(category), &IntKey::new(*task_id)) { @@ -356,13 +344,11 @@ impl BackingStorage for LmdbBackingStorage { } } }; - span.record("bytes", bytes.len()); let result: Vec = pot::from_slice(bytes)?; - span.record("items", result.len()); tx.commit()?; Ok(result) } - lookup(self, task_id, category, &span) + lookup(self, task_id, category) .inspect_err(|err| println!("Looking up data for {task_id} failed: {err:?}")) .unwrap_or_default() } From 80241415c3c60df3e30da39f441b084fde2cd5c8 Mon Sep 17 00:00:00 2001 From: Tobias Koppers Date: Thu, 26 Sep 2024 10:57:22 +0200 Subject: [PATCH 37/46] remove database println --- turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs b/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs index 8d28beefebf24..01b8fa4b8a751 100644 --- a/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs +++ b/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs @@ -56,7 +56,6 @@ pub struct LmdbBackingStorage { impl LmdbBackingStorage { pub fn new(path: &Path) -> Result { create_dir_all(path)?; - println!("opening lmdb {:?}", path); let env = Environment::new() .set_flags( EnvironmentFlags::WRITE_MAP From 361c1cf027f4ed5919303692bcd5b40b742dbad3 Mon Sep 17 00:00:00 2001 From: Tobias Koppers Date: Thu, 26 Sep 2024 15:29:11 +0200 Subject: [PATCH 38/46] sharded transaction log --- .../turbo-tasks-backend/src/backend/mod.rs | 47 ++++---- .../src/backend/operation/mod.rs | 14 +-- .../src/backing_storage.rs | 6 +- .../src/lmdb_backing_storage.rs | 100 ++++++++++-------- .../src/utils/chunked_vec.rs | 23 ++-- .../turbo-tasks-backend/src/utils/mod.rs | 1 + .../turbo-tasks-backend/src/utils/sharded.rs | 52 +++++++++ 7 files changed, 163 insertions(+), 80 deletions(-) create mode 100644 turbopack/crates/turbo-tasks-backend/src/utils/sharded.rs diff --git a/turbopack/crates/turbo-tasks-backend/src/backend/mod.rs b/turbopack/crates/turbo-tasks-backend/src/backend/mod.rs index 5aaff28abb866..340132d57d698 100644 --- a/turbopack/crates/turbo-tasks-backend/src/backend/mod.rs +++ b/turbopack/crates/turbo-tasks-backend/src/backend/mod.rs @@ -13,6 +13,7 @@ use std::{ atomic::{AtomicBool, AtomicU64, AtomicUsize, Ordering}, Arc, }, + thread::available_parallelism, time::{Duration, Instant}, }; @@ -51,7 +52,7 @@ use crate::{ CachedDataItemValue, CachedDataUpdate, CellRef, InProgressCellState, InProgressState, OutputValue, RootState, }, - utils::{bi_map::BiMap, chunked_vec::ChunkedVec, ptr_eq_arc::PtrEqArc}, + utils::{bi_map::BiMap, chunked_vec::ChunkedVec, ptr_eq_arc::PtrEqArc, sharded::Sharded}, }; const SNAPSHOT_REQUESTED_BIT: usize = 1 << (usize::BITS - 1); @@ -100,12 +101,12 @@ struct TurboTasksBackendInner { persisted_task_id_factory: IdFactoryWithReuse, transient_task_id_factory: IdFactoryWithReuse, - persisted_task_cache_log: Mutex, TaskId)>>, + persisted_task_cache_log: Sharded, TaskId)>>, task_cache: BiMap, TaskId>, transient_tasks: DashMap>, - persisted_storage_data_log: Mutex>, - persisted_storage_meta_log: Mutex>, + persisted_storage_data_log: Sharded>, + persisted_storage_meta_log: Sharded>, storage: Storage, /// Number of executing operations + Highest bit is set when snapshot is @@ -141,6 +142,8 @@ impl TurboTasksBackend { impl TurboTasksBackendInner { pub fn new(backing_storage: Arc) -> Self { + let shard_amount = + (available_parallelism().map_or(4, |v| v.get()) * 64).next_power_of_two(); Self { start_time: Instant::now(), persisted_task_id_factory: IdFactoryWithReuse::new( @@ -151,11 +154,11 @@ impl TurboTasksBackendInner { TRANSIENT_TASK_BIT as u64, u32::MAX as u64, ), - persisted_task_cache_log: Mutex::new(ChunkedVec::new()), + persisted_task_cache_log: Sharded::new(shard_amount), task_cache: BiMap::new(), transient_tasks: DashMap::new(), - persisted_storage_data_log: Mutex::new(ChunkedVec::new()), - persisted_storage_meta_log: Mutex::new(ChunkedVec::new()), + persisted_storage_data_log: Sharded::new(shard_amount), + persisted_storage_meta_log: Sharded::new(shard_amount), storage: Storage::new(), in_progress_operations: AtomicUsize::new(0), snapshot_request: Mutex::new(SnapshotRequest::new()), @@ -227,7 +230,7 @@ impl TurboTasksBackendInner { fn persisted_storage_log( &self, category: TaskDataCategory, - ) -> &Mutex> { + ) -> &Sharded> { match category { TaskDataCategory::Data => &self.persisted_storage_data_log, TaskDataCategory::Meta => &self.persisted_storage_meta_log, @@ -523,9 +526,9 @@ impl TurboTasksBackendInner { .map(|op| op.arc().clone()) .collect::>(); drop(snapshot_request); - let persisted_storage_meta_log = take(&mut *self.persisted_storage_meta_log.lock()); - let persisted_storage_data_log = take(&mut *self.persisted_storage_data_log.lock()); - let persisted_task_cache_log = take(&mut *self.persisted_task_cache_log.lock()); + let persisted_storage_meta_log = self.persisted_storage_meta_log.take(); + let persisted_storage_data_log = self.persisted_storage_data_log.take(); + let persisted_task_cache_log = self.persisted_task_cache_log.take(); let mut snapshot_request = self.snapshot_request.lock(); snapshot_request.snapshot_requested = false; self.in_progress_operations @@ -535,18 +538,24 @@ impl TurboTasksBackendInner { drop(snapshot_request); let mut counts: HashMap = HashMap::new(); - for CachedDataUpdate { task, .. } in persisted_storage_data_log + for log in persisted_storage_meta_log .iter() - .chain(persisted_storage_meta_log.iter()) + .chain(persisted_storage_data_log.iter()) { - *counts.entry(*task).or_default() += 1; + for CachedDataUpdate { task, .. } in log.iter() { + *counts.entry(*task).or_default() += 1; + } } let mut new_items = false; - if !persisted_task_cache_log.is_empty() - || !persisted_storage_meta_log.is_empty() - || !persisted_storage_data_log.is_empty() + fn shards_empty(shards: &Vec>) -> bool { + shards.iter().all(|shard| shard.is_empty()) + } + + if !shards_empty(&persisted_task_cache_log) + || !shards_empty(&persisted_storage_meta_log) + || !shards_empty(&persisted_storage_data_log) { new_items = true; if let Err(err) = self.backing_storage.save_snapshot( @@ -619,13 +628,13 @@ impl TurboTasksBackendInner { self.persisted_task_id_factory.reuse(task_id); } self.persisted_task_cache_log - .lock() + .lock(existing_task_id) .push((task_type, existing_task_id)); self.connect_child(parent_task, existing_task_id, turbo_tasks); return existing_task_id; } self.persisted_task_cache_log - .lock() + .lock(task_id) .push((task_type, task_id)); self.connect_child(parent_task, task_id, turbo_tasks); diff --git a/turbopack/crates/turbo-tasks-backend/src/backend/operation/mod.rs b/turbopack/crates/turbo-tasks-backend/src/backend/operation/mod.rs index ca80da978a134..d81ed8127bbd5 100644 --- a/turbopack/crates/turbo-tasks-backend/src/backend/operation/mod.rs +++ b/turbopack/crates/turbo-tasks-backend/src/backend/operation/mod.rs @@ -222,7 +222,7 @@ impl TaskGuard<'_> { self.task.persistance_state_mut().add_persisting_item(); self.backend .persisted_storage_log(key.category()) - .lock() + .lock(self.task_id) .push(CachedDataUpdate { key, task: self.task_id, @@ -253,7 +253,7 @@ impl TaskGuard<'_> { self.task.persistance_state_mut().add_persisting_item(); self.backend .persisted_storage_log(key.category()) - .lock() + .lock(self.task_id) .push(CachedDataUpdate { key, task: self.task_id, @@ -270,7 +270,7 @@ impl TaskGuard<'_> { self.task.persistance_state_mut().add_persisting_item(); self.backend .persisted_storage_log(key.category()) - .lock() + .lock(self.task_id) .push(CachedDataUpdate { key, task: self.task_id, @@ -314,7 +314,7 @@ impl TaskGuard<'_> { add_persisting_item = true; backend .persisted_storage_log(key.category()) - .lock() + .lock(*task_id) .push(CachedDataUpdate { key: key.clone(), task: *task_id, @@ -326,7 +326,7 @@ impl TaskGuard<'_> { add_persisting_item = true; backend .persisted_storage_log(key.category()) - .lock() + .lock(*task_id) .push(CachedDataUpdate { key: key.clone(), task: *task_id, @@ -351,7 +351,7 @@ impl TaskGuard<'_> { self.task.persistance_state_mut().add_persisting_item(); self.backend .persisted_storage_log(key.category()) - .lock() + .lock(self.task_id) .push(CachedDataUpdate { key, task: self.task_id, @@ -410,7 +410,7 @@ impl TaskGuard<'_> { let mut guard = self .backend .persisted_storage_log(TaskDataCategory::Data) - .lock(); + .lock(self.task_id); guard.extend(cell_data); self.task .persistance_state_mut() diff --git a/turbopack/crates/turbo-tasks-backend/src/backing_storage.rs b/turbopack/crates/turbo-tasks-backend/src/backing_storage.rs index 3082785f3ad8d..b3b717f23f955 100644 --- a/turbopack/crates/turbo-tasks-backend/src/backing_storage.rs +++ b/turbopack/crates/turbo-tasks-backend/src/backing_storage.rs @@ -15,9 +15,9 @@ pub trait BackingStorage { fn save_snapshot( &self, operations: Vec>, - task_cache_updates: ChunkedVec<(Arc, TaskId)>, - meta_updates: ChunkedVec, - data_updates: ChunkedVec, + task_cache_updates: Vec, TaskId)>>, + meta_updates: Vec>, + data_updates: Vec>, ) -> Result<()>; fn forward_lookup_task_cache(&self, key: &CachedTaskType) -> Option; fn reverse_lookup_task_cache(&self, task_id: TaskId) -> Option>; diff --git a/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs b/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs index 01b8fa4b8a751..130a9fb475e9e 100644 --- a/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs +++ b/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs @@ -12,6 +12,8 @@ use std::{ use anyhow::{anyhow, Context, Result}; use lmdb::{Database, DatabaseFlags, Environment, EnvironmentFlags, Transaction, WriteFlags}; +use rayon::iter::{IntoParallelIterator, ParallelIterator}; +use tracing::Span; use turbo_tasks::{backend::CachedTaskType, KeyValuePair, TaskId}; use crate::{ @@ -117,16 +119,16 @@ impl BackingStorage for LmdbBackingStorage { fn save_snapshot( &self, operations: Vec>, - task_cache_updates: ChunkedVec<(Arc, TaskId)>, - meta_updates: ChunkedVec, - data_updates: ChunkedVec, + task_cache_updates: Vec, TaskId)>>, + meta_updates: Vec>, + data_updates: Vec>, ) -> Result<()> { println!( "Persisting {} operations, {} task cache updates, {} meta updates, {} data updates...", operations.len(), - task_cache_updates.len(), - meta_updates.len(), - data_updates.len() + task_cache_updates.iter().map(|u| u.len()).sum::(), + meta_updates.iter().map(|u| u.len()).sum::(), + data_updates.iter().map(|u| u.len()).sum::() ); let start = Instant::now(); let mut op_count = 0; @@ -180,9 +182,9 @@ impl BackingStorage for LmdbBackingStorage { let _span = tracing::trace_span!("update task cache", items = task_cache_updates.len()) .entered(); - for (task_type, task_id) in task_cache_updates.iter() { - let task_id = **task_id; - let task_type_bytes = pot::to_vec(&**task_type).with_context(|| { + for (task_type, task_id) in task_cache_updates.into_iter().flatten() { + let task_id = *task_id; + let task_type_bytes = pot::to_vec(&*task_type).with_context(|| { anyhow!("Unable to serialize task cache key {task_type:?}") })?; #[cfg(feature = "verify_serialization")] @@ -354,51 +356,65 @@ impl BackingStorage for LmdbBackingStorage { } fn organize_task_data( - updates: ChunkedVec, -) -> HashMap< - TaskId, - HashMap, Option)>, -> { - let mut task_updates: HashMap< + updates: Vec>, +) -> Vec< + HashMap< TaskId, HashMap, Option)>, - > = HashMap::new(); - for CachedDataUpdate { - task, - key, - value, - old_value, - } in updates.into_iter() - { - let data = task_updates.entry(task).or_default(); - match data.entry(key) { - Entry::Occupied(mut entry) => { - entry.get_mut().1 = value; - } - Entry::Vacant(entry) => { - entry.insert((old_value, value)); + >, +> { + let span = Span::current(); + updates + .into_par_iter() + .map(|updates| { + let _span = span.clone().entered(); + let mut task_updates: HashMap< + TaskId, + HashMap< + CachedDataItemKey, + (Option, Option), + >, + > = HashMap::new(); + for CachedDataUpdate { + task, + key, + value, + old_value, + } in updates.into_iter() + { + let data = task_updates.entry(task).or_default(); + match data.entry(key) { + Entry::Occupied(mut entry) => { + entry.get_mut().1 = value; + } + Entry::Vacant(entry) => { + entry.insert((old_value, value)); + } + } } - } - } - task_updates.retain(|_, data| { - data.retain(|_, (old_value, value)| *old_value != *value); - !data.is_empty() - }); - task_updates + task_updates.retain(|_, data| { + data.retain(|_, (old_value, value)| *old_value != *value); + !data.is_empty() + }); + task_updates + }) + .collect() } fn restore_task_data( this: &LmdbBackingStorage, db: Database, - task_updates: HashMap< - TaskId, - HashMap, Option)>, + task_updates: Vec< + HashMap< + TaskId, + HashMap, Option)>, + >, >, ) -> Result)>> { - let mut result = Vec::with_capacity(task_updates.len()); + let mut result = Vec::with_capacity(task_updates.iter().map(|m| m.len()).sum()); let tx = this.env.begin_ro_txn()?; - for (task, updates) in task_updates.into_iter() { + for (task, updates) in task_updates.into_iter().flatten() { let mut map; if let Ok(old_data) = tx.get(db, &IntKey::new(*task)) { let old_data: Vec = match pot::from_slice(old_data) { diff --git a/turbopack/crates/turbo-tasks-backend/src/utils/chunked_vec.rs b/turbopack/crates/turbo-tasks-backend/src/utils/chunked_vec.rs index 2d71fd13851d2..19cb89e66c376 100644 --- a/turbopack/crates/turbo-tasks-backend/src/utils/chunked_vec.rs +++ b/turbopack/crates/turbo-tasks-backend/src/utils/chunked_vec.rs @@ -41,14 +41,6 @@ impl ChunkedVec { } } - pub fn into_iter(self) -> impl Iterator { - let len = self.len(); - ExactSizeIter { - iter: self.chunks.into_iter().flat_map(|chunk| chunk.into_iter()), - len, - } - } - pub fn iter(&self) -> impl Iterator { ExactSizeIter { iter: self.chunks.iter().flat_map(|chunk| chunk.iter()), @@ -61,6 +53,19 @@ impl ChunkedVec { } } +impl IntoIterator for ChunkedVec { + type Item = T; + type IntoIter = ExactSizeIter>>>; + + fn into_iter(self) -> Self::IntoIter { + let len = self.len(); + ExactSizeIter { + iter: self.chunks.into_iter().flatten(), + len, + } + } +} + impl Extend for ChunkedVec { fn extend>(&mut self, iter: I) { for item in iter { @@ -77,7 +82,7 @@ fn cummulative_chunk_size(chunk_index: usize) -> usize { (8 << (chunk_index + 1)) - 8 } -struct ExactSizeIter { +pub struct ExactSizeIter { iter: I, len: usize, } diff --git a/turbopack/crates/turbo-tasks-backend/src/utils/mod.rs b/turbopack/crates/turbo-tasks-backend/src/utils/mod.rs index 676e3b809b388..c2c585efbd0cf 100644 --- a/turbopack/crates/turbo-tasks-backend/src/utils/mod.rs +++ b/turbopack/crates/turbo-tasks-backend/src/utils/mod.rs @@ -2,3 +2,4 @@ pub mod bi_map; pub mod chunked_vec; pub mod dash_map_multi; pub mod ptr_eq_arc; +pub mod sharded; diff --git a/turbopack/crates/turbo-tasks-backend/src/utils/sharded.rs b/turbopack/crates/turbo-tasks-backend/src/utils/sharded.rs new file mode 100644 index 0000000000000..722a22986bd60 --- /dev/null +++ b/turbopack/crates/turbo-tasks-backend/src/utils/sharded.rs @@ -0,0 +1,52 @@ +use std::hash::{BuildHasher, BuildHasherDefault, Hash, Hasher}; + +use parking_lot::{Mutex, MutexGuard}; +use rustc_hash::FxHasher; + +pub struct Sharded> { + data: Box<[Mutex]>, + hasher: H, + bitmask: u16, +} + +impl Sharded { + pub fn new(mut shard_amount: usize) -> Self + where + T: Default, + H: Default, + { + assert!(shard_amount.is_power_of_two()); + shard_amount = shard_amount.min(u16::MAX as usize + 1); + let data = (0..shard_amount) + .map(|_| Mutex::new(T::default())) + .collect::>() + .into_boxed_slice(); + Self { + data, + hasher: H::default(), + bitmask: (shard_amount - 1) as u16, + } + } + + pub fn lock(&self, key: K) -> MutexGuard<'_, T> + where + K: Hash, + H: BuildHasher, + { + let mut h = self.hasher.build_hasher(); + key.hash(&mut h); + let shard = h.finish() as u16 & self.bitmask; + self.data[shard as usize].lock() + } + + pub fn take(&self) -> Vec + where + T: Default, + { + let locked = self.data.iter().map(|m| m.lock()).collect::>(); + locked + .into_iter() + .map(|mut m| std::mem::take(&mut *m)) + .collect() + } +} From 451c1355b317de992d5b6f5dac7bb0f4cff3ffdf Mon Sep 17 00:00:00 2001 From: Tobias Koppers Date: Fri, 4 Oct 2024 06:21:58 +0200 Subject: [PATCH 39/46] share read transition --- .../turbo-tasks-backend/src/backend/mod.rs | 84 +++++++++++++----- .../backend/operation/aggregation_update.rs | 20 ++--- .../backend/operation/cleanup_old_edges.rs | 6 +- .../src/backend/operation/connect_child.rs | 6 +- .../src/backend/operation/invalidate.rs | 12 ++- .../src/backend/operation/mod.rs | 73 ++++++++++++++-- .../src/backend/operation/update_cell.rs | 2 +- .../src/backend/operation/update_output.rs | 2 +- .../src/backing_storage.rs | 36 +++++++- .../src/lmdb_backing_storage.rs | 85 +++++++++++++++---- 10 files changed, 254 insertions(+), 72 deletions(-) diff --git a/turbopack/crates/turbo-tasks-backend/src/backend/mod.rs b/turbopack/crates/turbo-tasks-backend/src/backend/mod.rs index 340132d57d698..215ad0571076a 100644 --- a/turbopack/crates/turbo-tasks-backend/src/backend/mod.rs +++ b/turbopack/crates/turbo-tasks-backend/src/backend/mod.rs @@ -46,7 +46,7 @@ use crate::{ }, storage::{get, get_many, remove, Storage}, }, - backing_storage::BackingStorage, + backing_storage::{BackingStorage, ReadTransaction}, data::{ ActiveType, AggregationNumber, CachedDataItem, CachedDataItemIndex, CachedDataItemKey, CachedDataItemValue, CachedDataUpdate, CellRef, InProgressCellState, InProgressState, @@ -179,6 +179,18 @@ impl TurboTasksBackendInner { ExecuteContext::new(self, turbo_tasks) } + /// # Safety + /// + /// `tx` must be a transaction from this TurboTasksBackendInner instance. + unsafe fn execute_context_with_tx<'a>( + &'a self, + tx: Option, + turbo_tasks: &'a dyn TurboTasksBackendApi, + ) -> ExecuteContext<'a> { + // Safety: `tx` is from `self`. + unsafe { ExecuteContext::new_with_tx(self, tx, turbo_tasks) } + } + fn suspending_requested(&self) -> bool { (self.in_progress_operations.load(Ordering::Relaxed) & SNAPSHOT_REQUESTED_BIT) != 0 } @@ -257,17 +269,31 @@ impl Drop for OperationGuard<'_> { // Operations impl TurboTasksBackendInner { + /// # Safety + /// + /// `tx` must be a transaction from this TurboTasksBackendInner instance. + unsafe fn connect_child_with_tx( + &self, + tx: Option, + parent_task: TaskId, + child_task: TaskId, + turbo_tasks: &dyn TurboTasksBackendApi, + ) { + operation::ConnectChildOperation::run(parent_task, child_task, unsafe { + self.execute_context_with_tx(tx, turbo_tasks) + }); + } + fn connect_child( &self, parent_task: TaskId, child_task: TaskId, turbo_tasks: &dyn TurboTasksBackendApi, ) { - operation::ConnectChildOperation::run( - parent_task, - child_task, - self.execute_context(turbo_tasks), - ); + operation::ConnectChildOperation::run(parent_task, child_task, unsafe { + // Safety: Passing `None` is safe. + self.execute_context_with_tx(None, turbo_tasks) + }); } fn try_read_task_output( @@ -277,7 +303,7 @@ impl TurboTasksBackendInner { consistency: ReadConsistency, turbo_tasks: &dyn TurboTasksBackendApi, ) -> Result> { - let ctx = self.execute_context(turbo_tasks); + let mut ctx = self.execute_context(turbo_tasks); let mut task = ctx.task(task_id, TaskDataCategory::All); if let Some(in_progress) = get!(task, InProgress) { @@ -311,7 +337,7 @@ impl TurboTasksBackendInner { base_aggregation_number: u32::MAX, distance: None, }, - &ctx, + &mut ctx, ); task = ctx.task(task_id, TaskDataCategory::All); } @@ -401,7 +427,7 @@ impl TurboTasksBackendInner { cell: CellId, turbo_tasks: &dyn TurboTasksBackendApi, ) -> Result> { - let ctx = self.execute_context(turbo_tasks); + let mut ctx = self.execute_context(turbo_tasks); let mut task = ctx.task(task_id, TaskDataCategory::Data); if let Some(content) = get!(task, CellData { cell }) { let content = content.clone(); @@ -488,7 +514,10 @@ impl TurboTasksBackendInner { if let Some(task_type) = self.task_cache.lookup_reverse(&task_id) { return Some(task_type); } - if let Some(task_type) = self.backing_storage.reverse_lookup_task_cache(task_id) { + if let Some(task_type) = unsafe { + self.backing_storage + .reverse_lookup_task_cache(None, task_id) + } { let _ = self.task_cache.try_insert(task_type.clone(), task_id); return Some(task_type); } @@ -585,9 +614,11 @@ impl TurboTasksBackendInner { // They can't be interrupted by a snapshot since the snapshotting job has not been scheduled // yet. let uncompleted_operations = self.backing_storage.uncompleted_operations(); - let ctx = self.execute_context(turbo_tasks); - for op in uncompleted_operations { - op.execute(&ctx); + if !uncompleted_operations.is_empty() { + let mut ctx = self.execute_context(turbo_tasks); + for op in uncompleted_operations { + op.execute(&mut ctx); + } } // Schedule the snapshot job @@ -614,9 +645,15 @@ impl TurboTasksBackendInner { return task_id; } - if let Some(task_id) = self.backing_storage.forward_lookup_task_cache(&task_type) { + let tx = self.backing_storage.start_read_transaction(); + // Safety: `tx` is a valid transaction from `self.backend.backing_storage`. + if let Some(task_id) = unsafe { + self.backing_storage + .forward_lookup_task_cache(tx, &task_type) + } { let _ = self.task_cache.try_insert(Arc::new(task_type), task_id); - self.connect_child(parent_task, task_id, turbo_tasks); + // Safety: `tx` is a valid transaction from `self.backend.backing_storage`. + unsafe { self.connect_child_with_tx(tx, parent_task, task_id, turbo_tasks) }; return task_id; } @@ -630,14 +667,16 @@ impl TurboTasksBackendInner { self.persisted_task_cache_log .lock(existing_task_id) .push((task_type, existing_task_id)); - self.connect_child(parent_task, existing_task_id, turbo_tasks); + // Safety: `tx` is a valid transaction from `self.backend.backing_storage`. + unsafe { self.connect_child_with_tx(tx, parent_task, existing_task_id, turbo_tasks) }; return existing_task_id; } self.persisted_task_cache_log .lock(task_id) .push((task_type, task_id)); - self.connect_child(parent_task, task_id, turbo_tasks); + // Safety: `tx` is a valid transaction from `self.backend.backing_storage`. + unsafe { self.connect_child_with_tx(tx, parent_task, task_id, turbo_tasks) }; task_id } @@ -657,6 +696,7 @@ impl TurboTasksBackendInner { ); } if let Some(task_id) = self.task_cache.lookup_forward(&task_type) { + // Safety: `tx` is a valid transaction from `self.backend.backing_storage`. self.connect_child(parent_task, task_id, turbo_tasks); return task_id; } @@ -668,10 +708,12 @@ impl TurboTasksBackendInner { unsafe { self.transient_task_id_factory.reuse(task_id); } + // Safety: `tx` is a valid transaction from `self.backend.backing_storage`. self.connect_child(parent_task, existing_task_id, turbo_tasks); return existing_task_id; } + // Safety: `tx` is a valid transaction from `self.backend.backing_storage`. self.connect_child(parent_task, task_id, turbo_tasks); task_id @@ -715,7 +757,7 @@ impl TurboTasksBackendInner { if task_id.is_transient() { return; } - let ctx = self.execute_context(turbo_tasks); + let mut ctx = self.execute_context(turbo_tasks); let mut task = ctx.task(task_id, TaskDataCategory::Data); task.invalidate_serialization(); } @@ -753,7 +795,7 @@ impl TurboTasksBackendInner { return None; }; { - let ctx = self.execute_context(turbo_tasks); + let mut ctx = self.execute_context(turbo_tasks); let mut task = ctx.task(task_id, TaskDataCategory::Data); let in_progress = remove!(task, InProgress)?; let InProgressState::Scheduled { done_event } = in_progress else { @@ -944,7 +986,7 @@ impl TurboTasksBackendInner { stateful: bool, turbo_tasks: &dyn TurboTasksBackendApi, ) -> bool { - let ctx = self.execute_context(turbo_tasks); + let mut ctx = self.execute_context(turbo_tasks); let mut task = ctx.task(task_id, TaskDataCategory::All); let Some(CachedDataItemValue::InProgress { value: in_progress }) = task.remove(&CachedDataItemKey::InProgress {}) @@ -1178,7 +1220,7 @@ impl TurboTasksBackendInner { cell: CellId, turbo_tasks: &dyn TurboTasksBackendApi, ) -> Result { - let ctx = self.execute_context(turbo_tasks); + let mut ctx = self.execute_context(turbo_tasks); let task = ctx.task(task_id, TaskDataCategory::Data); if let Some(content) = get!(task, CellData { cell }) { Ok(CellContent(Some(content.1.clone())).into_typed(cell.type_id)) diff --git a/turbopack/crates/turbo-tasks-backend/src/backend/operation/aggregation_update.rs b/turbopack/crates/turbo-tasks-backend/src/backend/operation/aggregation_update.rs index 48ef106dcab3a..c4e74cd4beaa5 100644 --- a/turbopack/crates/turbo-tasks-backend/src/backend/operation/aggregation_update.rs +++ b/turbopack/crates/turbo-tasks-backend/src/backend/operation/aggregation_update.rs @@ -251,13 +251,13 @@ impl AggregationUpdateQueue { self.jobs.extend(jobs); } - pub fn run(job: AggregationUpdateJob, ctx: &ExecuteContext<'_>) { + pub fn run(job: AggregationUpdateJob, ctx: &mut ExecuteContext<'_>) { let mut queue = Self::new(); queue.push(job); queue.execute(ctx); } - pub fn process(&mut self, ctx: &ExecuteContext<'_>) -> bool { + pub fn process(&mut self, ctx: &mut ExecuteContext<'_>) -> bool { if let Some(job) = self.jobs.pop_front() { match job { AggregationUpdateJob::UpdateAggregationNumber { @@ -385,7 +385,7 @@ impl AggregationUpdateQueue { self.jobs.is_empty() } - fn balance_edge(&mut self, ctx: &ExecuteContext, upper_id: TaskId, task_id: TaskId) { + fn balance_edge(&mut self, ctx: &mut ExecuteContext, upper_id: TaskId, task_id: TaskId) { let (mut upper, mut task) = ctx.task_pair(upper_id, task_id, TaskDataCategory::Meta); let upper_aggregation_number = get_aggregation_number(&upper); let task_aggregation_number = get_aggregation_number(&task); @@ -485,7 +485,7 @@ impl AggregationUpdateQueue { } } - fn find_and_schedule_dirty(&mut self, mut task_ids: Vec, ctx: &ExecuteContext) { + fn find_and_schedule_dirty(&mut self, mut task_ids: Vec, ctx: &mut ExecuteContext) { let popped = task_ids.pop(); if !task_ids.is_empty() { self.push(AggregationUpdateJob::FindAndScheduleDirty { task_ids }); @@ -519,7 +519,7 @@ impl AggregationUpdateQueue { fn aggregated_data_update( &mut self, upper_ids: Vec, - ctx: &ExecuteContext, + ctx: &mut ExecuteContext, update: AggregatedDataUpdate, ) { for upper_id in upper_ids { @@ -539,7 +539,7 @@ impl AggregationUpdateQueue { fn inner_lost_follower( &mut self, - ctx: &ExecuteContext, + ctx: &mut ExecuteContext, lost_follower_id: TaskId, mut upper_ids: Vec, ) { @@ -613,7 +613,7 @@ impl AggregationUpdateQueue { fn inner_of_uppers_has_new_follower( &mut self, - ctx: &ExecuteContext, + ctx: &mut ExecuteContext, new_follower_id: TaskId, mut upper_ids: Vec, ) { @@ -698,7 +698,7 @@ impl AggregationUpdateQueue { fn inner_of_upper_has_new_followers( &mut self, - ctx: &ExecuteContext, + ctx: &mut ExecuteContext, new_follower_ids: Vec, upper_id: TaskId, ) { @@ -797,7 +797,7 @@ impl AggregationUpdateQueue { fn update_aggregation_number( &mut self, - ctx: &ExecuteContext, + ctx: &mut ExecuteContext, task_id: TaskId, base_effective_distance: Option>, base_aggregation_number: u32, @@ -871,7 +871,7 @@ impl AggregationUpdateQueue { } impl Operation for AggregationUpdateQueue { - fn execute(mut self, ctx: &ExecuteContext<'_>) { + fn execute(mut self, ctx: &mut ExecuteContext<'_>) { let _span = tracing::trace_span!("aggregation update queue").entered(); loop { ctx.operation_suspend_point(&self); diff --git a/turbopack/crates/turbo-tasks-backend/src/backend/operation/cleanup_old_edges.rs b/turbopack/crates/turbo-tasks-backend/src/backend/operation/cleanup_old_edges.rs index 46b7e1ec66cc4..624fcd00543ad 100644 --- a/turbopack/crates/turbo-tasks-backend/src/backend/operation/cleanup_old_edges.rs +++ b/turbopack/crates/turbo-tasks-backend/src/backend/operation/cleanup_old_edges.rs @@ -46,7 +46,7 @@ impl CleanupOldEdgesOperation { task_id: TaskId, outdated: Vec, data_update: Option, - ctx: ExecuteContext<'_>, + mut ctx: ExecuteContext<'_>, ) { let mut queue = AggregationUpdateQueue::new(); queue.extend(data_update); @@ -55,12 +55,12 @@ impl CleanupOldEdgesOperation { outdated, queue, } - .execute(&ctx); + .execute(&mut ctx); } } impl Operation for CleanupOldEdgesOperation { - fn execute(mut self, ctx: &ExecuteContext<'_>) { + fn execute(mut self, ctx: &mut ExecuteContext<'_>) { loop { ctx.operation_suspend_point(&self); match self { diff --git a/turbopack/crates/turbo-tasks-backend/src/backend/operation/connect_child.rs b/turbopack/crates/turbo-tasks-backend/src/backend/operation/connect_child.rs index e5d99de0106e0..626698d7e9145 100644 --- a/turbopack/crates/turbo-tasks-backend/src/backend/operation/connect_child.rs +++ b/turbopack/crates/turbo-tasks-backend/src/backend/operation/connect_child.rs @@ -29,7 +29,7 @@ pub enum ConnectChildOperation { } impl ConnectChildOperation { - pub fn run(parent_task_id: TaskId, child_task_id: TaskId, ctx: ExecuteContext<'_>) { + pub fn run(parent_task_id: TaskId, child_task_id: TaskId, mut ctx: ExecuteContext<'_>) { let mut parent_task = ctx.task(parent_task_id, TaskDataCategory::All); parent_task.remove(&CachedDataItemKey::OutdatedChild { task: child_task_id, @@ -123,13 +123,13 @@ impl ConnectChildOperation { ConnectChildOperation::UpdateAggregation { aggregation_update: queue, } - .execute(&ctx); + .execute(&mut ctx); } } } impl Operation for ConnectChildOperation { - fn execute(mut self, ctx: &ExecuteContext<'_>) { + fn execute(mut self, ctx: &mut ExecuteContext<'_>) { loop { ctx.operation_suspend_point(&self); match self { diff --git a/turbopack/crates/turbo-tasks-backend/src/backend/operation/invalidate.rs b/turbopack/crates/turbo-tasks-backend/src/backend/operation/invalidate.rs index 7131f9fe10026..2872f4f80fe5c 100644 --- a/turbopack/crates/turbo-tasks-backend/src/backend/operation/invalidate.rs +++ b/turbopack/crates/turbo-tasks-backend/src/backend/operation/invalidate.rs @@ -31,13 +31,13 @@ pub enum InvalidateOperation { } impl InvalidateOperation { - pub fn run(task_ids: SmallVec<[TaskId; 4]>, ctx: ExecuteContext<'_>) { - InvalidateOperation::MakeDirty { task_ids }.execute(&ctx) + pub fn run(task_ids: SmallVec<[TaskId; 4]>, mut ctx: ExecuteContext<'_>) { + InvalidateOperation::MakeDirty { task_ids }.execute(&mut ctx) } } impl Operation for InvalidateOperation { - fn execute(mut self, ctx: &ExecuteContext<'_>) { + fn execute(mut self, ctx: &mut ExecuteContext<'_>) { loop { ctx.operation_suspend_point(&self); match self { @@ -66,7 +66,11 @@ impl Operation for InvalidateOperation { } } -pub fn make_task_dirty(task_id: TaskId, queue: &mut AggregationUpdateQueue, ctx: &ExecuteContext) { +pub fn make_task_dirty( + task_id: TaskId, + queue: &mut AggregationUpdateQueue, + ctx: &mut ExecuteContext, +) { if ctx.is_once_task(task_id) { return; } diff --git a/turbopack/crates/turbo-tasks-backend/src/backend/operation/mod.rs b/turbopack/crates/turbo-tasks-backend/src/backend/operation/mod.rs index d81ed8127bbd5..f8589a70a8826 100644 --- a/turbopack/crates/turbo-tasks-backend/src/backend/operation/mod.rs +++ b/turbopack/crates/turbo-tasks-backend/src/backend/operation/mod.rs @@ -18,6 +18,7 @@ use crate::{ storage::StorageWriteGuard, OperationGuard, TaskDataCategory, TransientTask, TurboTasksBackend, TurboTasksBackendInner, }, + backing_storage::ReadTransaction, data::{ CachedDataItem, CachedDataItemIndex, CachedDataItemKey, CachedDataItemValue, CachedDataUpdate, @@ -31,7 +32,7 @@ pub trait Operation: + TryFrom + Into { - fn execute(self, ctx: &ExecuteContext<'_>); + fn execute(self, ctx: &mut ExecuteContext<'_>); } pub struct ExecuteContext<'a> { @@ -39,6 +40,22 @@ pub struct ExecuteContext<'a> { turbo_tasks: &'a dyn TurboTasksBackendApi, _operation_guard: Option>, parent: Option<(&'a AnyOperation, &'a ExecuteContext<'a>)>, + transaction: Option>, +} + +impl<'a> Drop for ExecuteContext<'a> { + fn drop(&mut self) { + if self.parent.is_none() { + if let Some(Some(transaction)) = self.transaction { + // Safety: `transaction` is a valid transaction from `self.backend.backing_storage`. + unsafe { + self.backend + .backing_storage + .end_read_transaction(transaction) + }; + } + } + } } impl<'a> ExecuteContext<'a> { @@ -51,10 +68,35 @@ impl<'a> ExecuteContext<'a> { turbo_tasks, _operation_guard: Some(backend.start_operation()), parent: None, + transaction: None, } } - pub fn task(&self, task_id: TaskId, category: TaskDataCategory) -> TaskGuard<'a> { + pub(super) unsafe fn new_with_tx( + backend: &'a TurboTasksBackendInner, + transaction: Option, + turbo_tasks: &'a dyn TurboTasksBackendApi, + ) -> Self { + Self { + backend, + turbo_tasks, + _operation_guard: Some(backend.start_operation()), + parent: None, + transaction: Some(transaction), + } + } + + fn transaction(&mut self) -> Option { + if let Some(tx) = self.transaction { + tx + } else { + let tx = self.backend.backing_storage.start_read_transaction(); + self.transaction = Some(tx); + tx + } + } + + pub fn task(&mut self, task_id: TaskId, category: TaskDataCategory) -> TaskGuard<'a> { let mut task = self.backend.storage.access_mut(task_id); if !task.persistance_state().is_restored(category) { if task_id.is_transient() { @@ -65,7 +107,7 @@ impl<'a> ExecuteContext<'a> { // Avoid holding the lock too long since this can also affect other tasks drop(task); - let items = self.backend.backing_storage.lookup_data(task_id, category); + let items = self.restore_task_data(task_id, category); task = self.backend.storage.access_mut(task_id); if !task.persistance_state().is_restored(category) { for item in items { @@ -83,6 +125,20 @@ impl<'a> ExecuteContext<'a> { } } + fn restore_task_data( + &mut self, + task_id: TaskId, + category: TaskDataCategory, + ) -> Vec { + // Safety: `transaction` is a valid transaction from `self.backend.backing_storage`. + let items = unsafe { + self.backend + .backing_storage + .lookup_data(self.transaction(), task_id, category) + }; + items + } + pub fn is_once_task(&self, task_id: TaskId) -> bool { if !task_id.is_transient() { return false; @@ -95,7 +151,7 @@ impl<'a> ExecuteContext<'a> { } pub fn task_pair( - &self, + &mut self, task_id1: TaskId, task_id2: TaskId, category: TaskDataCategory, @@ -109,10 +165,8 @@ impl<'a> ExecuteContext<'a> { drop(task1); drop(task2); - let items1 = (!is_restored1) - .then(|| self.backend.backing_storage.lookup_data(task_id1, category)); - let items2 = (!is_restored2) - .then(|| self.backend.backing_storage.lookup_data(task_id2, category)); + let items1 = (!is_restored1).then(|| self.restore_task_data(task_id1, category)); + let items2 = (!is_restored2).then(|| self.restore_task_data(task_id2, category)); let (t1, t2) = self.backend.storage.access_pair_mut(task_id1, task_id2); task1 = t1; @@ -182,6 +236,7 @@ impl<'a> ExecuteContext<'a> { turbo_tasks: self.turbo_tasks, _operation_guard: None, parent: Some((&parent_op, self)), + transaction: self.transaction, }; run(inner_ctx); *parent_op_ref = parent_op.try_into().unwrap(); @@ -452,7 +507,7 @@ pub enum AnyOperation { } impl AnyOperation { - pub fn execute(self, ctx: &ExecuteContext<'_>) { + pub fn execute(self, ctx: &mut ExecuteContext<'_>) { match self { AnyOperation::ConnectChild(op) => op.execute(ctx), AnyOperation::Invalidate(op) => op.execute(ctx), diff --git a/turbopack/crates/turbo-tasks-backend/src/backend/operation/update_cell.rs b/turbopack/crates/turbo-tasks-backend/src/backend/operation/update_cell.rs index 90fb7e8daf3d1..330849010fdc4 100644 --- a/turbopack/crates/turbo-tasks-backend/src/backend/operation/update_cell.rs +++ b/turbopack/crates/turbo-tasks-backend/src/backend/operation/update_cell.rs @@ -12,7 +12,7 @@ use crate::{ pub struct UpdateCellOperation; impl UpdateCellOperation { - pub fn run(task_id: TaskId, cell: CellId, content: CellContent, ctx: ExecuteContext<'_>) { + pub fn run(task_id: TaskId, cell: CellId, content: CellContent, mut ctx: ExecuteContext<'_>) { let mut task = ctx.task(task_id, TaskDataCategory::All); let old_content = if let CellContent(Some(new_content)) = content { task.insert(CachedDataItem::CellData { diff --git a/turbopack/crates/turbo-tasks-backend/src/backend/operation/update_output.rs b/turbopack/crates/turbo-tasks-backend/src/backend/operation/update_output.rs index be107e17fbbd0..fb12b8752bf7d 100644 --- a/turbopack/crates/turbo-tasks-backend/src/backend/operation/update_output.rs +++ b/turbopack/crates/turbo-tasks-backend/src/backend/operation/update_output.rs @@ -18,7 +18,7 @@ impl UpdateOutputOperation { pub fn run( task_id: TaskId, output: Result, Option>>, - ctx: ExecuteContext<'_>, + mut ctx: ExecuteContext<'_>, ) { let mut task = ctx.task(task_id, TaskDataCategory::Data); let old_error = task.remove(&CachedDataItemKey::Error {}); diff --git a/turbopack/crates/turbo-tasks-backend/src/backing_storage.rs b/turbopack/crates/turbo-tasks-backend/src/backing_storage.rs index b3b717f23f955..59be1ab742020 100644 --- a/turbopack/crates/turbo-tasks-backend/src/backing_storage.rs +++ b/turbopack/crates/turbo-tasks-backend/src/backing_storage.rs @@ -9,6 +9,9 @@ use crate::{ utils::chunked_vec::ChunkedVec, }; +#[derive(Clone, Copy)] +pub struct ReadTransaction(pub *const ()); + pub trait BackingStorage { fn next_free_task_id(&self) -> TaskId; fn uncompleted_operations(&self) -> Vec; @@ -19,7 +22,34 @@ pub trait BackingStorage { meta_updates: Vec>, data_updates: Vec>, ) -> Result<()>; - fn forward_lookup_task_cache(&self, key: &CachedTaskType) -> Option; - fn reverse_lookup_task_cache(&self, task_id: TaskId) -> Option>; - fn lookup_data(&self, task_id: TaskId, category: TaskDataCategory) -> Vec; + fn start_read_transaction(&self) -> Option; + /// # Safety + /// + /// `tx` must be a transaction from this BackingStorage instance. + unsafe fn end_read_transaction(&self, tx: ReadTransaction); + /// # Safety + /// + /// `tx` must be a transaction from this BackingStorage instance. + unsafe fn forward_lookup_task_cache( + &self, + tx: Option, + key: &CachedTaskType, + ) -> Option; + /// # Safety + /// + /// `tx` must be a transaction from this BackingStorage instance. + unsafe fn reverse_lookup_task_cache( + &self, + tx: Option, + task_id: TaskId, + ) -> Option>; + /// # Safety + /// + /// `tx` must be a transaction from this BackingStorage instance. + unsafe fn lookup_data( + &self, + tx: Option, + task_id: TaskId, + category: TaskDataCategory, + ) -> Vec; } diff --git a/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs b/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs index 130a9fb475e9e..f770bc203b849 100644 --- a/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs +++ b/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs @@ -4,6 +4,7 @@ use std::{ collections::{hash_map::Entry, HashMap}, error::Error, fs::create_dir_all, + mem::{transmute, ManuallyDrop}, path::Path, sync::Arc, thread::available_parallelism, @@ -11,14 +12,16 @@ use std::{ }; use anyhow::{anyhow, Context, Result}; -use lmdb::{Database, DatabaseFlags, Environment, EnvironmentFlags, Transaction, WriteFlags}; +use lmdb::{ + Database, DatabaseFlags, Environment, EnvironmentFlags, RoTransaction, Transaction, WriteFlags, +}; use rayon::iter::{IntoParallelIterator, ParallelIterator}; use tracing::Span; use turbo_tasks::{backend::CachedTaskType, KeyValuePair, TaskId}; use crate::{ backend::{AnyOperation, TaskDataCategory}, - backing_storage::BackingStorage, + backing_storage::{BackingStorage, ReadTransaction}, data::{CachedDataItem, CachedDataItemKey, CachedDataItemValue, CachedDataUpdate}, utils::chunked_vec::ChunkedVec, }; @@ -92,6 +95,30 @@ impl LmdbBackingStorage { _ => unreachable!(), } } + + fn to_tx(&self, tx: ReadTransaction) -> ManuallyDrop> { + ManuallyDrop::new(unsafe { transmute::<*const (), RoTransaction<'_>>(tx.0) }) + } + + fn from_tx(tx: RoTransaction<'_>) -> ReadTransaction { + ReadTransaction(unsafe { transmute::, *const ()>(tx) }) + } + + fn with_tx( + &self, + tx: Option, + f: impl FnOnce(&RoTransaction<'_>) -> Result, + ) -> Result { + if let Some(tx) = tx { + let tx = self.to_tx(tx); + f(&*tx) + } else { + let tx = self.env.begin_ro_txn()?; + let r = f(&tx)?; + tx.commit()?; + Ok(r) + } + } } impl BackingStorage for LmdbBackingStorage { @@ -277,9 +304,26 @@ impl BackingStorage for LmdbBackingStorage { Ok(()) } - fn forward_lookup_task_cache(&self, task_type: &CachedTaskType) -> Option { - fn lookup(this: &LmdbBackingStorage, task_type: &CachedTaskType) -> Result> { - let tx = this.env.begin_ro_txn()?; + fn start_read_transaction(&self) -> Option { + Some(Self::from_tx(self.env.begin_ro_txn().ok()?)) + } + + unsafe fn end_read_transaction(&self, transaction: ReadTransaction) { + ManuallyDrop::into_inner(self.to_tx(transaction)) + .commit() + .unwrap(); + } + + unsafe fn forward_lookup_task_cache( + &self, + tx: Option, + task_type: &CachedTaskType, + ) -> Option { + fn lookup( + this: &LmdbBackingStorage, + tx: &RoTransaction<'_>, + task_type: &CachedTaskType, + ) -> Result> { let task_type = pot::to_vec(task_type)?; let bytes = match extended_key::get(&tx, this.forward_task_cache_db, &task_type) { Ok(result) => result, @@ -293,21 +337,25 @@ impl BackingStorage for LmdbBackingStorage { }; let bytes = bytes.try_into()?; let id = TaskId::from(u32::from_be_bytes(bytes)); - tx.commit()?; Ok(Some(id)) } - let id = lookup(self, task_type) + let id = self + .with_tx(tx, |tx| lookup(self, tx, task_type)) .inspect_err(|err| println!("Looking up task id for {task_type:?} failed: {err:?}")) .ok()??; Some(id) } - fn reverse_lookup_task_cache(&self, task_id: TaskId) -> Option> { + unsafe fn reverse_lookup_task_cache( + &self, + tx: Option, + task_id: TaskId, + ) -> Option> { fn lookup( this: &LmdbBackingStorage, + tx: &RoTransaction<'_>, task_id: TaskId, ) -> Result>> { - let tx = this.env.begin_ro_txn()?; let bytes = match tx.get(this.reverse_task_cache_db, &IntKey::new(*task_id)) { Ok(bytes) => bytes, Err(err) => { @@ -318,23 +366,27 @@ impl BackingStorage for LmdbBackingStorage { } } }; - let result = pot::from_slice(bytes)?; - tx.commit()?; - Ok(Some(result)) + Ok(Some(pot::from_slice(bytes)?)) } - let result = lookup(self, task_id) + let result = self + .with_tx(tx, |tx| lookup(self, tx, task_id)) .inspect_err(|err| println!("Looking up task type for {task_id} failed: {err:?}")) .ok()??; Some(result) } - fn lookup_data(&self, task_id: TaskId, category: TaskDataCategory) -> Vec { + unsafe fn lookup_data( + &self, + tx: Option, + task_id: TaskId, + category: TaskDataCategory, + ) -> Vec { fn lookup( this: &LmdbBackingStorage, + tx: &RoTransaction<'_>, task_id: TaskId, category: TaskDataCategory, ) -> Result> { - let tx = this.env.begin_ro_txn()?; let bytes = match tx.get(this.db(category), &IntKey::new(*task_id)) { Ok(bytes) => bytes, Err(err) => { @@ -346,10 +398,9 @@ impl BackingStorage for LmdbBackingStorage { } }; let result: Vec = pot::from_slice(bytes)?; - tx.commit()?; Ok(result) } - lookup(self, task_id, category) + self.with_tx(tx, |tx| lookup(self, tx, task_id, category)) .inspect_err(|err| println!("Looking up data for {task_id} failed: {err:?}")) .unwrap_or_default() } From bbc443b8f0e5f25e91748d5d898ed9124ba4a12a Mon Sep 17 00:00:00 2001 From: Tobias Koppers Date: Fri, 27 Sep 2024 07:46:01 +0200 Subject: [PATCH 40/46] remove aggregation update queue tracing --- .../src/backend/operation/aggregation_update.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/turbopack/crates/turbo-tasks-backend/src/backend/operation/aggregation_update.rs b/turbopack/crates/turbo-tasks-backend/src/backend/operation/aggregation_update.rs index c4e74cd4beaa5..c999dc28cab9d 100644 --- a/turbopack/crates/turbo-tasks-backend/src/backend/operation/aggregation_update.rs +++ b/turbopack/crates/turbo-tasks-backend/src/backend/operation/aggregation_update.rs @@ -872,7 +872,6 @@ impl AggregationUpdateQueue { impl Operation for AggregationUpdateQueue { fn execute(mut self, ctx: &mut ExecuteContext<'_>) { - let _span = tracing::trace_span!("aggregation update queue").entered(); loop { ctx.operation_suspend_point(&self); if self.process(ctx) { From dee5f7b7c4be95454385609388c8863e706582b4 Mon Sep 17 00:00:00 2001 From: Tobias Koppers Date: Fri, 27 Sep 2024 08:46:26 +0200 Subject: [PATCH 41/46] improve messaging --- turbopack/crates/turbo-tasks-backend/src/backend/mod.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/turbopack/crates/turbo-tasks-backend/src/backend/mod.rs b/turbopack/crates/turbo-tasks-backend/src/backend/mod.rs index 215ad0571076a..2b16af3c9510f 100644 --- a/turbopack/crates/turbo-tasks-backend/src/backend/mod.rs +++ b/turbopack/crates/turbo-tasks-backend/src/backend/mod.rs @@ -596,7 +596,6 @@ impl TurboTasksBackendInner { println!("Persising failed: {:#?}", err); return None; } - println!("Snapshot saved"); } for (task_id, count) in counts { From be67250227bfca782d6009795bbd90ac33c163d8 Mon Sep 17 00:00:00 2001 From: Tobias Koppers Date: Fri, 27 Sep 2024 11:51:59 +0200 Subject: [PATCH 42/46] handle idle end event for idle detection in persisting --- .../turbo-tasks-backend/src/backend/mod.rs | 27 ++++++++++++++----- turbopack/crates/turbo-tasks/src/backend.rs | 2 ++ turbopack/crates/turbo-tasks/src/manager.rs | 1 + 3 files changed, 24 insertions(+), 6 deletions(-) diff --git a/turbopack/crates/turbo-tasks-backend/src/backend/mod.rs b/turbopack/crates/turbo-tasks-backend/src/backend/mod.rs index 2b16af3c9510f..3027a5b1c089f 100644 --- a/turbopack/crates/turbo-tasks-backend/src/backend/mod.rs +++ b/turbopack/crates/turbo-tasks-backend/src/backend/mod.rs @@ -129,7 +129,8 @@ struct TurboTasksBackendInner { stopping: AtomicBool, stopping_event: Event, - idle_event: Event, + idle_start_event: Event, + idle_end_event: Event, backing_storage: Arc, } @@ -167,7 +168,8 @@ impl TurboTasksBackendInner { last_snapshot: AtomicU64::new(0), stopping: AtomicBool::new(false), stopping_event: Event::new(|| "TurboTasksBackend::stopping_event".to_string()), - idle_event: Event::new(|| "TurboTasksBackend::idle_event".to_string()), + idle_start_event: Event::new(|| "TurboTasksBackend::idle_start_event".to_string()), + idle_end_event: Event::new(|| "TurboTasksBackend::idle_end_event".to_string()), backing_storage, } } @@ -630,7 +632,11 @@ impl TurboTasksBackendInner { } fn idle_start(&self) { - self.idle_event.notify(usize::MAX); + self.idle_start_event.notify(usize::MAX); + } + + fn idle_end(&self) { + self.idle_end_event.notify(usize::MAX); } fn get_or_create_persistent_task( @@ -1170,16 +1176,21 @@ impl TurboTasksBackendInner { if until > Instant::now() { let mut stop_listener = self.stopping_event.listen(); if !self.stopping.load(Ordering::Acquire) { - let mut idle_listener = self.idle_event.listen(); + let mut idle_start_listener = self.idle_start_event.listen(); + let mut idle_end_listener = self.idle_end_event.listen(); let mut idle_time = until + IDLE_TIMEOUT; loop { tokio::select! { _ = &mut stop_listener => { break; }, - _ = &mut idle_listener => { + _ = &mut idle_start_listener => { idle_time = Instant::now() + IDLE_TIMEOUT; - idle_listener = self.idle_event.listen() + idle_start_listener = self.idle_start_event.listen() + }, + _ = &mut idle_end_listener => { + idle_time = until + IDLE_TIMEOUT; + idle_end_listener = self.idle_end_event.listen() }, _ = tokio::time::sleep_until(until.into()) => { break; @@ -1300,6 +1311,10 @@ impl Backend for TurboTasksBackend { self.0.idle_start(); } + fn idle_end(&self, _turbo_tasks: &dyn TurboTasksBackendApi) { + self.0.idle_end(); + } + fn get_or_create_persistent_task( &self, task_type: CachedTaskType, diff --git a/turbopack/crates/turbo-tasks/src/backend.rs b/turbopack/crates/turbo-tasks/src/backend.rs index a7096f9634474..42792a3f69506 100644 --- a/turbopack/crates/turbo-tasks/src/backend.rs +++ b/turbopack/crates/turbo-tasks/src/backend.rs @@ -534,6 +534,8 @@ pub trait Backend: Sync + Send { #[allow(unused_variables)] fn idle_start(&self, turbo_tasks: &dyn TurboTasksBackendApi) {} + #[allow(unused_variables)] + fn idle_end(&self, turbo_tasks: &dyn TurboTasksBackendApi) {} fn invalidate_task(&self, task: TaskId, turbo_tasks: &dyn TurboTasksBackendApi); diff --git a/turbopack/crates/turbo-tasks/src/manager.rs b/turbopack/crates/turbo-tasks/src/manager.rs index 60a69b4f642f8..e5dd9cc4b06ae 100644 --- a/turbopack/crates/turbo-tasks/src/manager.rs +++ b/turbopack/crates/turbo-tasks/src/manager.rs @@ -851,6 +851,7 @@ impl TurboTasks { { *self.start.lock().unwrap() = Some(Instant::now()); self.event_start.notify(usize::MAX); + self.backend.idle_end(self); } } From 077eec92ac7dec5f703829b023f2ccd003866549 Mon Sep 17 00:00:00 2001 From: Tobias Koppers Date: Fri, 4 Oct 2024 06:19:20 +0200 Subject: [PATCH 43/46] improve imports --- turbopack/crates/turbo-tasks-backend/src/backend/mod.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/turbopack/crates/turbo-tasks-backend/src/backend/mod.rs b/turbopack/crates/turbo-tasks-backend/src/backend/mod.rs index 3027a5b1c089f..db96d714cadbb 100644 --- a/turbopack/crates/turbo-tasks-backend/src/backend/mod.rs +++ b/turbopack/crates/turbo-tasks-backend/src/backend/mod.rs @@ -23,7 +23,6 @@ use dashmap::DashMap; use parking_lot::{Condvar, Mutex}; use rustc_hash::FxHasher; use smallvec::smallvec; -pub use storage::TaskDataCategory; use turbo_tasks::{ backend::{ Backend, BackendJobId, CachedTaskType, CellContent, TaskExecutionSpec, TransientTaskRoot, @@ -36,7 +35,7 @@ use turbo_tasks::{ ValueTypeId, TRANSIENT_TASK_BIT, }; -pub use self::operation::AnyOperation; +pub use self::{operation::AnyOperation, storage::TaskDataCategory}; use crate::{ backend::{ operation::{ From 041751d71119860a4c6c48a82d59f7e2a570702b Mon Sep 17 00:00:00 2001 From: Tobias Koppers Date: Fri, 4 Oct 2024 06:26:41 +0200 Subject: [PATCH 44/46] clippy --- .../turbo-tasks-backend/src/backend/mod.rs | 2 +- .../src/backend/operation/mod.rs | 8 ++--- .../src/lmdb_backing_storage.rs | 34 ++++++------------- .../turbo-tasks-backend/src/utils/sharded.rs | 7 ++-- 4 files changed, 18 insertions(+), 33 deletions(-) diff --git a/turbopack/crates/turbo-tasks-backend/src/backend/mod.rs b/turbopack/crates/turbo-tasks-backend/src/backend/mod.rs index db96d714cadbb..c9a30009b0e27 100644 --- a/turbopack/crates/turbo-tasks-backend/src/backend/mod.rs +++ b/turbopack/crates/turbo-tasks-backend/src/backend/mod.rs @@ -579,7 +579,7 @@ impl TurboTasksBackendInner { let mut new_items = false; - fn shards_empty(shards: &Vec>) -> bool { + fn shards_empty(shards: &[ChunkedVec]) -> bool { shards.iter().all(|shard| shard.is_empty()) } diff --git a/turbopack/crates/turbo-tasks-backend/src/backend/operation/mod.rs b/turbopack/crates/turbo-tasks-backend/src/backend/operation/mod.rs index f8589a70a8826..e3a536c3f9f52 100644 --- a/turbopack/crates/turbo-tasks-backend/src/backend/operation/mod.rs +++ b/turbopack/crates/turbo-tasks-backend/src/backend/operation/mod.rs @@ -131,12 +131,11 @@ impl<'a> ExecuteContext<'a> { category: TaskDataCategory, ) -> Vec { // Safety: `transaction` is a valid transaction from `self.backend.backing_storage`. - let items = unsafe { + unsafe { self.backend .backing_storage .lookup_data(self.transaction(), task_id, category) - }; - items + } } pub fn is_once_task(&self, task_id: TaskId) -> bool { @@ -358,8 +357,7 @@ impl TaskGuard<'_> { task.update(key, |old| { let old_value_when_persistent = old .as_ref() - .map(|old| old.is_persistent().then(|| old.clone())) - .flatten(); + .and_then(|old| old.is_persistent().then(|| old.clone())); let new = update(old); let new_persistent = new.as_ref().map(|new| new.is_persistent()).unwrap_or(false); diff --git a/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs b/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs index f770bc203b849..7bc1aeb471c4a 100644 --- a/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs +++ b/turbopack/crates/turbo-tasks-backend/src/lmdb_backing_storage.rs @@ -111,7 +111,7 @@ impl LmdbBackingStorage { ) -> Result { if let Some(tx) = tx { let tx = self.to_tx(tx); - f(&*tx) + f(&tx) } else { let tx = self.env.begin_ro_txn()?; let r = f(&tx)?; @@ -325,7 +325,7 @@ impl BackingStorage for LmdbBackingStorage { task_type: &CachedTaskType, ) -> Result> { let task_type = pot::to_vec(task_type)?; - let bytes = match extended_key::get(&tx, this.forward_task_cache_db, &task_type) { + let bytes = match extended_key::get(tx, this.forward_task_cache_db, &task_type) { Ok(result) => result, Err(err) => { if err == lmdb::Error::NotFound { @@ -406,26 +406,19 @@ impl BackingStorage for LmdbBackingStorage { } } -fn organize_task_data( - updates: Vec>, -) -> Vec< - HashMap< - TaskId, - HashMap, Option)>, - >, -> { +type OrganizedTaskData = HashMap< + TaskId, + HashMap, Option)>, +>; +type ShardedOrganizedTaskData = Vec; + +fn organize_task_data(updates: Vec>) -> ShardedOrganizedTaskData { let span = Span::current(); updates .into_par_iter() .map(|updates| { let _span = span.clone().entered(); - let mut task_updates: HashMap< - TaskId, - HashMap< - CachedDataItemKey, - (Option, Option), - >, - > = HashMap::new(); + let mut task_updates: OrganizedTaskData = HashMap::new(); for CachedDataUpdate { task, key, @@ -455,12 +448,7 @@ fn organize_task_data( fn restore_task_data( this: &LmdbBackingStorage, db: Database, - task_updates: Vec< - HashMap< - TaskId, - HashMap, Option)>, - >, - >, + task_updates: ShardedOrganizedTaskData, ) -> Result)>> { let mut result = Vec::with_capacity(task_updates.iter().map(|m| m.len()).sum()); diff --git a/turbopack/crates/turbo-tasks-backend/src/utils/sharded.rs b/turbopack/crates/turbo-tasks-backend/src/utils/sharded.rs index 722a22986bd60..2c0d79d3b4440 100644 --- a/turbopack/crates/turbo-tasks-backend/src/utils/sharded.rs +++ b/turbopack/crates/turbo-tasks-backend/src/utils/sharded.rs @@ -1,4 +1,4 @@ -use std::hash::{BuildHasher, BuildHasherDefault, Hash, Hasher}; +use std::hash::{BuildHasher, BuildHasherDefault, Hash}; use parking_lot::{Mutex, MutexGuard}; use rustc_hash::FxHasher; @@ -33,9 +33,8 @@ impl Sharded { K: Hash, H: BuildHasher, { - let mut h = self.hasher.build_hasher(); - key.hash(&mut h); - let shard = h.finish() as u16 & self.bitmask; + let hash = self.hasher.hash_one(key); + let shard = hash as u16 & self.bitmask; self.data[shard as usize].lock() } From 594b0a8c437eebd91c3a47c60d883d115543747c Mon Sep 17 00:00:00 2001 From: Tobias Koppers Date: Tue, 8 Oct 2024 08:45:45 +0200 Subject: [PATCH 45/46] apply review comments --- .../turbo-tasks-backend/src/backend/mod.rs | 57 ++++++++++++------- .../crates/turbo-tasks-backend/src/data.rs | 56 ++++++++---------- turbopack/crates/turbo-tasks/src/id.rs | 2 +- .../crates/turbo-tasks/src/key_value_pair.rs | 6 +- turbopack/crates/turbo-tasks/src/scope.rs | 10 ++-- 5 files changed, 71 insertions(+), 60 deletions(-) diff --git a/turbopack/crates/turbo-tasks-backend/src/backend/mod.rs b/turbopack/crates/turbo-tasks-backend/src/backend/mod.rs index c9a30009b0e27..9f0e241ed8e99 100644 --- a/turbopack/crates/turbo-tasks-backend/src/backend/mod.rs +++ b/turbopack/crates/turbo-tasks-backend/src/backend/mod.rs @@ -14,7 +14,6 @@ use std::{ Arc, }, thread::available_parallelism, - time::{Duration, Instant}, }; use anyhow::{bail, Result}; @@ -23,6 +22,7 @@ use dashmap::DashMap; use parking_lot::{Condvar, Mutex}; use rustc_hash::FxHasher; use smallvec::smallvec; +use tokio::time::{Duration, Instant}; use turbo_tasks::{ backend::{ Backend, BackendJobId, CachedTaskType, CellContent, TaskExecutionSpec, TransientTaskRoot, @@ -54,6 +54,9 @@ use crate::{ utils::{bi_map::BiMap, chunked_vec::ChunkedVec, ptr_eq_arc::PtrEqArc, sharded::Sharded}, }; +const BACKEND_JOB_INITIAL_SNAPSHOT: BackendJobId = unsafe { BackendJobId::new_unchecked(1) }; +const BACKEND_JOB_FOLLOW_UP_SNAPSHOT: BackendJobId = unsafe { BackendJobId::new_unchecked(2) }; + const SNAPSHOT_REQUESTED_BIT: usize = 1 << (usize::BITS - 1); struct SnapshotRequest { @@ -123,7 +126,7 @@ struct TurboTasksBackendInner { /// Condition Variable that is triggered when a snapshot is completed and /// operations can continue. snapshot_completed: Condvar, - /// The timestamp of the last started snapshot. + /// The timestamp of the last started snapshot since [`Self::start_time`]. last_snapshot: AtomicU64, stopping: AtomicBool, @@ -291,10 +294,11 @@ impl TurboTasksBackendInner { child_task: TaskId, turbo_tasks: &dyn TurboTasksBackendApi, ) { - operation::ConnectChildOperation::run(parent_task, child_task, unsafe { - // Safety: Passing `None` is safe. - self.execute_context_with_tx(None, turbo_tasks) - }); + operation::ConnectChildOperation::run( + parent_task, + child_task, + self.execute_context(turbo_tasks), + ); } fn try_read_task_output( @@ -541,13 +545,11 @@ impl TurboTasksBackendInner { snapshot_request.snapshot_requested = true; let active_operations = self .in_progress_operations - .fetch_or(SNAPSHOT_REQUESTED_BIT, std::sync::atomic::Ordering::Relaxed); + .fetch_or(SNAPSHOT_REQUESTED_BIT, Ordering::Relaxed); if active_operations != 0 { self.operations_suspended .wait_while(&mut snapshot_request, |_| { - self.in_progress_operations - .load(std::sync::atomic::Ordering::Relaxed) - != SNAPSHOT_REQUESTED_BIT + self.in_progress_operations.load(Ordering::Relaxed) != SNAPSHOT_REQUESTED_BIT }); } let suspended_operations = snapshot_request @@ -562,7 +564,7 @@ impl TurboTasksBackendInner { let mut snapshot_request = self.snapshot_request.lock(); snapshot_request.snapshot_requested = false; self.in_progress_operations - .fetch_sub(SNAPSHOT_REQUESTED_BIT, std::sync::atomic::Ordering::Relaxed); + .fetch_sub(SNAPSHOT_REQUESTED_BIT, Ordering::Relaxed); self.snapshot_completed.notify_all(); let snapshot_time = Instant::now(); drop(snapshot_request); @@ -622,7 +624,7 @@ impl TurboTasksBackendInner { } // Schedule the snapshot job - turbo_tasks.schedule_backend_background_job(BackendJobId::from(1)); + turbo_tasks.schedule_backend_background_job(BACKEND_JOB_INITIAL_SNAPSHOT); } fn stopping(&self) { @@ -1157,7 +1159,7 @@ impl TurboTasksBackendInner { turbo_tasks: &'a dyn TurboTasksBackendApi, ) -> Pin + Send + 'a>> { Box::pin(async move { - if *id == 1 || *id == 2 { + if id == BACKEND_JOB_INITIAL_SNAPSHOT || id == BACKEND_JOB_FOLLOW_UP_SNAPSHOT { let last_snapshot = self.last_snapshot.load(Ordering::Relaxed); let mut last_snapshot = self.start_time + Duration::from_millis(last_snapshot); loop { @@ -1165,7 +1167,7 @@ impl TurboTasksBackendInner { const SNAPSHOT_INTERVAL: Duration = Duration::from_secs(15); const IDLE_TIMEOUT: Duration = Duration::from_secs(1); - let time = if *id == 1 { + let time = if id == BACKEND_JOB_INITIAL_SNAPSHOT { FIRST_SNAPSHOT_WAIT } else { SNAPSHOT_INTERVAL @@ -1177,7 +1179,11 @@ impl TurboTasksBackendInner { if !self.stopping.load(Ordering::Acquire) { let mut idle_start_listener = self.idle_start_event.listen(); let mut idle_end_listener = self.idle_end_event.listen(); - let mut idle_time = until + IDLE_TIMEOUT; + let mut idle_time = if turbo_tasks.is_idle() { + Instant::now() + IDLE_TIMEOUT + } else { + far_future() + }; loop { tokio::select! { _ = &mut stop_listener => { @@ -1191,10 +1197,10 @@ impl TurboTasksBackendInner { idle_time = until + IDLE_TIMEOUT; idle_end_listener = self.idle_end_event.listen() }, - _ = tokio::time::sleep_until(until.into()) => { + _ = tokio::time::sleep_until(until) => { break; }, - _ = tokio::time::sleep_until(idle_time.into()) => { + _ = tokio::time::sleep_until(idle_time) => { if turbo_tasks.is_idle() { break; } @@ -1212,10 +1218,12 @@ impl TurboTasksBackendInner { continue; } let last_snapshot = last_snapshot.duration_since(self.start_time); - self.last_snapshot - .store(last_snapshot.as_millis() as u64, Ordering::Relaxed); + self.last_snapshot.store( + last_snapshot.as_millis().try_into().unwrap(), + Ordering::Relaxed, + ); - turbo_tasks.schedule_backend_background_job(BackendJobId::from(2)); + turbo_tasks.schedule_backend_background_job(BACKEND_JOB_FOLLOW_UP_SNAPSHOT); return; } } @@ -1525,3 +1533,12 @@ impl Backend for TurboTasksBackend { todo!() } } + +// from https://github.com/tokio-rs/tokio/blob/29cd6ec1ec6f90a7ee1ad641c03e0e00badbcb0e/tokio/src/time/instant.rs#L57-L63 +fn far_future() -> Instant { + // Roughly 30 years from now. + // API does not provide a way to obtain max `Instant` + // or convert specific date in the future to instant. + // 1000 years overflows on macOS, 100 years overflows on FreeBSD. + Instant::now() + Duration::from_secs(86400 * 365 * 30) +} diff --git a/turbopack/crates/turbo-tasks-backend/src/data.rs b/turbopack/crates/turbo-tasks-backend/src/data.rs index 62285cb04436f..4500d7037e1a4 100644 --- a/turbopack/crates/turbo-tasks-backend/src/data.rs +++ b/turbopack/crates/turbo-tasks-backend/src/data.rs @@ -8,6 +8,26 @@ use turbo_tasks::{ use crate::backend::{indexed::Indexed, TaskDataCategory}; +// this traits are needed for the transient variants of `CachedDataItem` +// transient variants are never cloned or compared +macro_rules! transient_traits { + ($name:ident) => { + impl Clone for $name { + fn clone(&self) -> Self { + // this impl is needed for the transient variants of `CachedDataItem` + // transient variants are never cloned + panic!(concat!(stringify!($name), " cannot be cloned")); + } + } + + impl PartialEq for $name { + fn eq(&self, _other: &Self) -> bool { + panic!(concat!(stringify!($name), " cannot be compared")); + } + } + }; +} + #[derive(Debug, Copy, Clone, Hash, PartialEq, Eq, Serialize, Deserialize)] pub struct CellRef { pub task: TaskId, @@ -53,17 +73,7 @@ impl RootState { } } -impl Clone for RootState { - fn clone(&self) -> Self { - panic!("RootState cannot be cloned"); - } -} - -impl PartialEq for RootState { - fn eq(&self, _other: &Self) -> bool { - panic!("RootState cannot be compared"); - } -} +transient_traits!(RootState); impl Eq for RootState {} @@ -90,17 +100,7 @@ pub enum InProgressState { }, } -impl Clone for InProgressState { - fn clone(&self) -> Self { - panic!("InProgressState cannot be cloned"); - } -} - -impl PartialEq for InProgressState { - fn eq(&self, _other: &Self) -> bool { - panic!("InProgressState cannot be compared"); - } -} +transient_traits!(InProgressState); impl Eq for InProgressState {} @@ -109,17 +109,7 @@ pub struct InProgressCellState { pub event: Event, } -impl Clone for InProgressCellState { - fn clone(&self) -> Self { - panic!("InProgressCell cannot be cloned"); - } -} - -impl PartialEq for InProgressCellState { - fn eq(&self, _other: &Self) -> bool { - panic!("InProgressCell cannot be compared"); - } -} +transient_traits!(InProgressCellState); impl Eq for InProgressCellState {} diff --git a/turbopack/crates/turbo-tasks/src/id.rs b/turbopack/crates/turbo-tasks/src/id.rs index 566800b0dbb17..99c7b0997a047 100644 --- a/turbopack/crates/turbo-tasks/src/id.rs +++ b/turbopack/crates/turbo-tasks/src/id.rs @@ -30,7 +30,7 @@ macro_rules! define_id { /// # Safety /// /// The passed `id` must not be zero. - pub unsafe fn new_unchecked(id: $primitive) -> Self { + pub const unsafe fn new_unchecked(id: $primitive) -> Self { Self { id: unsafe { NonZero::<$primitive>::new_unchecked(id) } } } } diff --git a/turbopack/crates/turbo-tasks/src/key_value_pair.rs b/turbopack/crates/turbo-tasks/src/key_value_pair.rs index 6aceaea04d4f7..dfd0e7bdb92e0 100644 --- a/turbopack/crates/turbo-tasks/src/key_value_pair.rs +++ b/turbopack/crates/turbo-tasks/src/key_value_pair.rs @@ -1,6 +1,8 @@ +use std::fmt::Debug; + pub trait KeyValuePair { - type Key: PartialEq + Eq + std::hash::Hash; - type Value; + type Key: Debug + Clone + PartialEq + Eq + std::hash::Hash; + type Value: Debug + Clone + Default + PartialEq + Eq; fn key(&self) -> Self::Key; fn value(&self) -> Self::Value; fn from_key_and_value(key: Self::Key, value: Self::Value) -> Self; diff --git a/turbopack/crates/turbo-tasks/src/scope.rs b/turbopack/crates/turbo-tasks/src/scope.rs index e7bb8713bd4cb..6420a71e3591f 100644 --- a/turbopack/crates/turbo-tasks/src/scope.rs +++ b/turbopack/crates/turbo-tasks/src/scope.rs @@ -2,6 +2,7 @@ use std::sync::Arc; use crate::{turbo_tasks, turbo_tasks_scope, TurboTasksApi}; +/// A wrapper around [`rayon::Scope`] that preserves the [`turbo_tasks_scope`]. pub struct Scope<'scope, 'a> { scope: &'a rayon::Scope<'scope>, handle: tokio::runtime::Handle, @@ -10,9 +11,9 @@ pub struct Scope<'scope, 'a> { } impl<'scope, 'a> Scope<'scope, 'a> { - pub fn spawn(&self, body: BODY) + pub fn spawn(&self, body: Body) where - BODY: FnOnce(&Scope<'scope, '_>) + Send + 'scope, + Body: FnOnce(&Scope<'scope, '_>) + Send + 'scope, { let span = self.span.clone(); let handle = self.handle.clone(); @@ -32,9 +33,10 @@ impl<'scope, 'a> Scope<'scope, 'a> { } } -pub fn scope<'scope, OP, R>(op: OP) -> R +/// A wrapper around [`rayon::in_place_scope`] that preserves the [`turbo_tasks_scope`]. +pub fn scope<'scope, Op, R>(op: Op) -> R where - OP: FnOnce(&Scope<'scope, '_>) -> R, + Op: FnOnce(&Scope<'scope, '_>) -> R, { let span = tracing::Span::current(); let handle = tokio::runtime::Handle::current(); From bdd1c6214dcff23084cbd9566c6b8128220f17a9 Mon Sep 17 00:00:00 2001 From: Tobias Koppers Date: Tue, 8 Oct 2024 09:33:01 +0200 Subject: [PATCH 46/46] clippy --- .../crates/turbo-tasks-backend/src/backend/operation/mod.rs | 2 +- turbopack/crates/turbo-tasks/src/scope.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/turbopack/crates/turbo-tasks-backend/src/backend/operation/mod.rs b/turbopack/crates/turbo-tasks-backend/src/backend/operation/mod.rs index e3a536c3f9f52..6218ab9c032f3 100644 --- a/turbopack/crates/turbo-tasks-backend/src/backend/operation/mod.rs +++ b/turbopack/crates/turbo-tasks-backend/src/backend/operation/mod.rs @@ -43,7 +43,7 @@ pub struct ExecuteContext<'a> { transaction: Option>, } -impl<'a> Drop for ExecuteContext<'a> { +impl Drop for ExecuteContext<'_> { fn drop(&mut self) { if self.parent.is_none() { if let Some(Some(transaction)) = self.transaction { diff --git a/turbopack/crates/turbo-tasks/src/scope.rs b/turbopack/crates/turbo-tasks/src/scope.rs index 6420a71e3591f..3d8fbf30ec2a8 100644 --- a/turbopack/crates/turbo-tasks/src/scope.rs +++ b/turbopack/crates/turbo-tasks/src/scope.rs @@ -10,7 +10,7 @@ pub struct Scope<'scope, 'a> { span: tracing::Span, } -impl<'scope, 'a> Scope<'scope, 'a> { +impl<'scope> Scope<'scope, '_> { pub fn spawn(&self, body: Body) where Body: FnOnce(&Scope<'scope, '_>) + Send + 'scope,