diff --git a/Cargo.lock b/Cargo.lock index 2a7132608..818c10f6d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3991,6 +3991,16 @@ dependencies = [ "human_format", ] +[[package]] +name = "pyo3-build-config" +version = "0.19.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "076c73d0bc438f7a4ef6fdd0c3bb4732149136abd952b110ac93e4edb13a6ba5" +dependencies = [ + "once_cell", + "target-lexicon", +] + [[package]] name = "quote" version = "1.0.33" @@ -4064,6 +4074,16 @@ dependencies = [ "num_cpus", ] +[[package]] +name = "redb" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff09a3feca6114998810d129ebbc8ca4a39d0ed7b8433ded2a27c7f84fc60abb" +dependencies = [ + "libc", + "pyo3-build-config", +] + [[package]] name = "redox_syscall" version = "0.2.16" @@ -4319,6 +4339,7 @@ dependencies = [ "pathdiff", "petgraph", "predicates", + "redb", "reqwest", "scarb-build-metadata", "scarb-metadata 1.8.0", @@ -4999,6 +5020,12 @@ dependencies = [ "xattr", ] +[[package]] +name = "target-lexicon" +version = "0.12.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14c39fd04924ca3a864207c66fc2cd7d22d7c016007f9ce846cbb9326331930a" + [[package]] name = "tempfile" version = "3.8.0" diff --git a/Cargo.toml b/Cargo.toml index 6a377b30e..0ba5bcbd8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -72,6 +72,7 @@ petgraph = "0.6" predicates = "3" proc-macro2 = "1" quote = "1" +redb = "1.2" reqwest = { version = "0.11.22", features = ["gzip", "brotli", "deflate", "json", "stream"] } semver = { version = "1", features = ["serde"] } serde = { version = "1", features = ["serde_derive"] } diff --git a/scarb/Cargo.toml b/scarb/Cargo.toml index db4143592..d95699b68 100644 --- a/scarb/Cargo.toml +++ b/scarb/Cargo.toml @@ -46,6 +46,7 @@ itertools.workspace = true once_cell.workspace = true pathdiff.workspace = true petgraph.workspace = true +redb.workspace = true reqwest.workspace = true 
scarb-build-metadata = { path = "../utils/scarb-build-metadata" } scarb-metadata = { path = "../scarb-metadata", default-features = false, features = ["builder"] } @@ -55,8 +56,8 @@ serde-untagged.workspace = true serde-value.workspace = true serde.workspace = true serde_json.workspace = true -sha2.workspace = true serde_repr.workspace = true +sha2.workspace = true smallvec.workspace = true smol_str.workspace = true tar.workspace = true diff --git a/scarb/src/core/registry/client/cache.rs b/scarb/src/core/registry/client/cache.rs index 7dceeb144..3c273c02c 100644 --- a/scarb/src/core/registry/client/cache.rs +++ b/scarb/src/core/registry/client/cache.rs @@ -1,22 +1,89 @@ use std::path::PathBuf; +use std::sync::Arc; -use anyhow::{bail, Result}; +use anyhow::{bail, Context, Result}; +use camino::Utf8PathBuf; +use redb::{ + Database, MultimapTableDefinition, ReadableMultimapTable, ReadableTable, TableDefinition, +}; +use semver::Version; +use tokio::sync::OnceCell; +use tokio::task::block_in_place; use tracing::trace; +use scarb_ui::Ui; + use crate::core::registry::client::{BeforeNetworkCallback, RegistryClient, RegistryResource}; -use crate::core::registry::index::IndexRecords; -use crate::core::{Config, ManifestDependency, PackageId}; +use crate::core::registry::index::{IndexRecord, IndexRecords}; +use crate::core::{Config, ManifestDependency, PackageId, SourceId}; +use crate::internal::fsx; + +// TODO(mkaput): Implement cache downloading. +// FIXME(mkaput): Avoid creating database if inner client does not trigger cache writes. +// FIXME(mkaput): We probably have to call db.compact() after all write txs we run in Scarb run. + +/// Multimap: `package name -> (version, index records)`. +const RECORDS: MultimapTableDefinition<'_, &str, (&str, &[u8])> = + MultimapTableDefinition::new("records"); +/// Map: `package name -> index records cache key`. +/// +/// Cache key as returned by wrapped [`RegistryClient`]. 
+const RECORDS_CACHE_KEYS: TableDefinition<'_, &str, &str> = + TableDefinition::new("records_cache_keys"); + +/// A caching layer on top of a [`RegistryClient`]. +/// +/// ## Database +/// +/// It uses [`redb`] as a local key-value database, where this object stores the following: +/// 1. Multimap table `records`: mapping from _package name_ to all index records that came from +/// the registry to date. +/// +/// On the disk, each record is stored as a pair of _package version_ and the record itself +/// serialized as minified JSON. This allows the cache to filter out records that do not match +/// requested dependency specification before deserializing the record itself, saving some +/// execution time (exact numbers are unknown, but Cargo suffered from the same problem, and it +/// implemented identical measures). +/// 2. Table `records_cache_keys`: which maps _package name_ to the last known _cache key_ returned +/// from the [`RegistryClient::get_records`] method call. +/// +/// Database files are stored in the `$SCARB_GLOBAL_CACHE/registry/cache` directory. For each +/// `SourceId` a separate database file is maintained, named `{source_id.ident()}.v1.redb`. +/// In case a new database format is used, it should be saved in a `*.v2.redb` file and so on. +/// Old versions should be simply deleted, without using sophisticated migration logic (remember, +/// this is just a cache!) Also, if the database file appears to be corrupted, it is simply deleted +/// and recreated from scratch. +/// +/// ## Workflow +/// +/// Each wrapper method of this struct performs more or less the same flow of steps: +/// 1. Get existing cache key from the database if it exists. +/// 2. Call actual [`RegistryClient`] method with found cache key (or `None`). +/// 3. If the method returned [`RegistryResource::NotFound`], then everything related to queried +/// resource is removed from the cache. +/// 4.
Or, if the method returned [`RegistryResource::InCache`], then cached value is deserialized +/// and returned. +/// 5. Or, if the method returned [`RegistryResource::Download`], then new resource data is saved +/// in cache (replacing existing items) along with new cache key and returned to the caller. pub struct RegistryClientCache<'c> { + source_id: SourceId, client: Box, - _config: &'c Config, + db_cell: OnceCell>, + config: &'c Config, } impl<'c> RegistryClientCache<'c> { - pub fn new(client: Box, config: &'c Config) -> Result { + pub fn new( + source_id: SourceId, + client: Box, + config: &'c Config, + ) -> Result { Ok(Self { + source_id, client, - _config: config, + db_cell: OnceCell::new(), + config, }) } @@ -30,22 +97,127 @@ impl<'c> RegistryClientCache<'c> { dependency: &ManifestDependency, before_network: BeforeNetworkCallback, ) -> Result { + let package_name = dependency.name.as_str(); + let db = self.db().await?; + + let cache_key = block_in_place(|| -> Result<_> { + trace!("looking up cache key"); + let tx = db.begin_read()?; + let table = tx.open_table(RECORDS_CACHE_KEYS)?; + let cache_key = table.get(package_name)?.map(|g| g.value().to_owned()); + trace!(?cache_key); + Ok(cache_key) + }) + .with_context(|| db_error("failed to lookup cache key in registry cache")) + .or_else(|err| -> Result<_> { + self.config.ui().warn_anyhow(&err); + Ok(None) + })?; + match self .client - .get_records(dependency.name.clone(), before_network) + .get_records( + dependency.name.clone(), + cache_key.as_deref(), + before_network, + ) .await? 
{ RegistryResource::NotFound => { trace!("package not found in registry, pruning cache"); + + block_in_place(|| -> Result<_> { + let tx = db.begin_write()?; + { + let mut table = tx.open_multimap_table(RECORDS)?; + table.remove_all(package_name)?; + } + tx.commit()?; + Ok(()) + }) + .with_context(|| db_error("failed to purge cache from now non-existent entries")) + .or_else(|err| -> Result<_> { + self.config.ui().warn_anyhow(&err); + Ok(()) + })?; + + trace!("cache pruned successfully"); + bail!("package not found in registry: {dependency}") } + RegistryResource::InCache => { trace!("getting records from cache"); - todo!() + + block_in_place(|| -> Result<_> { + let tx = db.begin_read()?; + let table = tx.open_multimap_table(RECORDS)?; + + let mut records = IndexRecords::new(); + for g in table.get(package_name)? { + let g = g?; + let (raw_version, raw_record) = g.value(); + + let version = Version::parse(raw_version) + .with_context(|| db_fatal("failed to parse version from cache"))?; + if !dependency.matches_name_and_version(&dependency.name, &version) { + continue; + } + + let record = serde_json::from_slice::(raw_record) + .with_context(|| { + db_fatal("failed to deserialize index record from cache") + })?; + + records.push(record); + } + + trace!("records read successfully"); + + Ok(records) + }) } - RegistryResource::Download { resource, .. 
} => { + + RegistryResource::Download { + resource: records, + cache_key, + } => { trace!("got new records, invalidating cache"); - Ok(resource) + trace!(?cache_key); + + if let Some(cache_key) = cache_key { + block_in_place(|| -> Result<_> { + let tx = db.begin_write()?; + { + let mut table = tx.open_table(RECORDS_CACHE_KEYS)?; + table.insert(package_name, cache_key.as_str())?; + } + { + let mut table = tx.open_multimap_table(RECORDS)?; + table.remove_all(package_name)?; + + for record in &records { + let raw_version = record.version.to_string(); + let raw_record = serde_json::to_vec(&record)?; + table.insert( + package_name, + (raw_version.as_str(), raw_record.as_slice()), + )?; + } + } + tx.commit()?; + Ok(()) + }) + .with_context(|| db_error("failed to cache registry index records")) + .or_else(|err| -> Result<_> { + self.config.ui().warn_anyhow(&err); + Ok(()) + })?; + + trace!("cache updated successfully"); + } + + Ok(records) } } } @@ -72,4 +244,66 @@ impl<'c> RegistryClientCache<'c> { } } } + + #[tracing::instrument(level = "trace", skip_all)] + async fn db(&self) -> Result> { + fn create(db_path: Utf8PathBuf, ui: Ui) -> Result> { + Database::create(&db_path) + .context("failed to open local registry cache, trying to recreate it") + .or_else(|error| { + ui.warn_anyhow(&error); + fsx::remove_file(&db_path).context("failed to remove local registry cache")?; + Database::create(&db_path) + .with_context(|| db_fatal("failed to open local registry cache")) + }) + .map(Arc::new) + } + + fn init_tables(db: &Database) -> Result<()> { + let tx = db.begin_write()?; + { + tx.open_multimap_table(RECORDS)?; + tx.open_table(RECORDS_CACHE_KEYS)?; + } + tx.commit()?; + Ok(()) + } + + self.db_cell + .get_or_try_init(|| async { + let ui = self.config.ui(); + let fs = self.config.dirs().registry_dir().into_child("cache"); + let db_path = fs + .path_existent()? 
+ .join(format!("{}.v1.redb", self.source_id.ident())); + + trace!("opening local registry cache: {db_path}"); + block_in_place(move || { + let db = create(db_path, ui)?; + trace!("database opened/created successfully"); + init_tables(&db) + .context("failed to initialize local registry cache database")?; + trace!("created all tables in local registry cache database"); + Ok(db) + }) + }) + .await + .cloned() + } +} + +fn db_error(message: &str) -> String { + format!( + "{message}\n\ + note: perhaps cache is corrupted\n\ + help: try restarting scarb to recreate it" + ) +} + +fn db_fatal(message: &str) -> String { + format!( + "{message}\n\ + note: cache is corrupted and is in unrecoverable state\n\ + help: run the following to wipe entire cache: scarb cache clean" + ) } diff --git a/scarb/src/core/registry/client/http.rs b/scarb/src/core/registry/client/http.rs index 1e4e248ad..c466fdb8c 100644 --- a/scarb/src/core/registry/client/http.rs +++ b/scarb/src/core/registry/client/http.rs @@ -80,6 +80,7 @@ impl<'c> RegistryClient for HttpRegistryClient<'c> { async fn get_records( &self, package: PackageName, + _cache_key: Option<&str>, before_network: BeforeNetworkCallback, ) -> Result> { let index_config = self.index_config().await?; diff --git a/scarb/src/core/registry/client/local.rs b/scarb/src/core/registry/client/local.rs index 119d4d27a..e69bb7cc2 100644 --- a/scarb/src/core/registry/client/local.rs +++ b/scarb/src/core/registry/client/local.rs @@ -97,10 +97,13 @@ impl RegistryClient for LocalRegistryClient { async fn get_records( &self, package: PackageName, + cache_key: Option<&str>, _: BeforeNetworkCallback, ) -> Result> { trace!(?package); + assert!(cache_key.is_none()); + let records_path = self.records_path(&package); spawn_blocking(move || { diff --git a/scarb/src/core/registry/client/mod.rs b/scarb/src/core/registry/client/mod.rs index 44b1fa6f8..ca0d2db57 100644 --- a/scarb/src/core/registry/client/mod.rs +++ b/scarb/src/core/registry/client/mod.rs @@ 
-49,6 +49,7 @@ pub trait RegistryClient: Send + Sync { async fn get_records( &self, package: PackageName, + cache_key: Option<&str>, before_network: BeforeNetworkCallback, ) -> Result>; diff --git a/scarb/src/internal/fsx.rs b/scarb/src/internal/fsx.rs index df3ed405c..3530c8c62 100644 --- a/scarb/src/internal/fsx.rs +++ b/scarb/src/internal/fsx.rs @@ -35,6 +35,16 @@ pub fn create_dir_all(p: impl AsRef) -> Result<()> { } } +/// Equivalent to [`fs::remove_file`] with better error messages. +pub fn remove_file(p: impl AsRef) -> Result<()> { + return inner(p.as_ref()); + + fn inner(p: &Path) -> Result<()> { + fs::remove_file(p).with_context(|| format!("failed to remove file `{}`", p.display()))?; + Ok(()) + } +} + /// Equivalent to [`fs::remove_dir_all`] with better error messages. pub fn remove_dir_all(p: impl AsRef) -> Result<()> { return inner(p.as_ref()); diff --git a/scarb/src/sources/registry.rs b/scarb/src/sources/registry.rs index 7d35142d6..44f36a8d8 100644 --- a/scarb/src/sources/registry.rs +++ b/scarb/src/sources/registry.rs @@ -31,7 +31,7 @@ pub struct RegistrySource<'c> { impl<'c> RegistrySource<'c> { pub fn new(source_id: SourceId, config: &'c Config) -> Result { let client = Self::create_client(source_id, config)?; - let client = RegistryClientCache::new(client, config)?; + let client = RegistryClientCache::new(source_id, client, config)?; let package_sources = PackageSourceStore::new(source_id, config); diff --git a/utils/scarb-ui/src/lib.rs b/utils/scarb-ui/src/lib.rs index a7f8e0279..78a2fb8b5 100644 --- a/utils/scarb-ui/src/lib.rs +++ b/utils/scarb-ui/src/lib.rs @@ -122,6 +122,14 @@ impl Ui { self.error(format!("{error:?}").trim()) } + /// Nicely format an [`anyhow::Error`] for display to the user, and print it with [`Ui::warn`]. + pub fn warn_anyhow(&self, error: &anyhow::Error) { + // NOTE: Some errors, particularly ones from `toml_edit` like to add trailing newlines. 
+ // This isn't a big problem for users, but it's causing issues in tests, where trailing + // whitespace collides with `indoc`. + self.warn(format!("{error:?}").trim()) + } + fn do_print(&self, message: T) { match self.output_format { OutputFormat::Text => message.print_text(),