diff --git a/scarb/src/core/registry/client/cache.rs b/scarb/src/core/registry/client/cache.rs new file mode 100644 index 000000000..7dceeb144 --- /dev/null +++ b/scarb/src/core/registry/client/cache.rs @@ -0,0 +1,75 @@ +use std::path::PathBuf; + +use anyhow::{bail, Result}; +use tracing::trace; + +use crate::core::registry::client::{BeforeNetworkCallback, RegistryClient, RegistryResource}; +use crate::core::registry::index::IndexRecords; +use crate::core::{Config, ManifestDependency, PackageId}; + +pub struct RegistryClientCache<'c> { + client: Box, + _config: &'c Config, +} + +impl<'c> RegistryClientCache<'c> { + pub fn new(client: Box, config: &'c Config) -> Result { + Ok(Self { + client, + _config: config, + }) + } + + /// Layer over [`RegistryClient::get_records`] that caches the result. + /// + /// It takes [`ManifestDependency`] instead of [`PackageName`] to allow performing some + /// optimizations by pre-filtering index records on cache-level. + #[tracing::instrument(level = "trace", skip_all)] + pub async fn get_records_with_cache( + &self, + dependency: &ManifestDependency, + before_network: BeforeNetworkCallback, + ) -> Result { + match self + .client + .get_records(dependency.name.clone(), before_network) + .await? + { + RegistryResource::NotFound => { + trace!("package not found in registry, pruning cache"); + bail!("package not found in registry: {dependency}") + } + RegistryResource::InCache => { + trace!("getting records from cache"); + todo!() + } + RegistryResource::Download { resource, .. } => { + trace!("got new records, invalidating cache"); + Ok(resource) + } + } + } + + /// Layer over [`RegistryClient::download`] that caches the result. + #[tracing::instrument(level = "trace", skip_all)] + pub async fn download_with_cache( + &self, + package: PackageId, + before_network: BeforeNetworkCallback, + ) -> Result { + match self.client.download(package, before_network).await? { + RegistryResource::NotFound => { + trace!("archive not found in registry, pruning cache"); + bail!("could not find downloadable archive for package indexed in registry: {package}") + } + RegistryResource::InCache => { + trace!("using cached archive"); + todo!() + } + RegistryResource::Download { resource, .. } => { + trace!("got new archive, invalidating cache"); + Ok(resource) + } + } + } +} diff --git a/scarb/src/core/registry/client/http.rs b/scarb/src/core/registry/client/http.rs index 792ae96f1..1e4e248ad 100644 --- a/scarb/src/core/registry/client/http.rs +++ b/scarb/src/core/registry/client/http.rs @@ -1,5 +1,4 @@ use std::path::PathBuf; -use std::sync::Arc; use anyhow::{Context, Result}; use async_trait::async_trait; @@ -10,9 +9,9 @@ use tokio::fs::OpenOptions; use tokio::io; use tokio::io::BufWriter; use tokio::sync::OnceCell; -use tracing::{debug, trace}; +use tracing::{debug, error, trace}; -use crate::core::registry::client::RegistryClient; +use crate::core::registry::client::{BeforeNetworkCallback, RegistryClient, RegistryResource}; use crate::core::registry::index::{IndexConfig, IndexRecords}; use crate::core::{Config, Package, PackageId, PackageName, SourceId}; use crate::flock::{FileLockGuard, Filesystem}; @@ -78,14 +77,16 @@ impl<'c> HttpRegistryClient<'c> { #[async_trait] impl<'c> RegistryClient for HttpRegistryClient<'c> { - fn is_offline(&self) -> bool { - false - } - - async fn get_records(&self, package: PackageName) -> Result>> { + async fn get_records( + &self, + package: PackageName, + before_network: BeforeNetworkCallback, + ) -> Result> { let index_config = self.index_config().await?; let records_url = index_config.index.expand(package.into())?; + before_network()?; + let response = self .config .http()? @@ -97,7 +98,7 @@ impl<'c> RegistryClient for HttpRegistryClient<'c> { if let Err(err) = &response { if let Some(status) = err.status() { if status == StatusCode::NOT_FOUND { - return Ok(None); + return Ok(RegistryResource::NotFound); } } } @@ -107,24 +108,39 @@ impl<'c> RegistryClient for HttpRegistryClient<'c> { .await .context("failed to deserialize index records")?; - Ok(Some(Arc::new(records))) - } - - async fn is_downloaded(&self, _package: PackageId) -> bool { - // TODO(mkaput): Cache downloaded packages. - false + Ok(RegistryResource::Download { + resource: records, + cache_key: None, + }) } - async fn download(&self, package: PackageId) -> Result { + async fn download( + &self, + package: PackageId, + before_network: BeforeNetworkCallback, + ) -> Result> { let dl_url = self.index_config().await?.dl.expand(package.into())?; + before_network()?; + let response = self .config .http()? .get(dl_url) .send() .await? - .error_for_status()?; + .error_for_status(); + + if let Err(err) = &response { + if let Some(status) = err.status() { + if status == StatusCode::NOT_FOUND { + error!("package `{package}` not found in registry: {err:?}"); + return Ok(RegistryResource::NotFound); + } + } + } + + let response = response?; let output_path = self.dl_fs.path_existent()?.join(package.tarball_name()); let output_file = OpenOptions::new() @@ -149,7 +165,10 @@ impl<'c> RegistryClient for HttpRegistryClient<'c> { .context("failed to save response chunk on disk")?; } - Ok(output_path.into_std_path_buf()) + Ok(RegistryResource::Download { + resource: output_path.into_std_path_buf(), + cache_key: None, + }) } async fn supports_publish(&self) -> Result { diff --git a/scarb/src/core/registry/client/local.rs b/scarb/src/core/registry/client/local.rs index 88928325d..119d4d27a 100644 --- a/scarb/src/core/registry/client/local.rs +++ b/scarb/src/core/registry/client/local.rs @@ -3,15 +3,15 @@ use std::io; use std::io::{BufReader, BufWriter, Seek, SeekFrom}; use std::ops::Deref; use std::path::{Path, PathBuf}; -use std::sync::Arc; use anyhow::{ensure, Context, Error, Result}; use async_trait::async_trait; use fs4::FileExt; use tokio::task::spawn_blocking; +use tracing::trace; use url::Url; -use crate::core::registry::client::RegistryClient; +use crate::core::registry::client::{BeforeNetworkCallback, RegistryClient, RegistryResource}; use crate::core::registry::index::{IndexDependency, IndexRecord, IndexRecords, TemplateUrl}; use crate::core::{Checksum, Digest, Package, PackageId, PackageName, Summary}; use crate::flock::FileLockGuard; @@ -93,12 +93,14 @@ impl LocalRegistryClient { #[async_trait] impl RegistryClient for LocalRegistryClient { - fn is_offline(&self) -> bool { - true - } + #[tracing::instrument(level = "trace", skip_all)] + async fn get_records( + &self, + package: PackageName, + _: BeforeNetworkCallback, + ) -> Result> { + trace!(?package); - #[tracing::instrument(level = "trace", skip(self))] - async fn get_records(&self, package: PackageName) -> Result>> { let records_path = self.records_path(&package); spawn_blocking(move || { @@ -107,22 +109,28 @@ impl RegistryClient for LocalRegistryClient { if e.downcast_ref::() .map_or(false, |ioe| ioe.kind() == io::ErrorKind::NotFound) => { - return Ok(None); + return Ok(RegistryResource::NotFound); } r => r?, }; let records = serde_json::from_slice(&records)?; - Ok(Some(Arc::new(records))) + Ok(RegistryResource::Download { + resource: records, + cache_key: None, + }) }) .await? } - async fn is_downloaded(&self, _package: PackageId) -> bool { - true - } - - async fn download(&self, package: PackageId) -> Result { - Ok(self.dl_path(package)) + async fn download( + &self, + package: PackageId, + _: BeforeNetworkCallback, + ) -> Result> { + Ok(RegistryResource::Download { + resource: self.dl_path(package), + cache_key: None, + }) } async fn supports_publish(&self) -> Result { diff --git a/scarb/src/core/registry/client/mod.rs b/scarb/src/core/registry/client/mod.rs index 861f24668..44b1fa6f8 100644 --- a/scarb/src/core/registry/client/mod.rs +++ b/scarb/src/core/registry/client/mod.rs @@ -1,5 +1,4 @@ use std::path::PathBuf; -use std::sync::Arc; use anyhow::Result; use async_trait::async_trait; @@ -8,43 +7,70 @@ use crate::core::registry::index::IndexRecords; use crate::core::{Package, PackageId, PackageName}; use crate::flock::FileLockGuard; +pub mod cache; pub mod http; pub mod local; +/// Result from loading data from a registry. +pub enum RegistryResource { + /// The requested resource was not found. + NotFound, + /// The cache is valid and the cached data should be used. + #[allow(dead_code)] + InCache, + /// The cache is out of date, new data was downloaded and should be used from now on. + Download { + resource: T, + /// Client-dependent opaque value used to determine whether resource is out of date. + /// + /// Returning `None` means that this client/resource is not cacheable. + cache_key: Option, + }, +} + +pub type BeforeNetworkCallback = Box Result<()> + Send>; + #[async_trait] pub trait RegistryClient: Send + Sync { - /// State whether this registry works in offline mode. - /// - /// Local registries are expected to perform immediate file operations, while remote registries - /// can take some IO-bound time. This flag also influences appearance of various UI elements. - fn is_offline(&self) -> bool; - /// Get the index record for a specific named package from this index. /// /// Returns `None` if the package is not present in the index. /// + /// ## Callbacks + /// + /// The `before_network` callback **must** be called right before doing actual network requests. + /// It might return an error which **must** be immediately bubbled out. If this client does not + /// perform network requests, this callback **must** not be called at all. + /// /// ## Caching /// /// This method is not expected to internally cache the result, but it is not prohibited either. /// Scarb applies specialized caching layers on top of clients. - async fn get_records(&self, package: PackageName) -> Result>>; - - /// Check if the package `.tar.zst` file has already been downloaded and is stored on disk. - /// - /// On internal errors, this method should return `false`. This method must not perform any - /// network operations (it can be called before offline mode check). - async fn is_downloaded(&self, package: PackageId) -> bool; + async fn get_records( + &self, + package: PackageName, + before_network: BeforeNetworkCallback, + ) -> Result>; /// Download the package `.tar.zst` file. /// /// Returns a [`PathBuf`] to the downloaded `.tar.zst` file. /// + /// ## Callbacks + /// + /// The `before_network` callback **must** be called right before doing actual network requests. + /// It might return an error which **must** be immediately bubbled out. If this client does not + /// perform network requests, this callback **must** not be called at all. + /// /// ## Caching /// - /// If the registry is remote, i.e. actually downloads files and writes them to disk, - /// it should write downloaded files to Scarb cache directory. If the file has already been - /// downloaded, it should avoid downloading it again, and read it from this cache instead. - async fn download(&self, package: PackageId) -> Result; + /// This method is not expected to internally cache the result, but it is not prohibited either. + /// Scarb applies specialized caching layers on top of clients. + async fn download( + &self, + package: PackageId, + before_network: BeforeNetworkCallback, + ) -> Result>; /// State whether packages can be published to this registry. /// diff --git a/scarb/src/sources/registry.rs b/scarb/src/sources/registry.rs index b4d5a1632..7d35142d6 100644 --- a/scarb/src/sources/registry.rs +++ b/scarb/src/sources/registry.rs @@ -8,6 +8,7 @@ use tracing::trace; use scarb_ui::components::Status; +use crate::core::registry::client::cache::RegistryClientCache; use crate::core::registry::client::http::HttpRegistryClient; use crate::core::registry::client::local::LocalRegistryClient; use crate::core::registry::client::RegistryClient; @@ -23,16 +24,14 @@ use crate::sources::PathSource; pub struct RegistrySource<'c> { source_id: SourceId, config: &'c Config, - client: Box, + client: RegistryClientCache<'c>, package_sources: PackageSourceStore<'c>, } impl<'c> RegistrySource<'c> { pub fn new(source_id: SourceId, config: &'c Config) -> Result { let client = Self::create_client(source_id, config)?; - - // TODO(mkaput): Wrap remote clients in a disk caching layer. - // TODO(mkaput): Wrap all clients in an in-memory caching layer. + let client = RegistryClientCache::new(client, config)?; let package_sources = PackageSourceStore::new(source_id, config); @@ -73,19 +72,28 @@ impl<'c> RegistrySource<'c> { impl<'c> Source for RegistrySource<'c> { #[tracing::instrument(level = "trace", skip(self))] async fn query(&self, dependency: &ManifestDependency) -> Result> { - let Some(records) = self + let before_network = { + let source_id = self.source_id; + let network_allowed = self.config.network_allowed(); + Box::new(move || { + ensure!( + network_allowed, + "cannot query `{source_id}` in offline mode" + ); + Ok(()) + }) + }; + + let records = self .client - .get_records(dependency.name.clone()) + .get_records_with_cache(dependency, before_network) .await .with_context(|| { format!( "failed to lookup for `{dependency}` in registry: {}", self.source_id ) - })? - else { - bail!("package not found in registry: {dependency}"); - }; + })?; let build_summary_from_index_record = |record: &IndexRecord| { let package_id = PackageId::new( @@ -120,6 +128,8 @@ impl<'c> Source for RegistrySource<'c> { .iter() // NOTE: We filter based on IndexRecords here, to avoid unnecessarily allocating // PackageIds just to abandon them soon after. + // NOTE: Technically, RegistryClientCache may already have filtered the records, + // but it is not required to do so, so we do it here again as a safety measure. .filter(|record| dependency.version_req.matches(&record.version)) .map(build_summary_from_index_record) .collect()) @@ -127,21 +137,27 @@ impl<'c> Source for RegistrySource<'c> { #[tracing::instrument(level = "trace", skip(self))] async fn download(&self, id: PackageId) -> Result { - let is_downloaded = self.client.is_downloaded(id).await; - - ensure!( - self.config.network_allowed() || self.client.is_offline() || is_downloaded, - "cannot download from `{}` in offline mode", - self.source_id - ); - - if !is_downloaded && !self.client.is_offline() { - self.config - .ui() - .print(Status::new("Downloading", &id.to_string())); - } + let before_network = { + let source_id = self.source_id; + let network_allowed = self.config.network_allowed(); + let ui = self.config.ui(); + Box::new(move || { + ensure!( + network_allowed, + "cannot query `{source_id}` in offline mode" + ); + + ui.print(Status::new("Downloading", &id.to_string())); + + Ok(()) + }) + }; - let archive = self.client.download(id).await?; + let archive = self + .client + .download_with_cache(id, before_network) + .await + .with_context(|| format!("failed to download package: {id}"))?; self.verify_checksum(id, archive.clone()).await?; self.load_package(id, archive).await @@ -164,15 +180,9 @@ impl<'c> RegistrySource<'c> { /// This method extracts the tarball into cache directory, and then loads it using /// suitably configured [`PathSource`]. async fn load_package(&self, id: PackageId, archive: PathBuf) -> Result { - if self.client.is_offline() { - self.config - .ui() - .print(Status::new("Unpacking", &id.to_string())); - } else { - self.config - .ui() - .verbose(Status::new("Unpacking", &id.to_string())); - } + self.config + .ui() + .verbose(Status::new("Unpacking", &id.to_string())); let path = self.package_sources.extract(id, archive).await?; let path_source = PathSource::recursive_at(&path, self.source_id, self.config); diff --git a/scarb/tests/http_registry.rs b/scarb/tests/http_registry.rs index 969230b71..cbb93464e 100644 --- a/scarb/tests/http_registry.rs +++ b/scarb/tests/http_registry.rs @@ -66,7 +66,10 @@ fn not_found() { .assert() .failure() .stdout_matches(indoc! {r#" - error: package not found in registry: baz ^1 (registry+http://[..]) + error: failed to lookup for `baz ^1 (registry+http://[..])` in registry: registry+http://[..] + + Caused by: + package not found in registry: baz ^1 (registry+http://[..]) "#}); } diff --git a/scarb/tests/local_registry.rs b/scarb/tests/local_registry.rs index 62688c983..53eb2129a 100644 --- a/scarb/tests/local_registry.rs +++ b/scarb/tests/local_registry.rs @@ -34,9 +34,7 @@ fn usage() { .current_dir(&t) .assert() .success() - .stdout_matches(indoc! {r#" - [..] Unpacking bar v1.0.0 ([..]) - "#}); + .stdout_matches(indoc! {r#""#}); } #[test] @@ -65,7 +63,10 @@ fn not_found() { .assert() .failure() .stdout_matches(indoc! {r#" - error: package not found in registry: baz ^1 (registry+file://[..]) + error: failed to lookup for `baz ^1 (registry+file://[..])` in registry: registry+file://[..] + + Caused by: + package not found in registry: baz ^1 (registry+file://[..]) "#}); } @@ -89,7 +90,10 @@ fn empty_registry() { .assert() .failure() .stdout_matches(indoc! {r#" - error: package not found in registry: baz ^1 (registry+file://[..]) + error: failed to lookup for `baz ^1 (registry+file://[..])` in registry: registry+file://[..] + + Caused by: + package not found in registry: baz ^1 (registry+file://[..]) "#}); }