From 6739e49c27d3eac6478cb1b50a7d92f1feab0769 Mon Sep 17 00:00:00 2001 From: Marek Kaput Date: Wed, 18 Oct 2023 16:10:34 +0200 Subject: [PATCH] Initial implementation of HTTP registry client commit-id:d95e5f26 --- Cargo.lock | 2 + Cargo.toml | 2 +- scarb/src/core/registry/client/http.rs | 163 ++++++++++++++++++ scarb/src/core/registry/client/mod.rs | 1 + scarb/src/core/registry/index/config.rs | 4 + scarb/src/sources/registry.rs | 28 ++- scarb/tests/http_registry.rs | 102 +++++++++++ utils/scarb-test-support/src/registry/http.rs | 79 +++++++++ utils/scarb-test-support/src/registry/mod.rs | 1 + 9 files changed, 373 insertions(+), 9 deletions(-) create mode 100644 scarb/src/core/registry/client/http.rs create mode 100644 scarb/tests/http_registry.rs create mode 100644 utils/scarb-test-support/src/registry/http.rs diff --git a/Cargo.lock b/Cargo.lock index d4e9d03ba..df80da040 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1828,7 +1828,9 @@ version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "29f9df8a11882c4e3335eb2d18a0137c505d9ca927470b0cac9c6f0ae07d28f7" dependencies = [ + "async-trait", "rustix", + "tokio", "windows-sys 0.48.0", ] diff --git a/Cargo.toml b/Cargo.toml index 2feecf1de..f350e5208 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -53,7 +53,7 @@ deno_task_shell = "0.13" derive_builder = "0.12" directories = "5" dunce = "1" -fs4 = "0.7" +fs4 = { version = "0.7", features = ["tokio"] } fs_extra = "1.3.0" futures = { version = "0.3", default-features = false, features = ["std", "async-await"] } gix = "0.54" diff --git a/scarb/src/core/registry/client/http.rs b/scarb/src/core/registry/client/http.rs new file mode 100644 index 000000000..792ae96f1 --- /dev/null +++ b/scarb/src/core/registry/client/http.rs @@ -0,0 +1,163 @@ +use std::path::PathBuf; +use std::sync::Arc; + +use anyhow::{Context, Result}; +use async_trait::async_trait; +use fs4::tokio::AsyncFileExt; +use futures::StreamExt; +use reqwest::StatusCode; +use tokio::fs::OpenOptions; +use tokio::io; +use tokio::io::BufWriter; +use tokio::sync::OnceCell; +use tracing::{debug, trace}; + +use crate::core::registry::client::RegistryClient; +use crate::core::registry::index::{IndexConfig, IndexRecords}; +use crate::core::{Config, Package, PackageId, PackageName, SourceId}; +use crate::flock::{FileLockGuard, Filesystem}; + +// TODO(mkaput): Honour ETag and Last-Modified headers. +// TODO(mkaput): Progressbar. +// TODO(mkaput): Request timeout. + +/// Remote registry served by the HTTP-based registry API. +pub struct HttpRegistryClient<'c> { + source_id: SourceId, + config: &'c Config, + cached_index_config: OnceCell, + dl_fs: Filesystem<'c>, +} + +impl<'c> HttpRegistryClient<'c> { + pub fn new(source_id: SourceId, config: &'c Config) -> Result { + let dl_fs = config + .dirs() + .registry_dir() + .into_child("dl") + .into_child(source_id.ident()); + + Ok(Self { + source_id, + config, + cached_index_config: Default::default(), + dl_fs, + }) + } + + async fn index_config(&self) -> Result<&IndexConfig> { + // TODO(mkaput): Cache config locally, honouring ETag and Last-Modified headers. + + async fn load(source_id: SourceId, config: &Config) -> Result { + let index_config_url = source_id + .url + .join(IndexConfig::WELL_KNOWN_PATH) + .expect("Registry config URL should always be valid."); + debug!("fetching registry config: {index_config_url}"); + + let index_config = config + .http()? + .get(index_config_url) + .send() + .await? + .error_for_status()? + .json::() + .await?; + + trace!(index_config = %serde_json::to_string(&index_config).unwrap()); + + Ok(index_config) + } + + self.cached_index_config + .get_or_try_init(|| load(self.source_id, self.config)) + .await + .context("failed to fetch registry config") + } +} + +#[async_trait] +impl<'c> RegistryClient for HttpRegistryClient<'c> { + fn is_offline(&self) -> bool { + false + } + + async fn get_records(&self, package: PackageName) -> Result>> { + let index_config = self.index_config().await?; + let records_url = index_config.index.expand(package.into())?; + + let response = self + .config + .http()? + .get(records_url) + .send() + .await? + .error_for_status(); + + if let Err(err) = &response { + if let Some(status) = err.status() { + if status == StatusCode::NOT_FOUND { + return Ok(None); + } + } + } + + let records = response? + .json() + .await + .context("failed to deserialize index records")?; + + Ok(Some(Arc::new(records))) + } + + async fn is_downloaded(&self, _package: PackageId) -> bool { + // TODO(mkaput): Cache downloaded packages. + false + } + + async fn download(&self, package: PackageId) -> Result { + let dl_url = self.index_config().await?.dl.expand(package.into())?; + + let response = self + .config + .http()? + .get(dl_url) + .send() + .await? + .error_for_status()?; + + let output_path = self.dl_fs.path_existent()?.join(package.tarball_name()); + let output_file = OpenOptions::new() + .read(true) + .write(true) + .truncate(true) + .create(true) + .open(&output_path) + .await + .with_context(|| format!("failed to open: {output_path}"))?; + + output_file + .lock_exclusive() + .with_context(|| format!("failed to lock file: {output_path}"))?; + + let mut stream = response.bytes_stream(); + let mut writer = BufWriter::new(output_file); + while let Some(chunk) = stream.next().await { + let chunk = chunk.context("failed to read response chunk")?; + io::copy_buf(&mut &*chunk, &mut writer) + .await + .context("failed to save response chunk on disk")?; + } + + Ok(output_path.into_std_path_buf()) + } + + async fn supports_publish(&self) -> Result { + // TODO(mkaput): Publishing to HTTP registries is not implemented yet. + Ok(false) + } + + async fn publish(&self, _package: Package, _tarball: FileLockGuard) -> Result<()> { + todo!("Publishing to HTTP registries is not implemented yet.") + } +} diff --git a/scarb/src/core/registry/client/mod.rs b/scarb/src/core/registry/client/mod.rs index 1c1f6af74..861f24668 100644 --- a/scarb/src/core/registry/client/mod.rs +++ b/scarb/src/core/registry/client/mod.rs @@ -8,6 +8,7 @@ use crate::core::registry::index::IndexRecords; use crate::core::{Package, PackageId, PackageName}; use crate::flock::FileLockGuard; +pub mod http; pub mod local; #[async_trait] diff --git a/scarb/src/core/registry/index/config.rs b/scarb/src/core/registry/index/config.rs index f8b6773a2..1fd6e0853 100644 --- a/scarb/src/core/registry/index/config.rs +++ b/scarb/src/core/registry/index/config.rs @@ -43,6 +43,10 @@ pub struct IndexConfig { pub index: TemplateUrl, } +impl IndexConfig { + pub const WELL_KNOWN_PATH: &'_ str = "config.json"; +} + #[derive(Copy, Clone, Debug, Default, Eq, PartialEq, Serialize, Deserialize)] #[serde(into = "u8", try_from = "u8")] pub struct IndexVersion; diff --git a/scarb/src/sources/registry.rs b/scarb/src/sources/registry.rs index 90cccb21b..b4d5a1632 100644 --- a/scarb/src/sources/registry.rs +++ b/scarb/src/sources/registry.rs @@ -2,12 +2,13 @@ use std::collections::HashSet; use std::fmt; use std::path::PathBuf; -use anyhow::{bail, ensure, Context, Result}; +use anyhow::{anyhow, bail, ensure, Context, Result}; use async_trait::async_trait; use tracing::trace; use scarb_ui::components::Status; +use crate::core::registry::client::http::HttpRegistryClient; use crate::core::registry::client::local::LocalRegistryClient; use crate::core::registry::client::RegistryClient; use crate::core::registry::index::IndexRecord; @@ -45,14 +46,25 @@ impl<'c> RegistrySource<'c> { pub fn create_client( source_id: SourceId, - _config: &'c Config, + config: &'c Config, ) -> Result> { - if let Ok(path) = source_id.url.to_file_path() { - trace!("creating local registry client for: {source_id}"); - Ok(Box::new(LocalRegistryClient::new(&path)?)) - } else { - // TODO(mkaput): Implement pipelining HTTP client. - bail!("unsupported registry protocol: {source_id}") + assert!(source_id.is_registry()); + match source_id.url.scheme() { + "file" => { + trace!("creating local registry client for: {source_id}"); + let path = source_id + .url + .to_file_path() + .map_err(|_| anyhow!("url is not a valid path: {}", source_id.url))?; + Ok(Box::new(LocalRegistryClient::new(&path)?)) + } + "http" | "https" => { + trace!("creating http registry client for: {source_id}"); + Ok(Box::new(HttpRegistryClient::new(source_id, config)?)) + } + _ => { + bail!("unsupported registry protocol: {source_id}") + } } } } diff --git a/scarb/tests/http_registry.rs b/scarb/tests/http_registry.rs new file mode 100644 index 000000000..969230b71 --- /dev/null +++ b/scarb/tests/http_registry.rs @@ -0,0 +1,102 @@ +use assert_fs::prelude::*; +use assert_fs::TempDir; +use indoc::indoc; +use std::fs; +use std::time::Duration; + +use scarb_test_support::command::Scarb; +use scarb_test_support::project_builder::{Dep, DepBuilder, ProjectBuilder}; +use scarb_test_support::registry::http::HttpRegistry; + +#[test] +fn usage() { + let mut registry = HttpRegistry::serve(); + registry.publish(|t| { + ProjectBuilder::start() + .name("bar") + .version("1.0.0") + .lib_cairo(r#"fn f() -> felt252 { 0 }"#) + .build(t); + }); + + let t = TempDir::new().unwrap(); + ProjectBuilder::start() + .name("foo") + .version("0.1.0") + .dep("bar", Dep.version("1").registry(®istry)) + .lib_cairo(r#"fn f() -> felt252 { bar::f() }"#) + .build(&t); + + // FIXME(mkaput): Why are verbose statuses not appearing here? + Scarb::quick_snapbox() + .arg("fetch") + .current_dir(&t) + .timeout(Duration::from_secs(10)) + .assert() + .success() + .stdout_matches(indoc! {r#" + [..] Downloading bar v1.0.0 ([..]) + "#}); +} + +#[test] +fn not_found() { + let mut registry = HttpRegistry::serve(); + registry.publish(|t| { + // Publish a package so that the directory hierarchy is created. + // Note, however, that we declare a dependency on baZ. + ProjectBuilder::start() + .name("bar") + .version("1.0.0") + .lib_cairo(r#"fn f() -> felt252 { 0 }"#) + .build(t); + }); + + let t = TempDir::new().unwrap(); + ProjectBuilder::start() + .name("foo") + .version("0.1.0") + .dep("baz", Dep.version("1").registry(®istry)) + .build(&t); + + Scarb::quick_snapbox() + .arg("fetch") + .current_dir(&t) + .timeout(Duration::from_secs(10)) + .assert() + .failure() + .stdout_matches(indoc! {r#" + error: package not found in registry: baz ^1 (registry+http://[..]) + "#}); +} + +#[test] +fn missing_config_json() { + let registry = HttpRegistry::serve(); + fs::remove_file(registry.child("config.json")).unwrap(); + + let t = TempDir::new().unwrap(); + ProjectBuilder::start() + .name("foo") + .version("0.1.0") + .dep("baz", Dep.version("1").registry(®istry)) + .build(&t); + + Scarb::quick_snapbox() + .arg("fetch") + .current_dir(&t) + .timeout(Duration::from_secs(10)) + .assert() + .failure() + .stdout_matches(indoc! {r#" + error: failed to lookup for `baz ^1 (registry+http://[..])` in registry: registry+http://[..] + + Caused by: + 0: failed to fetch registry config + 1: HTTP status client error (404 Not Found) for url (http://[..]/config.json) + "#}); +} + +// TODO(mkaput): Test errors properly when package is in index, but tarball is missing. +// TODO(mkaput): Test interdependencies. +// TODO(mkaput): Test offline mode. diff --git a/utils/scarb-test-support/src/registry/http.rs b/utils/scarb-test-support/src/registry/http.rs new file mode 100644 index 000000000..5860ba9e9 --- /dev/null +++ b/utils/scarb-test-support/src/registry/http.rs @@ -0,0 +1,79 @@ +use assert_fs::fixture::ChildPath; +use assert_fs::prelude::*; +use std::fmt; +use std::path::Path; + +use assert_fs::TempDir; +use once_cell::sync::Lazy; +use serde_json::json; +use tokio::runtime; + +use crate::registry::local::LocalRegistry; +use crate::simple_http_server::SimpleHttpServer; + +pub struct HttpRegistry { + local: LocalRegistry, + url: String, + + // This needs to be stored here so that it's dropped properly. + server: SimpleHttpServer, +} + +impl HttpRegistry { + pub fn serve() -> Self { + // Keep a global multi-threading runtime to contain all running servers in one shared + // thread pool, while maintaining synchronous nature of tests. + static RUNTIME: Lazy = Lazy::new(|| { + runtime::Builder::new_multi_thread() + .worker_threads(1) + .enable_all() + .build() + .unwrap() + }); + + let local = LocalRegistry::create(); + let server = { + let _guard = RUNTIME.enter(); + SimpleHttpServer::serve(local.t.path().to_owned()) + }; + let url = server.url(); + + let config = json!({ + "version": 1, + "dl": format!("{url}{{package}}-{{version}}.tar.zst"), + "index": format!("{url}index/{{prefix}}/{{package}}.json") + }); + local + .t + .child("config.json") + .write_str(&serde_json::to_string(&config).unwrap()) + .unwrap(); + + Self { local, url, server } + } + + pub fn publish(&mut self, f: impl FnOnce(&TempDir)) -> &mut Self { + self.local.publish(f); + self + } + + /// Enable this when writing tests to see what requests are being made in the test. + pub fn debug_log_requests(&self) { + self.server.log_requests(true); + } +} + +impl PathChild for HttpRegistry { + fn child

(&self, path: P) -> ChildPath + where + P: AsRef, + { + self.local.t.child(path) + } +} + +impl fmt::Display for HttpRegistry { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Display::fmt(&self.url, f) + } +} diff --git a/utils/scarb-test-support/src/registry/mod.rs b/utils/scarb-test-support/src/registry/mod.rs index 27099624c..6b8924482 100644 --- a/utils/scarb-test-support/src/registry/mod.rs +++ b/utils/scarb-test-support/src/registry/mod.rs @@ -1 +1,2 @@ +pub mod http; pub mod local;