-
Notifications
You must be signed in to change notification settings - Fork 70
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Initial implementation of
RegistrySource
This implementation focuses solely on local registries, and it has almost no security. commit-id:7c1e1d67
- Loading branch information
Showing
9 changed files
with
421 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,105 @@ | ||
use std::io; | ||
use std::path::{Path, PathBuf}; | ||
use std::sync::Arc; | ||
|
||
use anyhow::Result; | ||
use async_trait::async_trait; | ||
use tokio::task::spawn_blocking; | ||
use url::Url; | ||
|
||
use crate::core::registry::client::RegistryClient; | ||
use crate::core::registry::index::{IndexRecords, TemplateUrl}; | ||
use crate::core::{PackageId, PackageName}; | ||
use crate::internal::fsx; | ||
|
||
/// Local registry that lives on the filesystem as a set of `.tar.zst` files with an `index` | ||
/// directory in the standard registry index format. | ||
/// | ||
/// ## Filesystem hierarchy | ||
/// | ||
/// Here is an example layout of a local registry on a local filesystem: | ||
/// | ||
/// ```text | ||
/// [registry root]/ | ||
/// ├── index/ # registry index | ||
/// │ ├── al/ | ||
/// │ │ └── ex/ | ||
/// │ │ ├── alexandria_ascii | ||
/// │ │ └── alexandria_math | ||
/// │ ├── ca/ | ||
/// │ │ └── ir/ | ||
/// │ │ └── cairo_lib | ||
/// │ └── op/ | ||
/// │ └── en/ | ||
/// │ └── open_zeppelin | ||
/// ├── alexandria_ascii-0.1.0.tar.zst # pre-downloaded package tarballs | ||
/// ├── alexandria_math-0.1.0.tar.zst | ||
/// ├── cairo_lib-0.2.0.tar.zst | ||
/// └── open_zeppelin-0.7.0.tar.zst | ||
/// ``` | ||
pub struct LocalRegistryClient { | ||
index_template_url: TemplateUrl, | ||
dl_template_url: TemplateUrl, | ||
} | ||
|
||
impl LocalRegistryClient { | ||
pub fn new(root: &Path) -> Result<Self> { | ||
let root = fsx::canonicalize(root)?; | ||
let root_url = Url::from_directory_path(root) | ||
.expect("Canonical path should always be convertible to URL."); | ||
|
||
let index_template_url = | ||
TemplateUrl::new(&format!("{root_url}index/{{prefix}}/{{package}}.json")); | ||
|
||
let dl_template_url = | ||
TemplateUrl::new(&format!("{root_url}{{package}}-{{version}}.tar.zst")); | ||
|
||
Ok(Self { | ||
index_template_url, | ||
dl_template_url, | ||
}) | ||
} | ||
} | ||
|
||
#[async_trait] | ||
impl RegistryClient for LocalRegistryClient { | ||
fn is_offline(&self) -> bool { | ||
true | ||
} | ||
|
||
#[tracing::instrument(level = "trace", skip(self))] | ||
async fn get_records(&self, package: PackageName) -> Result<Option<Arc<IndexRecords>>> { | ||
let records_path = self | ||
.index_template_url | ||
.expand(package.into())? | ||
.to_file_path() | ||
.expect("Local index should always use file:// URLs."); | ||
|
||
spawn_blocking(move || { | ||
let records = match fsx::read(records_path) { | ||
Err(e) | ||
if e.downcast_ref::<io::Error>() | ||
.map_or(false, |ioe| ioe.kind() == io::ErrorKind::NotFound) => | ||
{ | ||
return Ok(None); | ||
} | ||
r => r?, | ||
}; | ||
let records = serde_json::from_slice(&records)?; | ||
Ok(Some(Arc::new(records))) | ||
}) | ||
.await? | ||
} | ||
|
||
async fn is_downloaded(&self, _package: PackageId) -> bool { | ||
true | ||
} | ||
|
||
async fn download(&self, package: PackageId) -> Result<PathBuf> { | ||
Ok(self | ||
.dl_template_url | ||
.expand(package.into())? | ||
.to_file_path() | ||
.expect("Local index should always use file:// URLs.")) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
use std::path::PathBuf; | ||
use std::sync::Arc; | ||
|
||
use anyhow::Result; | ||
use async_trait::async_trait; | ||
|
||
use crate::core::registry::index::IndexRecords; | ||
use crate::core::{PackageId, PackageName}; | ||
|
||
pub mod local; | ||
|
||
#[async_trait] | ||
pub trait RegistryClient: Send + Sync { | ||
/// State whether this registry works in offline mode. | ||
/// | ||
/// Local registries are expected to perform immediate file operations, while remote registries | ||
/// can take some IO-bound time. This flag also influences appearance of various UI elements. | ||
fn is_offline(&self) -> bool; | ||
|
||
/// Get the index record for a specific named package from this index. | ||
/// | ||
/// Returns `None` if the package is not present in the index. | ||
/// | ||
/// ## Caching | ||
/// | ||
/// This method is not expected to internally cache the result, but it is not prohibited either. | ||
/// Scarb applies specialized caching layers on top of clients. | ||
async fn get_records(&self, package: PackageName) -> Result<Option<Arc<IndexRecords>>>; | ||
|
||
/// Check if the package `.tar.zst` file has already been downloaded and is stored on disk. | ||
/// | ||
/// On internal errors, this method should return `false`. This method must not perform any | ||
/// network operations (it can be called before offline mode check). | ||
async fn is_downloaded(&self, package: PackageId) -> bool; | ||
|
||
/// Download the package `.tar.zst` file. | ||
/// | ||
/// Returns a [`PathBuf`] to the downloaded `.tar.zst` file. | ||
/// | ||
/// ## Caching | ||
/// | ||
/// If the registry is remote, i.e. actually downloads files and writes them to disk, | ||
/// it should write downloaded files to Scarb cache directory. If the file has already been | ||
/// downloaded, it should avoid downloading it again, and read it from this cache instead. | ||
async fn download(&self, package: PackageId) -> Result<PathBuf>; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,107 @@ | ||
use anyhow::{ensure, Context, Result}; | ||
use camino::Utf8PathBuf; | ||
use std::path::PathBuf; | ||
use tokio::task::spawn_blocking; | ||
use tracing::trace; | ||
|
||
use crate::core::{Config, PackageId, SourceId}; | ||
use crate::flock::{protected_run_if_not_ok, Filesystem, OK_FILE}; | ||
use crate::internal::fsx; | ||
use crate::internal::fsx::PathUtf8Ext; | ||
use crate::internal::restricted_names::is_windows_restricted_path; | ||
use crate::internal::stable_hash::short_hash; | ||
|
||
pub struct PackageSourceStore<'a> { | ||
fs: Filesystem<'a>, | ||
config: &'a Config, | ||
} | ||
|
||
impl<'a> PackageSourceStore<'a> { | ||
pub fn new(source: SourceId, config: &'a Config) -> Self { | ||
let ident = source_ident(source); | ||
let fs = config | ||
.dirs() | ||
.registry_dir() | ||
.into_child("src") | ||
.into_child(ident); | ||
Self { fs, config } | ||
} | ||
|
||
/// Extract a downloaded package archive into a location where it is ready to be compiled. | ||
/// | ||
/// No action is taken if the source looks like it's already unpacked. | ||
#[tracing::instrument(level = "debug", skip(self))] | ||
pub async fn extract(&self, pkg: PackageId, archive: PathBuf) -> Result<Utf8PathBuf> { | ||
self.extract_impl(pkg, archive) | ||
.await | ||
.with_context(|| format!("failed to extract: {pkg}")) | ||
} | ||
|
||
async fn extract_impl(&self, pkg: PackageId, archive: PathBuf) -> Result<Utf8PathBuf> { | ||
let prefix = pkg.tarball_basename(); | ||
let fs = self.fs.child(&prefix); | ||
let parent_path = self.fs.path_existent()?.to_owned(); | ||
let output_path = fs.path_existent()?.to_owned(); | ||
|
||
assert_eq!(parent_path.join(&prefix), output_path); | ||
|
||
protected_run_if_not_ok!(&fs, &self.config.package_cache_lock(), { | ||
trace!("extracting tarball as `{pkg}`: {}", archive.display()); | ||
|
||
// Wipe anything already extracted. | ||
unsafe { | ||
fs.recreate()?; | ||
} | ||
|
||
spawn_blocking(move || -> Result<()> { | ||
let mut tar = { | ||
let file = fsx::open(archive)?; | ||
let zst = zstd::Decoder::new(file)?; | ||
// FIXME(mkaput): Protect against zip bomb attacks (https://github.com/rust-lang/cargo/pull/11337). | ||
// FIXME(mkaput): Protect against CVE-2023-38497 (https://github.com/rust-lang/cargo/pull/12443). | ||
tar::Archive::new(zst) | ||
}; | ||
|
||
for entry in tar.entries()? { | ||
let mut entry = entry.with_context(|| "failed to iterate over archive")?; | ||
let entry_path = entry | ||
.path() | ||
.with_context(|| "failed to read entry path")? | ||
.try_to_utf8()?; | ||
|
||
// Ensure extracting will not accidentally or maliciously overwrite files | ||
// outside extraction directory. | ||
ensure!( | ||
entry_path.starts_with(&prefix), | ||
"invalid package tarball, contains a file {entry_path} \ | ||
which is not under {prefix}" | ||
); | ||
|
||
// Prevent unpacking OK-file. | ||
if entry_path.file_name().unwrap_or_default() == OK_FILE { | ||
continue; | ||
} | ||
|
||
let mut r = entry.unpack_in(&parent_path).map_err(anyhow::Error::from); | ||
|
||
if cfg!(windows) && is_windows_restricted_path(entry_path.as_std_path()) { | ||
r = r.context("path contains Windows restricted file name"); | ||
} | ||
|
||
r.with_context(|| format!("failed to extract: {entry_path}"))?; | ||
} | ||
|
||
Ok(()) | ||
}) | ||
.await??; | ||
}); | ||
|
||
Ok(output_path) | ||
} | ||
} | ||
|
||
fn source_ident(id: SourceId) -> String { | ||
let ident = id.url.host_str().unwrap_or_else(|| id.kind.primary_field()); | ||
let hash = short_hash(id); | ||
format!("{ident}-{hash}") | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,9 @@ | ||
pub use git::*; | ||
pub use path::*; | ||
pub use registry::*; | ||
pub use standard_lib::*; | ||
|
||
mod git; | ||
mod path; | ||
mod registry; | ||
mod standard_lib; |
Oops, something went wrong.