Skip to content

Commit

Permalink
Initial implementation of RegistrySource
Browse files Browse the repository at this point in the history
This implementation focuses solely on local registries, and it has almost no security.

commit-id:7c1e1d67
  • Loading branch information
mkaput committed Oct 11, 2023
1 parent a2a371a commit a528b57
Show file tree
Hide file tree
Showing 9 changed files with 421 additions and 2 deletions.
105 changes: 105 additions & 0 deletions scarb/src/core/registry/client/local.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
use std::io;
use std::path::{Path, PathBuf};
use std::sync::Arc;

use anyhow::Result;
use async_trait::async_trait;
use tokio::task::spawn_blocking;
use url::Url;

use crate::core::registry::client::RegistryClient;
use crate::core::registry::index::{IndexRecords, TemplateUrl};
use crate::core::{PackageId, PackageName};
use crate::internal::fsx;

/// Local registry that lives on the filesystem as a set of `.tar.zst` files with an `index`
/// directory in the standard registry index format.
///
/// ## Filesystem hierarchy
///
/// Here is an example layout of a local registry on a local filesystem. Index records are looked
/// up at `index/{prefix}/{package}.json`, and package tarballs at `{package}-{version}.tar.zst`
/// directly under the registry root:
///
/// ```text
/// [registry root]/
/// ├── index/                              # registry index
/// │  ├── al/
/// │  │  └── ex/
/// │  │     ├── alexandria_ascii.json
/// │  │     └── alexandria_math.json
/// │  ├── ca/
/// │  │  └── ir/
/// │  │     └── cairo_lib.json
/// │  └── op/
/// │     └── en/
/// │        └── open_zeppelin.json
/// ├── alexandria_ascii-0.1.0.tar.zst      # pre-downloaded package tarballs
/// ├── alexandria_math-0.1.0.tar.zst
/// ├── cairo_lib-0.2.0.tar.zst
/// └── open_zeppelin-0.7.0.tar.zst
/// ```
pub struct LocalRegistryClient {
/// URL template of index record files: `<root url>index/{prefix}/{package}.json`.
index_template_url: TemplateUrl,
/// URL template of package tarballs: `<root url>{package}-{version}.tar.zst`.
dl_template_url: TemplateUrl,
}

impl LocalRegistryClient {
    /// Create a client for the local registry located in the `root` directory.
    ///
    /// The `root` path is canonicalized first and then turned into a directory URL, which serves
    /// as the base of both the index and the download URL templates.
    ///
    /// # Errors
    ///
    /// Fails if `root` cannot be canonicalized.
    ///
    /// # Panics
    ///
    /// Panics if the canonical path cannot be represented as a URL.
    pub fn new(root: &Path) -> Result<Self> {
        let canonical_root = fsx::canonicalize(root)?;
        let base = Url::from_directory_path(canonical_root)
            .expect("Canonical path should always be convertible to URL.");

        // `{{` and `}}` are escaped braces: the placeholders are left verbatim in the templates.
        let index_pattern = format!("{base}index/{{prefix}}/{{package}}.json");
        let index_template_url = TemplateUrl::new(&index_pattern);

        let dl_pattern = format!("{base}{{package}}-{{version}}.tar.zst");
        let dl_template_url = TemplateUrl::new(&dl_pattern);

        Ok(Self {
            index_template_url,
            dl_template_url,
        })
    }
}

#[async_trait]
impl RegistryClient for LocalRegistryClient {
    /// Local registries always report themselves as working in offline mode.
    fn is_offline(&self) -> bool {
        true
    }

    /// Load index records of `package` from its records file in the registry index.
    ///
    /// Yields `Ok(None)` if the records file does not exist. Any other read failure, as well as
    /// a JSON parsing failure, is reported as an error.
    ///
    /// # Panics
    ///
    /// Panics if the expanded index URL cannot be converted to a file path.
    #[tracing::instrument(level = "trace", skip(self))]
    async fn get_records(&self, package: PackageName) -> Result<Option<Arc<IndexRecords>>> {
        let records_path = self
            .index_template_url
            .expand(package.into())?
            .to_file_path()
            .expect("Local index should always use file:// URLs.");

        // File reading and JSON parsing are blocking, so run them on the blocking thread pool.
        spawn_blocking(move || {
            let bytes = match fsx::read(records_path) {
                Ok(bytes) => bytes,
                Err(err) => {
                    // A missing records file means that the package is absent from the index.
                    let is_missing = matches!(
                        err.downcast_ref::<io::Error>(),
                        Some(ioe) if ioe.kind() == io::ErrorKind::NotFound
                    );
                    if is_missing {
                        return Ok(None);
                    }
                    return Err(err);
                }
            };

            let records: IndexRecords = serde_json::from_slice(&bytes)?;
            Ok(Some(Arc::new(records)))
        })
        .await?
    }

    /// Always `true`: this client never downloads anything, and no existence check is performed.
    async fn is_downloaded(&self, _package: PackageId) -> bool {
        true
    }

    /// Resolve the path of the `package` tarball inside the registry directory.
    ///
    /// Nothing is copied or downloaded, and the file is not checked for existence.
    ///
    /// # Panics
    ///
    /// Panics if the expanded download URL cannot be converted to a file path.
    async fn download(&self, package: PackageId) -> Result<PathBuf> {
        let tarball_url = self.dl_template_url.expand(package.into())?;
        let tarball_path = tarball_url
            .to_file_path()
            .expect("Local index should always use file:// URLs.");
        Ok(tarball_path)
    }
}
46 changes: 46 additions & 0 deletions scarb/src/core/registry/client/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
use std::path::PathBuf;
use std::sync::Arc;

use anyhow::Result;
use async_trait::async_trait;

use crate::core::registry::index::IndexRecords;
use crate::core::{PackageId, PackageName};

pub mod local;

/// Interface for accessing a package registry: looking up index records of packages
/// and obtaining package `.tar.zst` archives.
///
/// Implementations must be `Send + Sync`.
#[async_trait]
pub trait RegistryClient: Send + Sync {
/// State whether this registry works in offline mode.
///
/// Local registries are expected to perform immediate file operations, while remote registries
/// can take some IO-bound time. This flag also influences appearance of various UI elements.
fn is_offline(&self) -> bool;

/// Get the index records for a specific named package from this index.
///
/// Returns `Ok(None)` if the package is not present in the index.
///
/// ## Caching
///
/// This method is not expected to internally cache the result, but it is not prohibited either.
/// Scarb applies specialized caching layers on top of clients.
async fn get_records(&self, package: PackageName) -> Result<Option<Arc<IndexRecords>>>;

/// Check if the package `.tar.zst` file has already been downloaded and is stored on disk.
///
/// On internal errors, this method should return `false`. This method must not perform any
/// network operations (it can be called before the offline mode check).
async fn is_downloaded(&self, package: PackageId) -> bool;

/// Download the package `.tar.zst` file.
///
/// Returns a [`PathBuf`] to the downloaded `.tar.zst` file.
///
/// ## Caching
///
/// If the registry is remote, i.e. actually downloads files and writes them to disk,
/// it should write downloaded files to the Scarb cache directory. If the file has already been
/// downloaded, it should avoid downloading it again, and read it from this cache instead.
async fn download(&self, package: PackageId) -> Result<PathBuf>;
}
2 changes: 2 additions & 0 deletions scarb/src/core/registry/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@ use async_trait::async_trait;
use crate::core::{ManifestDependency, Package, PackageId, Summary};

pub mod cache;
pub mod client;
pub mod index;
pub mod package_source_store;
pub mod patch_map;
pub mod patcher;
pub mod source_map;
Expand Down
107 changes: 107 additions & 0 deletions scarb/src/core/registry/package_source_store.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
use anyhow::{ensure, Context, Result};
use camino::Utf8PathBuf;
use std::path::PathBuf;
use tokio::task::spawn_blocking;
use tracing::trace;

use crate::core::{Config, PackageId, SourceId};
use crate::flock::{protected_run_if_not_ok, Filesystem, OK_FILE};
use crate::internal::fsx;
use crate::internal::fsx::PathUtf8Ext;
use crate::internal::restricted_names::is_windows_restricted_path;
use crate::internal::stable_hash::short_hash;

/// Store of unpacked package sources coming from a single source.
///
/// Package archives are extracted into per-package subdirectories of a directory located at
/// `<registry dir>/src/<source ident>` (see [`PackageSourceStore::new`]).
pub struct PackageSourceStore<'a> {
/// Root directory of this store; each package is extracted into a child of it.
fs: Filesystem<'a>,
/// Scarb configuration, used to obtain the package cache lock while extracting.
config: &'a Config,
}

impl<'a> PackageSourceStore<'a> {
/// Create a store for `source`.
///
/// The store directory is `<registry dir>/src/<ident>`, where `<ident>` is computed from
/// `source` by `source_ident`.
pub fn new(source: SourceId, config: &'a Config) -> Self {
let ident = source_ident(source);
let fs = config
.dirs()
.registry_dir()
.into_child("src")
.into_child(ident);
Self { fs, config }
}

/// Extract a downloaded package archive into a location where it is ready to be compiled.
///
/// No action is taken if the source looks like it's already unpacked.
///
/// Returns the path of the directory containing the unpacked package.
///
/// # Errors
///
/// Any failure is reported with a `failed to extract: {pkg}` context attached.
#[tracing::instrument(level = "debug", skip(self))]
pub async fn extract(&self, pkg: PackageId, archive: PathBuf) -> Result<Utf8PathBuf> {
self.extract_impl(pkg, archive)
.await
.with_context(|| format!("failed to extract: {pkg}"))
}

/// Actual implementation of [`Self::extract`], without the outermost error context.
///
/// The archive is expected to be a zstd-compressed tarball in which every entry lives under
/// a top-level directory named after `pkg.tarball_basename()`.
async fn extract_impl(&self, pkg: PackageId, archive: PathBuf) -> Result<Utf8PathBuf> {
let prefix = pkg.tarball_basename();
let fs = self.fs.child(&prefix);
let parent_path = self.fs.path_existent()?.to_owned();
let output_path = fs.path_existent()?.to_owned();

// Entries are unpacked relative to `parent_path` and all start with `prefix`, so they must
// land exactly in `output_path`.
assert_eq!(parent_path.join(&prefix), output_path);

// NOTE(review): presumably this macro runs the block under the package cache lock only when
// the OK-file is missing in `fs`, and creates the OK-file on success -- confirm against the
// macro definition in `crate::flock`.
protected_run_if_not_ok!(&fs, &self.config.package_cache_lock(), {
trace!("extracting tarball as `{pkg}`: {}", archive.display());

// Wipe anything already extracted.
// NOTE(review): the safety contract of `recreate` is not visible here; presumably holding
// the package cache lock is what makes this call sound -- confirm and turn into `SAFETY:`.
unsafe {
fs.recreate()?;
}

// Decompression and unpacking are blocking operations, hence run them off the async executor.
spawn_blocking(move || -> Result<()> {
let mut tar = {
let file = fsx::open(archive)?;
let zst = zstd::Decoder::new(file)?;
// FIXME(mkaput): Protect against zip bomb attacks (https://github.com/rust-lang/cargo/pull/11337).
// FIXME(mkaput): Protect against CVE-2023-38497 (https://github.com/rust-lang/cargo/pull/12443).
tar::Archive::new(zst)
};

for entry in tar.entries()? {
let mut entry = entry.with_context(|| "failed to iterate over archive")?;
let entry_path = entry
.path()
.with_context(|| "failed to read entry path")?
.try_to_utf8()?;

// Ensure extracting will not accidentally or maliciously overwrite files
// outside extraction directory.
// NOTE(review): this is a path-component prefix check, so `{prefix}/../x` passes it;
// presumably such entries are then refused by `unpack_in` below -- confirm.
ensure!(
entry_path.starts_with(&prefix),
"invalid package tarball, contains a file {entry_path} \
which is not under {prefix}"
);

// Prevent unpacking OK-file.
// This matches on the file name alone, so it applies at any directory depth.
if entry_path.file_name().unwrap_or_default() == OK_FILE {
continue;
}

// NOTE(review): the success value of `unpack_in` is discarded below; per `tar` crate
// docs it may signal that the entry was skipped rather than unpacked -- confirm that
// silently ignoring such entries is intended.
let mut r = entry.unpack_in(&parent_path).map_err(anyhow::Error::from);

// On Windows, hint that a restricted file name may be the cause of a failure.
if cfg!(windows) && is_windows_restricted_path(entry_path.as_std_path()) {
r = r.context("path contains Windows restricted file name");
}

r.with_context(|| format!("failed to extract: {entry_path}"))?;
}

Ok(())
})
// First `?` propagates task join failures, second one propagates extraction errors.
.await??;
});

Ok(output_path)
}
}

/// Build a directory-name identifier of a source, in form of `{name}-{hash}`.
///
/// The name is the host of the source URL, or the primary field of the source kind if the URL
/// has no host. The hash is the short hash of the whole source ID.
fn source_ident(id: SourceId) -> String {
    let name = match id.url.host_str() {
        Some(host) => host,
        None => id.kind.primary_field(),
    };
    let digest = short_hash(id);
    format!("{name}-{digest}")
}
2 changes: 1 addition & 1 deletion scarb/src/core/source/id.rs
Original file line number Diff line number Diff line change
Expand Up @@ -297,7 +297,7 @@ impl SourceId {
match self.kind {
SourceKind::Path => Ok(Arc::new(PathSource::new(self, config))),
SourceKind::Git(_) => Ok(Arc::new(GitSource::new(self, config)?)),
SourceKind::Registry => todo!("Registry sources are not implemented yet."),
SourceKind::Registry => Ok(Arc::new(RegistrySource::new(self, config)?)),
SourceKind::Std => Ok(Arc::new(StandardLibSource::new(config))),
}
}
Expand Down
2 changes: 1 addition & 1 deletion scarb/src/flock.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ use crate::core::Config;
use crate::internal::fsx;
use crate::internal::lazy_directory_creator::LazyDirectoryCreator;

const OK_FILE: &str = ".scarb-ok";
pub const OK_FILE: &str = ".scarb-ok";

#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum FileLockKind {
Expand Down
18 changes: 18 additions & 0 deletions scarb/src/internal/fsx.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,15 @@ pub fn write(path: impl AsRef<Path>, contents: impl AsRef<[u8]>) -> Result<()> {
}
}

/// Equivalent to [`File::open`] with better error messages.
///
/// On failure, the error carries a context message naming the path that could not be opened.
pub fn open(path: impl AsRef<Path>) -> Result<File> {
    // Non-generic worker: only the thin shim below is instantiated per path type.
    fn open_at(target: &Path) -> Result<File> {
        let describe = || format!("failed to open `{}`", target.display());
        File::open(target).with_context(describe)
    }

    open_at(path.as_ref())
}

/// Equivalent to [`File::create`] with better error messages.
pub fn create(path: impl AsRef<Path>) -> Result<File> {
return inner(path.as_ref());
Expand All @@ -64,6 +73,15 @@ pub fn create(path: impl AsRef<Path>) -> Result<File> {
}
}

/// Equivalent to [`fs::read`] with better error messages.
///
/// On failure, the error carries a context message naming the path that could not be read.
pub fn read(path: impl AsRef<Path>) -> Result<Vec<u8>> {
    // Non-generic worker: only the thin shim below is instantiated per path type.
    fn read_at(target: &Path) -> Result<Vec<u8>> {
        let describe = || format!("failed to read `{}`", target.display());
        fs::read(target).with_context(describe)
    }

    read_at(path.as_ref())
}

/// Equivalent to [`fs::read_to_string`] with better error messages.
pub fn read_to_string(path: impl AsRef<Path>) -> Result<String> {
return inner(path.as_ref());
Expand Down
2 changes: 2 additions & 0 deletions scarb/src/sources/mod.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
pub use git::*;
pub use path::*;
pub use registry::*;
pub use standard_lib::*;

mod git;
mod path;
mod registry;
mod standard_lib;
Loading

0 comments on commit a528b57

Please sign in to comment.