From f0290aff3deae6a71681c048606351b246eb60d8 Mon Sep 17 00:00:00 2001 From: Marek Kaput Date: Wed, 13 Sep 2023 16:48:33 +0200 Subject: [PATCH] Create actual tar.zstd archive commit-id:1b03d139 --- Cargo.lock | 68 ++++++++++++++ Cargo.toml | 2 + scarb/Cargo.toml | 2 + scarb/src/core/package/id.rs | 21 +++++ scarb/src/internal/fsx.rs | 9 ++ scarb/src/ops/package.rs | 119 +++++++++++++++++++++++- scarb/tests/package.rs | 174 ++++++++++++++++++++++++++++++++++- 7 files changed, 390 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 942bf3c75..b684e1d6f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1059,6 +1059,7 @@ version = "1.0.83" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0" dependencies = [ + "jobserver", "libc", ] @@ -2848,6 +2849,15 @@ version = "1.0.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "af150ab688ff2122fcef229be89cb50dd66af9e01a4ff320cc137eecc9bacc38" +[[package]] +name = "jobserver" +version = "0.1.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "936cfd212a0155903bcbc060e316fb6cc7cbf2e1907329391ebadc1fe0ce77c2" +dependencies = [ + "libc", +] + [[package]] name = "js-sys" version = "0.3.64" @@ -3478,6 +3488,12 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" +[[package]] +name = "pkg-config" +version = "0.3.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26072860ba924cbfa98ea39c8c19b4dd6a4a25423dbdf219c1eca91aa0cf6964" + [[package]] name = "portable-atomic" version = "1.4.3" @@ -3882,6 +3898,7 @@ dependencies = [ "smallvec", "smol_str", "snapbox", + "tar", "test-case", "test-for-each-example", "thiserror", @@ -3898,6 +3915,7 @@ dependencies = [ "windows-sys 0.48.0", "xxhash-rust", "zip", + "zstd", ] [[package]] @@ -4436,6 +4454,17 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" +[[package]] +name = "tar" +version = "0.4.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b16afcea1f22891c49a00c751c7b63b2233284064f11a200fc624137c51e2ddb" +dependencies = [ + "filetime", + "libc", + "xattr", +] + [[package]] name = "tempfile" version = "3.8.0" @@ -5224,6 +5253,15 @@ dependencies = [ "tap", ] +[[package]] +name = "xattr" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f4686009f71ff3e5c4dbcf1a282d0a44db3f021ba69350cd42086b3e5f1c6985" +dependencies = [ + "libc", +] + [[package]] name = "xshell" version = "0.2.5" @@ -5291,3 +5329,33 @@ dependencies = [ "crossbeam-utils", "flate2", ] + +[[package]] +name = "zstd" +version = "0.12.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a27595e173641171fc74a1232b7b1c7a7cb6e18222c11e9dfb9888fa424c53c" +dependencies = [ + "zstd-safe", +] + +[[package]] +name = "zstd-safe" +version = "6.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee98ffd0b48ee95e6c5168188e44a54550b1564d9d530ee21d5f0eaed1069581" +dependencies = [ + "libc", + "zstd-sys", +] + +[[package]] +name = "zstd-sys" +version = "2.0.8+zstd.1.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5556e6ee25d32df2586c098bbfa278803692a20d0ab9565e049480d52707ec8c" +dependencies = [ + "cc", + "libc", + "pkg-config", +] diff --git a/Cargo.toml b/Cargo.toml index 6f9d50e23..e85a0b01b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -79,6 +79,7 @@ similar-asserts = { version = "1.5.0", features = ["serde"] } smallvec = "1.11.1" smol_str = { version = "0.2.0", features = ["serde"] } snapbox = { version = "0.4.12", features = ["cmd", "path"] } +tar = "0.4.40" tempfile = "3.8.0" test-case = "3.2.1" thiserror = "1.0.48" @@ -97,6 +98,7 @@ windows-sys = "0.48.0" xshell = "0.2.5" xxhash-rust = { version = "0.8.7", features = ["xxh3"] } zip = { version = "0.6.6", default-features = false, features = ["deflate"] } +zstd = "0.12.4" [profile.release] lto = true diff --git a/scarb/Cargo.toml b/scarb/Cargo.toml index fb52d1dbf..7733d3fa7 100644 --- a/scarb/Cargo.toml +++ b/scarb/Cargo.toml @@ -57,6 +57,7 @@ serde.workspace = true serde_json.workspace = true smallvec.workspace = true smol_str.workspace = true +tar.workspace = true thiserror.workspace = true tokio.workspace = true toml.workspace = true @@ -71,6 +72,7 @@ which.workspace = true windows-sys.workspace = true xxhash-rust.workspace = true zip.workspace = true +zstd.workspace = true [dev-dependencies] assert_fs.workspace = true diff --git a/scarb/src/core/package/id.rs b/scarb/src/core/package/id.rs index d9d54ec19..546c64f85 100644 --- a/scarb/src/core/package/id.rs +++ b/scarb/src/core/package/id.rs @@ -90,6 +90,18 @@ impl PackageId { self.source_id.to_pretty_url(), ) } + + /// Basename of the tarball that would be created for this package, e.g. `foo-1.2.3`. + pub fn tarball_basename(&self) -> String { + format!("{}-{}", self.name, self.version) + } + + /// Filename of the tarball that would be created for this package, e.g. `foo-1.2.3.tar.zstd`. + pub fn tarball_name(&self) -> String { + let mut base = self.tarball_basename(); + base.push_str(".tar.zstd"); + base + } } impl Deref for PackageId { @@ -211,4 +223,13 @@ mod tests { let pkg_id = PackageId::new(name, version, source_id); assert_eq!(format!("foo v1.0.0 ({source_id})"), pkg_id.to_string()); } + + #[test] + fn tarball_name() { + let name = PackageName::new("foo"); + let version = Version::new(1, 0, 0); + let source_id = SourceId::mock_path(); + let pkg_id = PackageId::new(name, version, source_id); + assert_eq!("foo-1.0.0.tar.zstd", pkg_id.tarball_name()); + } } diff --git a/scarb/src/internal/fsx.rs b/scarb/src/internal/fsx.rs index 456304f03..900f0757c 100644 --- a/scarb/src/internal/fsx.rs +++ b/scarb/src/internal/fsx.rs @@ -73,6 +73,15 @@ pub fn read_to_string(path: impl AsRef) -> Result { } } +/// Equivalent to [`fs::rename`] with better error messages. +pub fn rename(from: impl AsRef, to: impl AsRef) -> Result<()> { + return inner(from.as_ref(), to.as_ref()); + + fn inner(from: &Path, to: &Path) -> Result<()> { + fs::rename(from, to).with_context(|| format!("failed to rename file: {}", from.display())) + } +} + pub trait PathUtf8Ext { fn try_as_utf8(&'_ self) -> Result<&'_ Utf8Path>; diff --git a/scarb/src/ops/package.rs b/scarb/src/ops/package.rs index 3311cedc7..4e022fa5e 100644 --- a/scarb/src/ops/package.rs +++ b/scarb/src/ops/package.rs @@ -1,12 +1,16 @@ use std::collections::BTreeMap; +use std::fs::File; +use std::io::{Seek, SeekFrom}; -use anyhow::{ensure, Result}; +use anyhow::{ensure, Context, Result}; use camino::Utf8PathBuf; use scarb_ui::components::Status; +use scarb_ui::{HumanBytes, HumanCount}; use crate::core::{Package, PackageId, PackageName, Workspace}; use crate::flock::FileLockGuard; +use crate::internal::fsx; use crate::{ops, DEFAULT_SOURCE_PATH, MANIFEST_FILE_NAME}; const VERSION: u8 = 1; @@ -98,13 +102,53 @@ fn package_one_impl( ) -> Result { let pkg = ws.fetch_package(&pkg_id)?; + ws.config() + .ui() + .print(Status::new("Packaging", &pkg_id.to_string())); + // TODO(mkaput): Check metadata // TODO(#643): Check dirty in VCS (but do not do it when listing!). - let _recipe = prepare_archive_recipe(pkg, ws)?; + let recipe = prepare_archive_recipe(pkg, ws)?; + let num_files = recipe.len(); + + // Package up and test a temporary tarball and only move it to the final location if it actually + // passes all verification checks. Any previously existing tarball can be assumed as corrupt + // or invalid, so we can overwrite it if it exists. + let filename = pkg_id.tarball_name(); + let target_dir = ws.target_dir().child("package"); + + let mut dst = + target_dir.open_rw(format!(".{filename}"), "package scratch space", ws.config())?; + + dst.set_len(0) + .with_context(|| format!("failed to truncate: {filename}"))?; + + let uncompressed_size = tar(pkg_id, recipe, &mut dst, ws)?; + + // TODO(mkaput): Verify. + + dst.seek(SeekFrom::Start(0))?; + + fsx::rename(dst.path(), dst.path().with_file_name(filename))?; - todo!("Actual packaging is not implemented yet.") + let dst_metadata = dst + .metadata() + .with_context(|| format!("failed to stat: {}", dst.path()))?; + let compressed_size = dst_metadata.len(); + + ws.config().ui().print(Status::new( + "Packaged", + &format!( + "{} files, {:.1} ({:.1} compressed)", + HumanCount(num_files as u64), + HumanBytes(uncompressed_size), + HumanBytes(compressed_size), + ), + )); + + Ok(dst) } fn list_one_impl( @@ -192,3 +236,72 @@ fn normalize_manifest(_pkg: &Package, _ws: &Workspace<'_>) -> Result> { // TODO(mkaput): Implement this properly. Ok("[package]".to_string().into_bytes()) } + +/// Compress and package the recipe, and write it into the given file. +/// +/// Returns the uncompressed size of the contents of the archive. +fn tar( + pkg_id: PackageId, + recipe: ArchiveRecipe<'_>, + dst: &mut File, + ws: &Workspace<'_>, +) -> Result { + const COMPRESSION_LEVEL: i32 = 22; + let encoder = zstd::stream::Encoder::new(dst, COMPRESSION_LEVEL)?; + let mut ar = tar::Builder::new(encoder); + + let base_path = Utf8PathBuf::from(pkg_id.tarball_basename()); + + let mut uncompressed_size = 0; + for ArchiveFile { path, contents } in recipe { + ws.config() + .ui() + .verbose(Status::new("Archiving", path.as_str())); + + let archive_path = base_path.join(&path); + let mut header = tar::Header::new_gnu(); + match contents { + ArchiveFileContents::OnDisk(disk_path) => { + let mut file = File::open(&disk_path) + .with_context(|| format!("failed to open for archiving: {disk_path}"))?; + + let metadata = file + .metadata() + .with_context(|| format!("failed to stat: {disk_path}"))?; + + header.set_metadata_in_mode(&metadata, tar::HeaderMode::Deterministic); + header.set_cksum(); + + ar.append_data(&mut header, &archive_path, &mut file) + .with_context(|| format!("could not archive source file: {disk_path}"))?; + + uncompressed_size += metadata.len(); + } + + ArchiveFileContents::Generated(generator) => { + let contents = generator()?; + + header.set_entry_type(tar::EntryType::file()); + header.set_mode(0o644); + header.set_size(contents.len() as u64); + + // From `set_metadata_in_mode` implementation in `tar` crate: + // We could in theory set the mtime to zero here, but not all + // tools seem to behave well when ingesting files with a 0 + // timestamp. + header.set_mtime(1); + + header.set_cksum(); + + ar.append_data(&mut header, &archive_path, contents.as_slice()) + .with_context(|| format!("could not archive source file: {path}"))?; + + uncompressed_size += contents.len() as u64; + } + } + } + + let encoder = ar.into_inner()?; + encoder.finish()?; + Ok(uncompressed_size) +} diff --git a/scarb/tests/package.rs b/scarb/tests/package.rs index d6d80a722..1f4202f56 100644 --- a/scarb/tests/package.rs +++ b/scarb/tests/package.rs @@ -1,11 +1,125 @@ -use assert_fs::fixture::PathChild; +use std::collections::{HashMap, HashSet}; +use std::fs::File; +use std::io::{BufReader, Read}; +use std::path::{Path, PathBuf}; + +use assert_fs::fixture::{ChildPath, PathChild}; +use assert_fs::prelude::*; use assert_fs::TempDir; -use indoc::indoc; +use indoc::{formatdoc, indoc}; +use itertools::Itertools; use scarb_test_support::command::Scarb; use scarb_test_support::project_builder::ProjectBuilder; use scarb_test_support::workspace_builder::WorkspaceBuilder; +struct PackageChecker { + actual_files: HashMap, + base_name: PathBuf, +} + +impl PackageChecker { + fn open<'b>(path: &Path) -> tar::Archive>> { + let path = ChildPath::new(path); + path.assert(predicates::path::is_file()); + + let file = File::open(&path).expect("failed to open package tarball"); + let reader = zstd::Decoder::new(file).expect("failed to create zstd decoder"); + tar::Archive::new(reader) + } + + fn assert(path: &Path) -> Self { + let mut archive = Self::open(path); + + let actual_files: HashMap = archive + .entries() + .expect("failed to get archive entries") + .map(|entry| { + let mut entry = entry.expect("failed to get archive entry"); + let name = entry + .path() + .expect("failed to get archive entry path") + .into_owned(); + let mut contents = String::new(); + entry + .read_to_string(&mut contents) + .expect("failed to read archive entry contents"); + (name, contents) + }) + .collect(); + + let base_name = { + let base_names = actual_files + .keys() + .map(|path| path.components().next().expect("empty path").as_os_str()) + .unique() + .collect::>(); + assert_eq!( + base_names.len(), + 1, + "multiple base names in package tarball: {}", + base_names.iter().map(|p| p.to_string_lossy()).join(", ") + ); + PathBuf::from(base_names.into_iter().next().unwrap()) + }; + + Self { + actual_files, + base_name, + } + } + + fn name_and_version(&self, expected_package_name: &str, expected_version: &str) -> &Self { + assert_eq!( + self.base_name.to_string_lossy(), + format!("{expected_package_name}-{expected_version}") + ); + self + } + + fn contents(&self, expected_files: &[&str]) -> &Self { + let actual_files: HashSet = self.actual_files.keys().cloned().collect(); + let expected_files: HashSet = expected_files + .iter() + .map(|name| self.base_name.join(name)) + .collect(); + let missing: Vec<&PathBuf> = expected_files.difference(&actual_files).collect(); + let extra: Vec<&PathBuf> = actual_files.difference(&expected_files).collect(); + if !missing.is_empty() || !extra.is_empty() { + panic!( + "package tarball does not match.\nMissing: {:?}\nExtra: {:?}\n", + missing, extra + ); + } + self + } +} + +#[test] +fn simple() { + let t = TempDir::new().unwrap(); + ProjectBuilder::start() + .name("foo") + .version("1.0.0") + .lib_cairo("mod foo;") + .src("src/foo.cairo", "fn foo() {}") + .build(&t); + + Scarb::quick_snapbox() + .arg("package") + .current_dir(&t) + .assert() + .success() + .stdout_matches(indoc! {r#" + [..] Packaging foo v1.0.0 [..] + [..] Packaged 4 files, [..] ([..] compressed) + "#}); + + PackageChecker::assert(&t.child("target/package/foo-1.0.0.tar.zstd")) + .name_and_version("foo", "1.0.0") + .contents(&["VERSION", "Scarb.orig.toml", "Scarb.toml", "src/lib.cairo"]); +} + #[test] fn list_simple() { let t = TempDir::new().unwrap(); @@ -63,3 +177,59 @@ fn list_workspace() { src/lib.cairo "#}); } + +#[test] +fn reserved_files_collision() { + let t = TempDir::new().unwrap(); + ProjectBuilder::start() + .name("foo") + .version("1.0.0") + .src("VERSION", "oops") + .src("Scarb.orig.toml", "oops") + .build(&t); + + Scarb::quick_snapbox() + .arg("package") + .current_dir(&t) + .assert() + .failure() + .stdout_matches(formatdoc! {r#" + [..] Packaging foo v1.0.0 [..] + error: invalid inclusion of reserved files in package: VERSION, Scarb.orig.toml + "#}); +} + +// TODO(mkaput): Manifest normalization, esp in workspaces +// TODO(mkaput): Git & local paths +// TODO(mkaput): Symlinks and other FS shenanigans +// TODO(mkaput): Gitignore +// TODO(mkaput): Invalid readme/license path +// TODO(mkaput): Restricted Windows files + +#[test] +fn clean_tar_headers() { + let t = TempDir::new().unwrap(); + ProjectBuilder::start() + .name("foo") + .version("1.0.0") + .lib_cairo("mod foo;") + .src("src/foo.cairo", "fn foo() {}") + .build(&t); + + Scarb::quick_snapbox() + .arg("package") + .current_dir(&t) + .assert() + .success(); + + let mut archive = PackageChecker::open(&t.child("target/package/foo-1.0.0.tar.zstd")); + for entry in archive.entries().expect("failed to get archive entries") { + let entry = entry.expect("failed to get archive entry"); + println!("checking: {:?}", entry.path()); + let header = entry.header(); + assert_eq!(header.mode().unwrap(), 0o644); + assert_ne!(header.mtime().unwrap(), 0); + assert_eq!(header.username().unwrap().unwrap(), ""); + assert_eq!(header.groupname().unwrap().unwrap(), ""); + } +}