Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Some more small improvements #22

Merged
merged 5 commits into from
Mar 1, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[package]
name = "s3glob"
version = "0.4.2"
edition = "2021"
edition = "2024"
authors = ["Brandon W Maister <[email protected]>"]
license = "MIT, APACHE-2.0"
repository = "https://github.com/quodlibetor/s3glob"
Expand Down
42 changes: 25 additions & 17 deletions src/glob_matcher.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
//! A pattern is a glob that knows how to split itself into a prefix and join with a partial prefix
#![allow(dead_code)]

use std::collections::BTreeSet;
use std::sync::Arc;

use anyhow::{bail, Context as _, Result};
use anyhow::{Context as _, Result, bail};
use glob::Glob;
use globset::GlobMatcher;
use itertools::Itertools as _;
Expand Down Expand Up @@ -423,9 +422,26 @@ impl S3GlobMatcher {
.take(part_idx + 1)
.map(|p| p.raw())
.join("");
let max_prefixes = if tracing::enabled!(tracing::Level::DEBUG) {
usize::MAX
} else {
20
};
let and_more = if prefixes.len() > max_prefixes {
format!(
"\n ...and {} more (run with --verbose to see all)",
prefixes.len() - max_prefixes
)
} else {
String::new()
};

bail!(
"No existing prefixes matched the filter: {glob_so_far}\n {}",
prefixes.iter().join("\n ")
"Could not continue search in prefixes:\n {}{}\
\n\n\
None of the above matched the pattern:\n {glob_so_far}",
prefixes.iter().take(max_prefixes).join("\n "),
and_more,
);
}
}
Expand All @@ -444,7 +460,9 @@ impl S3GlobMatcher {

if prefixes.len() < self.min_prefixes && !self.is_complete {
let count = prefixes.len();
progressln!("\rDiscovered prefixes: {count:>5} -- see `s3glob help parallelism` if it feels like this run is too slow");
progressln!(
"\rDiscovered prefixes: {count:>5} -- see `s3glob help parallelism` if it feels like this run is too slow"
);
} else if self.is_complete {
// clear the previous output
progressln!(
Expand Down Expand Up @@ -520,14 +538,7 @@ async fn check_prefixes(
}

fn prefix_join(prefix: &str, alt: &str) -> String {
// minio doesn't support double forward slashes in the path
// https://github.com/minio/minio/issues/5874
// TODO: make this something the user can configure?
if prefix.ends_with('/') && alt.starts_with('/') {
format!("{prefix}{}", &alt[1..])
} else {
format!("{prefix}{alt}")
}
format!("{prefix}{alt}")
}

#[cfg(test)]
Expand Down Expand Up @@ -820,10 +831,7 @@ mod tests {
]);

let prefixes = scanner.find_prefixes(engine.clone()).await?.prefixes;
// TODO: there is a legitimate case that this should only be
// src/tmp/file, but we strip double forward slashes to work around
// minio
assert!(prefixes == vec!["src/file", "src/tmp/file"]);
assert!(prefixes == vec!["src/tmp/file"]);
let e: &[(&str, &str)] = &[];
engine.assert_calls(e);
Ok(())
Expand Down
9 changes: 2 additions & 7 deletions src/glob_matcher/engine.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,16 +30,11 @@ pub trait Engine: Send + Sync + 'static {
pub struct S3Engine {
client: Client,
bucket: String,
delimiter: String,
}

impl S3Engine {
pub fn new(client: Client, bucket: String, delimiter: String) -> Self {
Self {
client,
bucket,
delimiter,
}
pub fn new(client: Client, bucket: String) -> Self {
Self { client, bucket }
}
}

Expand Down
24 changes: 6 additions & 18 deletions src/glob_matcher/glob.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
use anyhow::{anyhow, bail, Result};
use anyhow::{Result, anyhow, bail};
use itertools::Itertools as _;

use super::prefix_join;

use regex::Regex;

/// A single part of a glob pattern
///
/// Note that the compiled regexes are designed to match against an _entire_ path segment
Expand Down Expand Up @@ -57,11 +55,6 @@ impl Glob {
matches!(self, Glob::Choice { .. })
}

/// True if this is a `*`, `?`, or `[abc]`
pub(crate) fn is_any(&self) -> bool {
matches!(self, Glob::Any { .. })
}

/// True if this is a negated character class `[!abc]`
pub(crate) fn is_negated(&self) -> bool {
matches!(self, Glob::Any { not: Some(_), .. })
Expand Down Expand Up @@ -100,10 +93,6 @@ impl Glob {
}
}

pub(crate) fn re(&self, delimiter: &str) -> Regex {
Regex::new(&self.re_string(delimiter)).unwrap()
}

/// True if this glob is a literal part and ends with the delimiter
pub(crate) fn ends_with(&self, delimiter: &str) -> bool {
match self {
Expand Down Expand Up @@ -131,12 +120,11 @@ impl Glob {
}
}

pub(crate) fn may_have_delimiter(&self, delimiter: char) -> bool {
match self {
Glob::Any { .. } | Glob::SyntheticAny => false,
Glob::Choice { allowed, .. } => allowed.iter().any(|a| a.contains(delimiter)),
Glob::Recursive { .. } => true,
}
#[cfg(test)]
pub(crate) fn re(&self, delimiter: &str) -> regex::Regex {
use regex::Regex;

Regex::new(&self.re_string(delimiter)).unwrap()
}
}

Expand Down
22 changes: 12 additions & 10 deletions src/main.rs
Original file line number Diff line number Diff line change
@@ -1,25 +1,25 @@
use std::io::IsTerminal as _;
use std::path::PathBuf;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Arc;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::time::{Duration, Instant};

use anyhow::{anyhow, bail, Result};
use anyhow::{Result, anyhow, bail};
use aws_config::meta::region::RegionProviderChain;
use aws_sdk_s3::operation::head_object::HeadObjectOutput;
use aws_sdk_s3::primitives::DateTime;
use aws_sdk_s3::types::Object;
use aws_sdk_s3::{config::BehaviorVersion, config::Region, Client};
use aws_sdk_s3::{Client, config::BehaviorVersion, config::Region};
use clap::{ArgAction, Parser, Subcommand, ValueEnum};
use glob_matcher::{S3Engine, S3GlobMatcher, GLOB_CHARS};
use humansize::{FormatSizeOptions, SizeFormatter, DECIMAL};
use messaging::{MessageLevel, MESSAGE_LEVEL};
use glob_matcher::{GLOB_CHARS, S3Engine, S3GlobMatcher};
use humansize::{DECIMAL, FormatSizeOptions, SizeFormatter};
use messaging::{MESSAGE_LEVEL, MessageLevel};
use num_format::{Locale, ToFormattedString};
use regex::Regex;
use tokio::io::AsyncWriteExt as _;
use tokio::runtime::Runtime;
use tokio::sync::mpsc::{UnboundedReceiver, UnboundedSender};
use tokio::sync::Semaphore;
use tokio::sync::mpsc::{UnboundedReceiver, UnboundedSender};
use tracing::{debug, trace, warn};

mod glob_matcher;
Expand Down Expand Up @@ -321,7 +321,7 @@ async fn run(opts: Opts) -> Result<()> {

let client = create_s3_client(&opts, &bucket).await?;

let engine = S3Engine::new(client.clone(), bucket.clone(), opts.delimiter.to_string());
let engine = S3Engine::new(client.clone(), bucket.clone());
let mut matcher = S3GlobMatcher::parse(raw_pattern.clone(), &opts.delimiter.to_string())?;
matcher.set_max_parallelism(opts.max_parallelism);
let presult = match matcher.find_prefixes(engine).await {
Expand Down Expand Up @@ -493,7 +493,9 @@ async fn run(opts: Opts) -> Result<()> {
);
}
}
debug!("closing downloader tx");
if !matcher.is_complete() {
progressln!();
}
// close the tx so the downloaders know to finish
drop(dl);
drop(pools);
Expand All @@ -517,9 +519,9 @@ async fn run(opts: Opts) -> Result<()> {
pools.download_object(dl.fresh(), obj);
}
} else {
progressln!();
drop(ntfctn_tx);
}
progressln!();
let start_time = Instant::now();
let mut downloaded_matches = 0;
let mut total_bytes = 0_usize;
Expand Down
8 changes: 4 additions & 4 deletions tests/integration.rs
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
use assert_cmd::Command;
use assert_fs::prelude::*;
use assert_fs::TempDir;
use assert_fs::prelude::*;
use aws_sdk_s3::Client;
use aws_sdk_s3::config::{BehaviorVersion, Credentials, Region};
use aws_sdk_s3::primitives::ByteStream;
use aws_sdk_s3::Client;
use predicates::prelude::*;
use predicates::str::contains;
use rstest::rstest;
use testcontainers::core::logs::consumer::LogConsumer;
use testcontainers::core::logs::LogFrame;
use testcontainers::core::logs::consumer::LogConsumer;
use testcontainers::runners::AsyncRunner;
use testcontainers::{ContainerAsync, ImageExt};
use testcontainers_modules::minio::MinIO;
Expand Down Expand Up @@ -490,7 +490,7 @@ fn print_s3glob_output(cmd: &mut Command) {

use std::borrow::Cow;

use futures::{future::BoxFuture, FutureExt};
use futures::{FutureExt, future::BoxFuture};

/// A consumer that logs the output of container with the [`log`] crate.
///
Expand Down
Loading