diff --git a/Cargo.lock b/Cargo.lock index 5fca20a7118..5e99683fb47 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -185,6 +185,21 @@ dependencies = [ "syn 2.0.79", ] +[[package]] +name = "bit-set" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0700ddab506f33b20a03b13996eccd309a48e5ff77d0d95926aa0210fb4e95f1" +dependencies = [ + "bit-vec", +] + +[[package]] +name = "bit-vec" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb" + [[package]] name = "bitflags" version = "1.3.2" @@ -850,16 +865,6 @@ version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f" -[[package]] -name = "env_logger" -version = "0.8.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a19187fea3ac7e84da7dacf48de0c45d63c6a76f9490dae389aead16c243fce3" -dependencies = [ - "log", - "regex", -] - [[package]] name = "equivalent" version = "1.0.1" @@ -1548,6 +1553,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" dependencies = [ "autocfg", + "libm", ] [[package]] @@ -1791,22 +1797,37 @@ dependencies = [ "hex", ] +[[package]] +name = "proptest" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4c2511913b88df1637da85cc8d96ec8e43a3f8bb8ccb71ee1ac240d6f3df58d" +dependencies = [ + "bit-set", + "bit-vec", + "bitflags 2.6.0", + "lazy_static", + "num-traits", + "rand", + "rand_chacha", + "rand_xorshift", + "regex-syntax", + "rusty-fork", + "tempfile", + "unarray", +] + [[package]] name = "quick-error" -version = "2.0.1" +version = "1.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a993555f31e5a609f617c12db6250dedcac1b0a85076912c436e6fc9b2c8e6a3" 
+checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" [[package]] -name = "quickcheck" -version = "1.0.3" +name = "quick-error" +version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "588f6378e4dd99458b60ec275b4477add41ce4fa9f64dcba6f15adccb19b50d6" -dependencies = [ - "env_logger", - "log", - "rand", -] +checksum = "a993555f31e5a609f617c12db6250dedcac1b0a85076912c436e6fc9b2c8e6a3" [[package]] name = "quote" @@ -1862,6 +1883,15 @@ dependencies = [ "rand_core", ] +[[package]] +name = "rand_xorshift" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d25bf25ec5ae4a3f1b92f929810509a2f53d7dca2f50b794ff57e3face536c8f" +dependencies = [ + "rand_core", +] + [[package]] name = "rayon" version = "1.10.0" @@ -2030,6 +2060,18 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "rusty-fork" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb3dcc6e454c328bb824492db107ab7c0ae8fcffe4ad210136ef014458c1bc4f" +dependencies = [ + "fnv", + "quick-error 1.2.3", + "tempfile", + "wait-timeout", +] + [[package]] name = "same-file" version = "1.0.6" @@ -2396,6 +2438,12 @@ version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dcf81ac59edc17cc8697ff311e8f5ef2d99fcbd9817b34cec66f90b6c3dfd987" +[[package]] +name = "unarray" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eaea85b334db583fe3274d12b4cd1880032beab409c0d774be044d4480ab9a94" + [[package]] name = "unicode-ident" version = "1.0.13" @@ -2476,6 +2524,7 @@ name = "uu_base32" version = "0.0.27" dependencies = [ "clap", + "proptest", "uucore", ] @@ -2483,6 +2532,7 @@ dependencies = [ name = "uu_base64" version = "0.0.27" dependencies = [ + "clap", "uu_base32", "uucore", ] @@ -2586,7 +2636,7 @@ dependencies = [ "filetime", "indicatif", "libc", - "quick-error", + "quick-error 
2.0.1", "selinux", "uucore", "walkdir", @@ -2730,7 +2780,6 @@ dependencies = [ "num-bigint", "num-prime", "num-traits", - "quickcheck", "rand", "smallvec", "uucore", @@ -3035,7 +3084,7 @@ dependencies = [ "chrono", "clap", "itertools", - "quick-error", + "quick-error 2.0.1", "regex", "uucore", ] @@ -3533,6 +3582,15 @@ version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" +[[package]] +name = "wait-timeout" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f200f5b12eb75f8c1ed65abd4b2db8a6e1b138a20de009dacee265a2498f3f6" +dependencies = [ + "libc", +] + [[package]] name = "walkdir" version = "2.5.0" diff --git a/src/uu/base32/Cargo.toml b/src/uu/base32/Cargo.toml index 152091aa3a6..abd39787359 100644 --- a/src/uu/base32/Cargo.toml +++ b/src/uu/base32/Cargo.toml @@ -1,3 +1,5 @@ +# spell-checker:ignore proptest + [package] name = "uu_base32" version = "0.0.27" @@ -20,6 +22,9 @@ path = "src/base32.rs" clap = { workspace = true } uucore = { workspace = true, features = ["encoding"] } +[dev-dependencies] +proptest = "1.5.0" + [[bin]] name = "base32" path = "src/main.rs" diff --git a/src/uu/base32/src/base32.rs b/src/uu/base32/src/base32.rs index 09250421c25..46a0361ea4a 100644 --- a/src/uu/base32/src/base32.rs +++ b/src/uu/base32/src/base32.rs @@ -3,13 +3,11 @@ // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. 
-use std::io::{stdin, Read}; +pub mod base_common; use clap::Command; use uucore::{encoding::Format, error::UResult, help_about, help_usage}; -pub mod base_common; - const ABOUT: &str = help_about!("base32.md"); const USAGE: &str = help_usage!("base32.md"); @@ -17,20 +15,11 @@ const USAGE: &str = help_usage!("base32.md"); pub fn uumain(args: impl uucore::Args) -> UResult<()> { let format = Format::Base32; - let config: base_common::Config = base_common::parse_base_cmd_args(args, ABOUT, USAGE)?; + let config = base_common::parse_base_cmd_args(args, ABOUT, USAGE)?; - // Create a reference to stdin so we can return a locked stdin from - // parse_base_cmd_args - let stdin_raw = stdin(); - let mut input: Box = base_common::get_input(&config, &stdin_raw)?; + let mut input = base_common::get_input(&config)?; - base_common::handle_input( - &mut input, - format, - config.wrap_cols, - config.ignore_garbage, - config.decode, - ) + base_common::handle_input(&mut input, format, config) } pub fn uu_app() -> Command { diff --git a/src/uu/base32/src/base_common.rs b/src/uu/base32/src/base_common.rs index 897722dd36e..f6b88f55157 100644 --- a/src/uu/base32/src/base_common.rs +++ b/src/uu/base32/src/base_common.rs @@ -3,27 +3,35 @@ // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. 
-use std::io::{stdout, Read, Write}; +// spell-checker:ignore hexupper lsbf msbf unpadded +use clap::{crate_version, Arg, ArgAction, Command}; +use std::fs::File; +use std::io::{self, ErrorKind, Read}; +use std::path::{Path, PathBuf}; use uucore::display::Quotable; -use uucore::encoding::{wrap_print, Data, EncodeError, Format}; +use uucore::encoding::{ + for_base_common::{BASE32, BASE32HEX, BASE64, BASE64URL, HEXUPPER}, + Format, Z85Wrapper, BASE2LSBF, BASE2MSBF, +}; +use uucore::encoding::{EncodingWrapper, SupportsFastDecodeAndEncode}; use uucore::error::{FromIo, UResult, USimpleError, UUsageError}; use uucore::format_usage; -use std::fs::File; -use std::io::{BufReader, Stdin}; -use std::path::Path; - -use clap::{crate_version, Arg, ArgAction, Command}; +pub const BASE_CMD_PARSE_ERROR: i32 = 1; -pub static BASE_CMD_PARSE_ERROR: i32 = 1; +/// Encoded output will be formatted in lines of this length (the last line can be shorter) +/// +/// Other implementations default to 76 +/// +/// This default is only used if no "-w"/"--wrap" argument is passed +pub const WRAP_DEFAULT: usize = 76; -// Config. 
pub struct Config { pub decode: bool, pub ignore_garbage: bool, pub wrap_cols: Option, - pub to_read: Option, + pub to_read: Option, } pub mod options { @@ -35,9 +43,10 @@ pub mod options { impl Config { pub fn from(options: &clap::ArgMatches) -> UResult { - let file: Option = match options.get_many::(options::FILE) { + let to_read = match options.get_many::(options::FILE) { Some(mut values) => { let name = values.next().unwrap(); + if let Some(extra_op) = values.next() { return Err(UUsageError::new( BASE_CMD_PARSE_ERROR, @@ -48,19 +57,22 @@ impl Config { if name == "-" { None } else { - if !Path::exists(Path::new(name)) { + let path = Path::new(name); + + if !path.exists() { return Err(USimpleError::new( BASE_CMD_PARSE_ERROR, - format!("{}: No such file or directory", name.maybe_quote()), + format!("{}: No such file or directory", path.maybe_quote()), )); } - Some(name.clone()) + + Some(path.to_owned()) } } None => None, }; - let cols = options + let wrap_cols = options .get_one::(options::WRAP) .map(|num| { num.parse::().map_err(|_| { @@ -75,8 +87,8 @@ impl Config { Ok(Self { decode: options.get_flag(options::DECODE), ignore_garbage: options.get_flag(options::IGNORE_GARBAGE), - wrap_cols: cols, - to_read: file, + wrap_cols, + to_read, }) } } @@ -118,7 +130,7 @@ pub fn base_app(about: &'static str, usage: &str) -> Command { .short('w') .long(options::WRAP) .value_name("COLS") - .help("wrap encoded lines after COLS character (default 76, 0 to disable wrapping)") + .help(format!("wrap encoded lines after COLS character (default {WRAP_DEFAULT}, 0 to disable wrapping)")) .overrides_with(options::WRAP), ) // "multiple" arguments are used to check whether there is more than one @@ -131,55 +143,619 @@ pub fn base_app(about: &'static str, usage: &str) -> Command { ) } -pub fn get_input<'a>(config: &Config, stdin_ref: &'a Stdin) -> UResult> { +pub fn get_input(config: &Config) -> UResult> { match &config.to_read { - Some(name) => { - let file_buf = - 
File::open(Path::new(name)).map_err_context(|| name.maybe_quote().to_string())?; - Ok(Box::new(BufReader::new(file_buf))) // as Box + Some(path_buf) => { + // Do not buffer input, because buffering is handled by `fast_decode` and `fast_encode` + let file = + File::open(path_buf).map_err_context(|| path_buf.maybe_quote().to_string())?; + + Ok(Box::new(file)) } None => { - Ok(Box::new(stdin_ref.lock())) // as Box + let stdin_lock = io::stdin().lock(); + + Ok(Box::new(stdin_lock)) } } } -pub fn handle_input( - input: &mut R, - format: Format, - line_wrap: Option, - ignore_garbage: bool, - decode: bool, -) -> UResult<()> { - let mut data = Data::new(input, format).ignore_garbage(ignore_garbage); - if let Some(wrap) = line_wrap { - data = data.line_wrap(wrap); +pub fn handle_input(input: &mut R, format: Format, config: Config) -> UResult<()> { + let supports_fast_decode_and_encode = get_supports_fast_decode_and_encode(format); + + let supports_fast_decode_and_encode_ref = supports_fast_decode_and_encode.as_ref(); + + let mut stdout_lock = io::stdout().lock(); + + if config.decode { + fast_decode::fast_decode( + input, + &mut stdout_lock, + supports_fast_decode_and_encode_ref, + config.ignore_garbage, + ) + } else { + fast_encode::fast_encode( + input, + &mut stdout_lock, + supports_fast_decode_and_encode_ref, + config.wrap_cols, + ) + } +} + +pub fn get_supports_fast_decode_and_encode(format: Format) -> Box { + const BASE16_VALID_DECODING_MULTIPLE: usize = 2; + const BASE2_VALID_DECODING_MULTIPLE: usize = 8; + const BASE32_VALID_DECODING_MULTIPLE: usize = 8; + const BASE64_VALID_DECODING_MULTIPLE: usize = 4; + + const BASE16_UNPADDED_MULTIPLE: usize = 1; + const BASE2_UNPADDED_MULTIPLE: usize = 1; + const BASE32_UNPADDED_MULTIPLE: usize = 5; + const BASE64_UNPADDED_MULTIPLE: usize = 3; + + match format { + Format::Base16 => Box::from(EncodingWrapper::new( + HEXUPPER, + BASE16_VALID_DECODING_MULTIPLE, + BASE16_UNPADDED_MULTIPLE, + // spell-checker:disable-next-line + 
b"0123456789ABCDEF", + )), + Format::Base2Lsbf => Box::from(EncodingWrapper::new( + BASE2LSBF, + BASE2_VALID_DECODING_MULTIPLE, + BASE2_UNPADDED_MULTIPLE, + // spell-checker:disable-next-line + b"01", + )), + Format::Base2Msbf => Box::from(EncodingWrapper::new( + BASE2MSBF, + BASE2_VALID_DECODING_MULTIPLE, + BASE2_UNPADDED_MULTIPLE, + // spell-checker:disable-next-line + b"01", + )), + Format::Base32 => Box::from(EncodingWrapper::new( + BASE32, + BASE32_VALID_DECODING_MULTIPLE, + BASE32_UNPADDED_MULTIPLE, + // spell-checker:disable-next-line + b"ABCDEFGHIJKLMNOPQRSTUVWXYZ234567=", + )), + Format::Base32Hex => Box::from(EncodingWrapper::new( + BASE32HEX, + BASE32_VALID_DECODING_MULTIPLE, + BASE32_UNPADDED_MULTIPLE, + // spell-checker:disable-next-line + b"0123456789ABCDEFGHIJKLMNOPQRSTUV=", + )), + Format::Base64 => Box::from(EncodingWrapper::new( + BASE64, + BASE64_VALID_DECODING_MULTIPLE, + BASE64_UNPADDED_MULTIPLE, + // spell-checker:disable-next-line + b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789=+/", + )), + Format::Base64Url => Box::from(EncodingWrapper::new( + BASE64URL, + BASE64_VALID_DECODING_MULTIPLE, + BASE64_UNPADDED_MULTIPLE, + // spell-checker:disable-next-line + b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789=_-", + )), + Format::Z85 => Box::from(Z85Wrapper {}), } +} + +pub mod fast_encode { + use crate::base_common::{format_read_error, WRAP_DEFAULT}; + use std::{ + collections::VecDeque, + io::{self, ErrorKind, Read, Write}, + num::NonZeroUsize, + }; + use uucore::{ + encoding::SupportsFastDecodeAndEncode, + error::{UResult, USimpleError}, + }; + + struct LineWrapping { + line_length: NonZeroUsize, + print_buffer: Vec, + } + + // Start of helper functions + fn encode_in_chunks_to_buffer( + supports_fast_decode_and_encode: &dyn SupportsFastDecodeAndEncode, + encode_in_chunks_of_size: usize, + bytes_to_steal: usize, + read_buffer: &[u8], + encoded_buffer: &mut VecDeque, + leftover_buffer: &mut VecDeque, + ) -> 
UResult<()> { + let bytes_to_chunk = if bytes_to_steal > 0 { + let (stolen_bytes, rest_of_read_buffer) = read_buffer.split_at(bytes_to_steal); + + leftover_buffer.extend(stolen_bytes); + + // After appending the stolen bytes to `leftover_buffer`, it should be the right size + assert!(leftover_buffer.len() == encode_in_chunks_of_size); + + // Encode the old unencoded data and the stolen bytes, and add the result to + // `encoded_buffer` + supports_fast_decode_and_encode + .encode_to_vec_deque(leftover_buffer.make_contiguous(), encoded_buffer)?; + + // Reset `leftover_buffer` + leftover_buffer.clear(); + + rest_of_read_buffer + } else { + // Do not need to steal bytes from `read_buffer` + read_buffer + }; + + let chunks_exact = bytes_to_chunk.chunks_exact(encode_in_chunks_of_size); + + let remainder = chunks_exact.remainder(); + + for sl in chunks_exact { + assert!(sl.len() == encode_in_chunks_of_size); + + supports_fast_decode_and_encode.encode_to_vec_deque(sl, encoded_buffer)?; + } + + leftover_buffer.extend(remainder); + + Ok(()) + } + + fn write_without_line_breaks( + encoded_buffer: &mut VecDeque, + output: &mut dyn Write, + is_cleanup: bool, + ) -> io::Result<()> { + // TODO + // `encoded_buffer` only has to be a VecDeque if line wrapping is enabled + // (`make_contiguous` should be a no-op here) + // Refactoring could avoid this call + output.write_all(encoded_buffer.make_contiguous())?; + + if is_cleanup { + output.write_all(b"\n")?; + } else { + encoded_buffer.clear(); + } - if decode { - match data.decode() { - Ok(s) => { - // Silent the warning as we want to the error message - #[allow(clippy::question_mark)] - if stdout().write_all(&s).is_err() { - // on windows console, writing invalid utf8 returns an error - return Err(USimpleError::new(1, "error: cannot write non-utf8 data")); + Ok(()) + } + + fn write_with_line_breaks( + &mut LineWrapping { + ref line_length, + ref mut print_buffer, + }: &mut LineWrapping, + encoded_buffer: &mut VecDeque, + output: 
&mut dyn Write, + is_cleanup: bool, + ) -> io::Result<()> { + let line_length = line_length.get(); + + let make_contiguous_result = encoded_buffer.make_contiguous(); + + let chunks_exact = make_contiguous_result.chunks_exact(line_length); + + let mut bytes_added_to_print_buffer = 0; + + for sl in chunks_exact { + bytes_added_to_print_buffer += sl.len(); + + print_buffer.extend_from_slice(sl); + print_buffer.push(b'\n'); + } + + output.write_all(print_buffer)?; + + // Remove the bytes that were just printed from `encoded_buffer` + drop(encoded_buffer.drain(..bytes_added_to_print_buffer)); + + if is_cleanup { + if encoded_buffer.is_empty() { + // Do not write a newline in this case, because two trailing newlines should never be printed + } else { + // Print the partial line, since this is cleanup and no more data is coming + output.write_all(encoded_buffer.make_contiguous())?; + output.write_all(b"\n")?; + } + } else { + print_buffer.clear(); + } + + Ok(()) + } + + fn write_to_output( + line_wrapping_option: &mut Option, + encoded_buffer: &mut VecDeque, + output: &mut dyn Write, + is_cleanup: bool, + ) -> io::Result<()> { + // Write all data in `encoded_buffer` to `output` + if let &mut Some(ref mut li) = line_wrapping_option { + write_with_line_breaks(li, encoded_buffer, output, is_cleanup)?; + } else { + write_without_line_breaks(encoded_buffer, output, is_cleanup)?; + } + + Ok(()) + } + // End of helper functions + + pub fn fast_encode( + input: &mut R, + mut output: W, + supports_fast_decode_and_encode: &dyn SupportsFastDecodeAndEncode, + wrap: Option, + ) -> UResult<()> { + // Based on performance testing + const INPUT_BUFFER_SIZE: usize = 32 * 1_024; + + const ENCODE_IN_CHUNKS_OF_SIZE_MULTIPLE: usize = 1_024; + + let encode_in_chunks_of_size = + supports_fast_decode_and_encode.unpadded_multiple() * ENCODE_IN_CHUNKS_OF_SIZE_MULTIPLE; + + assert!(encode_in_chunks_of_size > 0); + + // The "data-encoding" crate supports line wrapping, but not arbitrary line 
wrapping, only certain widths, so + // line wrapping must be handled here. + // https://github.com/ia0/data-encoding/blob/4f42ad7ef242f6d243e4de90cd1b46a57690d00e/lib/src/lib.rs#L1710 + let mut line_wrapping = match wrap { + // Line wrapping is disabled because "-w"/"--wrap" was passed with "0" + Some(0) => None, + // A custom line wrapping value was passed + Some(an) => Some(LineWrapping { + line_length: NonZeroUsize::new(an).unwrap(), + print_buffer: Vec::::new(), + }), + // Line wrapping was not set, so the default is used + None => Some(LineWrapping { + line_length: NonZeroUsize::new(WRAP_DEFAULT).unwrap(), + print_buffer: Vec::::new(), + }), + }; + + // Start of buffers + // Data that was read from `input` + let mut input_buffer = vec![0; INPUT_BUFFER_SIZE]; + + assert!(!input_buffer.is_empty()); + + // Data that was read from `input` but has not been encoded yet + let mut leftover_buffer = VecDeque::::new(); + + // Encoded data that needs to be written to `output` + let mut encoded_buffer = VecDeque::::new(); + // End of buffers + + loop { + match input.read(&mut input_buffer) { + Ok(bytes_read_from_input) => { + if bytes_read_from_input == 0 { + break; + } + + // The part of `input_buffer` that was actually filled by the call to `read` + let read_buffer = &input_buffer[..bytes_read_from_input]; + + // How many bytes to steal from `read_buffer` to get `leftover_buffer` to the right size + let bytes_to_steal = encode_in_chunks_of_size - leftover_buffer.len(); + + if bytes_to_steal > bytes_read_from_input { + // Do not have enough data to encode a chunk, so copy data to `leftover_buffer` and read more + leftover_buffer.extend(read_buffer); + + assert!(leftover_buffer.len() < encode_in_chunks_of_size); + + continue; + } + + // Encode data in chunks, then place it in `encoded_buffer` + encode_in_chunks_to_buffer( + supports_fast_decode_and_encode, + encode_in_chunks_of_size, + bytes_to_steal, + read_buffer, + &mut encoded_buffer, + &mut leftover_buffer, + )?; + + 
assert!(leftover_buffer.len() < encode_in_chunks_of_size); + + // Write all data in `encoded_buffer` to `output` + write_to_output(&mut line_wrapping, &mut encoded_buffer, &mut output, false)?; + } + Err(er) => { + let kind = er.kind(); + + if kind == ErrorKind::Interrupted { + // TODO + // Retry reading? + } + + return Err(USimpleError::new(1, format_read_error(kind))); } - Ok(()) } - Err(_) => Err(USimpleError::new(1, "error: invalid input")), } - } else { - match data.encode() { - Ok(s) => { - wrap_print(&data, &s); - Ok(()) + + // Cleanup + // `input` has finished producing data, so the data remaining in the buffers needs to be encoded and printed + { + // Encode all remaining unencoded bytes, placing them in `encoded_buffer` + supports_fast_decode_and_encode + .encode_to_vec_deque(leftover_buffer.make_contiguous(), &mut encoded_buffer)?; + + // Write all data in `encoded_buffer` to output + // `is_cleanup` triggers special cleanup-only logic + write_to_output(&mut line_wrapping, &mut encoded_buffer, &mut output, true)?; + } + + Ok(()) + } +} + +pub mod fast_decode { + use crate::base_common::format_read_error; + use std::io::{self, ErrorKind, Read, Write}; + use uucore::{ + encoding::SupportsFastDecodeAndEncode, + error::{UResult, USimpleError}, + }; + + // Start of helper functions + fn alphabet_to_table(alphabet: &[u8], ignore_garbage: bool) -> [bool; 256] { + // If `ignore_garbage` is enabled, all characters outside the alphabet are ignored + // If it is not enabled, only '\n' and '\r' are ignored + if ignore_garbage { + // Note: "false" here + let mut table = [false; 256]; + + // Pass through no characters except those in the alphabet + for ue in alphabet { + let us = usize::from(*ue); + + // Should not have been set yet + assert!(!table[us]); + + table[us] = true; + } + + table + } else { + // Note: "true" here + let mut table = [true; 256]; + + // Pass through all characters except '\n' and '\r' + for ue in [b'\n', b'\r'] { + let us = usize::from(ue); + 
+ // Should not have been set yet + assert!(table[us]); + + table[us] = false; + } + + table + } + } + + fn decode_in_chunks_to_buffer( + supports_fast_decode_and_encode: &dyn SupportsFastDecodeAndEncode, + decode_in_chunks_of_size: usize, + bytes_to_steal: usize, + read_buffer_filtered: &[u8], + decoded_buffer: &mut Vec, + leftover_buffer: &mut Vec, + ) -> UResult<()> { + let bytes_to_chunk = if bytes_to_steal > 0 { + let (stolen_bytes, rest_of_read_buffer_filtered) = + read_buffer_filtered.split_at(bytes_to_steal); + + leftover_buffer.extend(stolen_bytes); + + // After appending the stolen bytes to `leftover_buffer`, it should be the right size + assert!(leftover_buffer.len() == decode_in_chunks_of_size); + + // Decode the old un-decoded data and the stolen bytes, and add the result to + // `decoded_buffer` + supports_fast_decode_and_encode.decode_into_vec(leftover_buffer, decoded_buffer)?; + + // Reset `leftover_buffer` + leftover_buffer.clear(); + + rest_of_read_buffer_filtered + } else { + // Do not need to steal bytes from `read_buffer` + read_buffer_filtered + }; + + let chunks_exact = bytes_to_chunk.chunks_exact(decode_in_chunks_of_size); + + let remainder = chunks_exact.remainder(); + + for sl in chunks_exact { + assert!(sl.len() == decode_in_chunks_of_size); + + supports_fast_decode_and_encode.decode_into_vec(sl, decoded_buffer)?; + } + + leftover_buffer.extend(remainder); + + Ok(()) + } + + fn write_to_output(decoded_buffer: &mut Vec, output: &mut dyn Write) -> io::Result<()> { + // Write all data in `decoded_buffer` to `output` + output.write_all(decoded_buffer.as_slice())?; + + decoded_buffer.clear(); + + Ok(()) + } + // End of helper functions + + pub fn fast_decode( + input: &mut R, + mut output: &mut W, + supports_fast_decode_and_encode: &dyn SupportsFastDecodeAndEncode, + ignore_garbage: bool, + ) -> UResult<()> { + // Based on performance testing + const INPUT_BUFFER_SIZE: usize = 32 * 1_024; + + const DECODE_IN_CHUNKS_OF_SIZE_MULTIPLE: usize = 
1_024; + + let alphabet = supports_fast_decode_and_encode.alphabet(); + let decode_in_chunks_of_size = supports_fast_decode_and_encode.valid_decoding_multiple() + * DECODE_IN_CHUNKS_OF_SIZE_MULTIPLE; + + assert!(decode_in_chunks_of_size > 0); + + // Note that it's not worth using "data-encoding"'s ignore functionality if `ignore_garbage` is true, because + // "data-encoding"'s ignore functionality cannot discard non-ASCII bytes. The data has to be filtered before + // passing it to "data-encoding", so there is no point in doing any filtering in "data-encoding". This also + // allows execution to stay on the happy path in "data-encoding": + // https://github.com/ia0/data-encoding/blob/4f42ad7ef242f6d243e4de90cd1b46a57690d00e/lib/src/lib.rs#L754-L756 + // It is also not worth using "data-encoding"'s ignore functionality when `ignore_garbage` is + // false. + // Note that the alphabet constants above already include the padding characters + // TODO + // Precompute this + let table = alphabet_to_table(alphabet, ignore_garbage); + + // Start of buffers + // Data that was read from `input` + let mut input_buffer = vec![0; INPUT_BUFFER_SIZE]; + + assert!(!input_buffer.is_empty()); + + // Data that was read from `input` but has not been decoded yet + let mut leftover_buffer = Vec::::new(); + + // Decoded data that needs to be written to `output` + let mut decoded_buffer = Vec::::new(); + + // Buffer that will be used when `ignore_garbage` is true, and the chunk read from `input` contains garbage + // data + let mut non_garbage_buffer = Vec::::new(); + // End of buffers + + loop { + match input.read(&mut input_buffer) { + Ok(bytes_read_from_input) => { + if bytes_read_from_input == 0 { + break; + } + + let read_buffer_filtered = { + // The part of `input_buffer` that was actually filled by the call to `read` + let read_buffer = &input_buffer[..bytes_read_from_input]; + + // First just scan the data for the happy path + // Yields significant speedup when the input does not 
contain line endings + let found_garbage = read_buffer.iter().any(|ue| { + // Garbage, since it was not found in the table + !table[usize::from(*ue)] + }); + + if found_garbage { + non_garbage_buffer.clear(); + + for ue in read_buffer { + if table[usize::from(*ue)] { + // Not garbage, since it was found in the table + non_garbage_buffer.push(*ue); + } + } + + non_garbage_buffer.as_slice() + } else { + read_buffer + } + }; + + // How many bytes to steal from `read_buffer` to get `leftover_buffer` to the right size + let bytes_to_steal = decode_in_chunks_of_size - leftover_buffer.len(); + + if bytes_to_steal > read_buffer_filtered.len() { + // Do not have enough data to decode a chunk, so copy data to `leftover_buffer` and read more + leftover_buffer.extend(read_buffer_filtered); + + assert!(leftover_buffer.len() < decode_in_chunks_of_size); + + continue; + } + + // Decode data in chunks, then place it in `decoded_buffer` + decode_in_chunks_to_buffer( + supports_fast_decode_and_encode, + decode_in_chunks_of_size, + bytes_to_steal, + read_buffer_filtered, + &mut decoded_buffer, + &mut leftover_buffer, + )?; + + assert!(leftover_buffer.len() < decode_in_chunks_of_size); + + // Write all data in `decoded_buffer` to `output` + write_to_output(&mut decoded_buffer, &mut output)?; + } + Err(er) => { + let kind = er.kind(); + + if kind == ErrorKind::Interrupted { + // TODO + // Retry reading? 
+ } + + return Err(USimpleError::new(1, format_read_error(kind))); + } } - Err(EncodeError::InvalidInput) => Err(USimpleError::new(1, "error: invalid input")), - Err(_) => Err(USimpleError::new( - 1, - "error: invalid input (length must be multiple of 4 characters)", - )), + } + + // Cleanup + // `input` has finished producing data, so the data remaining in the buffers needs to be decoded and printed + { + // Decode all remaining encoded bytes, placing them in `decoded_buffer` + supports_fast_decode_and_encode + .decode_into_vec(&leftover_buffer, &mut decoded_buffer)?; + + // Write all data in `decoded_buffer` to `output` + write_to_output(&mut decoded_buffer, &mut output)?; + } + + Ok(()) + } +} + +fn format_read_error(kind: ErrorKind) -> String { + let kind_string = kind.to_string(); + + // e.g. "is a directory" -> "Is a directory" + let mut kind_string_capitalized = String::with_capacity(kind_string.len()); + + for (index, ch) in kind_string.char_indices() { + if index == 0 { + for cha in ch.to_uppercase() { + kind_string_capitalized.push(cha); + } + } else { + kind_string_capitalized.push(ch); } } + + format!("read error: {kind_string_capitalized}") } diff --git a/src/uu/base32/tests/property_tests.rs b/src/uu/base32/tests/property_tests.rs new file mode 100644 index 00000000000..0f2393c42ab --- /dev/null +++ b/src/uu/base32/tests/property_tests.rs @@ -0,0 +1,430 @@ +// spell-checker:ignore lsbf msbf proptest + +use proptest::{prelude::TestCaseError, prop_assert, prop_assert_eq, test_runner::TestRunner}; +use std::io::Cursor; +use uu_base32::base_common::{fast_decode, fast_encode, get_supports_fast_decode_and_encode}; +use uucore::encoding::{Format, SupportsFastDecodeAndEncode}; + +const CASES: u32 = { + #[cfg(debug_assertions)] + { + 32 + } + + #[cfg(not(debug_assertions))] + { + 128 + } +}; + +const NORMAL_INPUT_SIZE_LIMIT: usize = { + #[cfg(debug_assertions)] + { + // 256 kibibytes + 256 * 1024 + } + + #[cfg(not(debug_assertions))] + { + // 4 mebibytes + 4 * 
1024 * 1024 + } +}; + +const LARGE_INPUT_SIZE_LIMIT: usize = 4 * NORMAL_INPUT_SIZE_LIMIT; + +// Note that `TestRunner`s cannot be reused +fn get_test_runner() -> TestRunner { + TestRunner::new(proptest::test_runner::Config { + cases: CASES, + failure_persistence: None, + + ..proptest::test_runner::Config::default() + }) +} + +fn generic_round_trip(format: Format) { + let supports_fast_decode_and_encode = get_supports_fast_decode_and_encode(format); + + let supports_fast_decode_and_encode_ref = supports_fast_decode_and_encode.as_ref(); + + // Make sure empty inputs round trip + { + get_test_runner() + .run( + &( + proptest::bool::ANY, + proptest::bool::ANY, + proptest::option::of(0_usize..512_usize), + ), + |(ignore_garbage, line_wrap_zero, line_wrap)| { + configurable_round_trip( + format, + supports_fast_decode_and_encode_ref, + ignore_garbage, + line_wrap_zero, + line_wrap, + // Do not add garbage + Vec::<(usize, u8)>::new(), + // Empty input + Vec::::new(), + ) + }, + ) + .unwrap(); + } + + // Unusually large line wrapping settings + { + get_test_runner() + .run( + &( + proptest::bool::ANY, + proptest::bool::ANY, + proptest::option::of(512_usize..65_535_usize), + proptest::collection::vec(proptest::num::u8::ANY, 0..NORMAL_INPUT_SIZE_LIMIT), + ), + |(ignore_garbage, line_wrap_zero, line_wrap, input)| { + configurable_round_trip( + format, + supports_fast_decode_and_encode_ref, + ignore_garbage, + line_wrap_zero, + line_wrap, + // Do not add garbage + Vec::<(usize, u8)>::new(), + input, + ) + }, + ) + .unwrap(); + } + + // Spend more time on sane line wrapping settings + { + get_test_runner() + .run( + &( + proptest::bool::ANY, + proptest::bool::ANY, + proptest::option::of(0_usize..512_usize), + proptest::collection::vec(proptest::num::u8::ANY, 0..NORMAL_INPUT_SIZE_LIMIT), + ), + |(ignore_garbage, line_wrap_zero, line_wrap, input)| { + configurable_round_trip( + format, + supports_fast_decode_and_encode_ref, + ignore_garbage, + line_wrap_zero, + line_wrap, + // Do 
not add garbage + Vec::<(usize, u8)>::new(), + input, + ) + }, + ) + .unwrap(); + } + + // Test with garbage data + { + get_test_runner() + .run( + &( + proptest::bool::ANY, + proptest::bool::ANY, + proptest::option::of(0_usize..512_usize), + // Garbage data to insert + proptest::collection::vec( + ( + // Random index + proptest::num::usize::ANY, + // In all of the encodings being tested, non-ASCII bytes are garbage + 128_u8..=u8::MAX, + ), + 0..4_096, + ), + proptest::collection::vec(proptest::num::u8::ANY, 0..NORMAL_INPUT_SIZE_LIMIT), + ), + |(ignore_garbage, line_wrap_zero, line_wrap, garbage_data, input)| { + configurable_round_trip( + format, + supports_fast_decode_and_encode_ref, + ignore_garbage, + line_wrap_zero, + line_wrap, + garbage_data, + input, + ) + }, + ) + .unwrap(); + } + + // Test small inputs + { + get_test_runner() + .run( + &( + proptest::bool::ANY, + proptest::bool::ANY, + proptest::option::of(0_usize..512_usize), + proptest::collection::vec(proptest::num::u8::ANY, 0..1_024), + ), + |(ignore_garbage, line_wrap_zero, line_wrap, input)| { + configurable_round_trip( + format, + supports_fast_decode_and_encode_ref, + ignore_garbage, + line_wrap_zero, + line_wrap, + // Do not add garbage + Vec::<(usize, u8)>::new(), + input, + ) + }, + ) + .unwrap(); + } + + // Test small inputs with garbage data + { + get_test_runner() + .run( + &( + proptest::bool::ANY, + proptest::bool::ANY, + proptest::option::of(0_usize..512_usize), + // Garbage data to insert + proptest::collection::vec( + ( + // Random index + proptest::num::usize::ANY, + // In all of the encodings being tested, non-ASCII bytes are garbage + 128_u8..=u8::MAX, + ), + 0..1_024, + ), + proptest::collection::vec(proptest::num::u8::ANY, 0..1_024), + ), + |(ignore_garbage, line_wrap_zero, line_wrap, garbage_data, input)| { + configurable_round_trip( + format, + supports_fast_decode_and_encode_ref, + ignore_garbage, + line_wrap_zero, + line_wrap, + garbage_data, + input, + ) + }, + ) + .unwrap(); 
+ } + + // Test large inputs + { + get_test_runner() + .run( + &( + proptest::bool::ANY, + proptest::bool::ANY, + proptest::option::of(0_usize..512_usize), + proptest::collection::vec(proptest::num::u8::ANY, 0..LARGE_INPUT_SIZE_LIMIT), + ), + |(ignore_garbage, line_wrap_zero, line_wrap, input)| { + configurable_round_trip( + format, + supports_fast_decode_and_encode_ref, + ignore_garbage, + line_wrap_zero, + line_wrap, + // Do not add garbage + Vec::<(usize, u8)>::new(), + input, + ) + }, + ) + .unwrap(); + } +} + +fn configurable_round_trip( + format: Format, + supports_fast_decode_and_encode: &dyn SupportsFastDecodeAndEncode, + ignore_garbage: bool, + line_wrap_zero: bool, + line_wrap: Option, + garbage_data: Vec<(usize, u8)>, + mut input: Vec, +) -> Result<(), TestCaseError> { + // Z85 only accepts inputs with lengths divisible by 4 + if let Format::Z85 = format { + // Reduce length of "input" until it is divisible by 4 + input.truncate((input.len() / 4) * 4); + + assert!((input.len() % 4) == 0); + } + + let line_wrap_to_use = if line_wrap_zero { Some(0) } else { line_wrap }; + + let input_len = input.len(); + + let garbage_data_len = garbage_data.len(); + + let garbage_data_is_empty = garbage_data_len == 0; + + let (input, encoded) = { + let mut output = Vec::with_capacity(input_len * 8); + + let mut cursor = Cursor::new(input); + + fast_encode::fast_encode( + &mut cursor, + &mut output, + supports_fast_decode_and_encode, + line_wrap_to_use, + ) + .unwrap(); + + (cursor.into_inner(), output) + }; + + let encoded_or_encoded_with_garbage = if garbage_data_is_empty { + encoded + } else { + let encoded_len = encoded.len(); + + let encoded_highest_index = match encoded_len.checked_sub(1) { + Some(0) | None => None, + Some(x) => Some(x), + }; + + let mut garbage_data_indexed = vec![Option::::None; encoded_len]; + + let mut encoded_with_garbage = Vec::::with_capacity(encoded_len + garbage_data_len); + + for (index, garbage_byte) in garbage_data { + if let Some(x) = 
encoded_highest_index { + let index_to_use = index % x; + + garbage_data_indexed[index_to_use] = Some(garbage_byte); + } else { + encoded_with_garbage.push(garbage_byte); + } + } + + for (index, encoded_byte) in encoded.into_iter().enumerate() { + encoded_with_garbage.push(encoded_byte); + + if let Some(garbage_byte) = garbage_data_indexed[index] { + encoded_with_garbage.push(garbage_byte); + } + } + + encoded_with_garbage + }; + + match line_wrap_to_use { + Some(0) => { + let line_endings_count = encoded_or_encoded_with_garbage + .iter() + .filter(|byte| **byte == b'\n') + .count(); + + // If line wrapping is disabled, there should only be one '\n' character (at the very end of the output) + prop_assert_eq!(line_endings_count, 1); + } + _ => { + // TODO + // Validate other line wrapping settings + } + } + + let decoded_or_error = { + let mut output = Vec::with_capacity(input_len); + + let mut cursor = Cursor::new(encoded_or_encoded_with_garbage); + + match fast_decode::fast_decode( + &mut cursor, + &mut output, + supports_fast_decode_and_encode, + ignore_garbage, + ) { + Ok(()) => Ok(output), + Err(er) => Err(er), + } + }; + + let made_round_trip = match decoded_or_error { + Ok(ve) => input.as_slice() == ve.as_slice(), + Err(_) => false, + }; + + let result_was_correct = if garbage_data_is_empty || ignore_garbage { + // If there was no garbage data added, or if "ignore_garbage" was enabled, expect the round trip to succeed + made_round_trip + } else { + // If garbage data was added, and "ignore_garbage" was disabled, expect the round trip to fail + + !made_round_trip + }; + + if !result_was_correct { + eprintln!( + "\ +(configurable_round_trip) FAILURE +format: {format:?} +ignore_garbage: {ignore_garbage} +line_wrap_to_use: {line_wrap_to_use:?} +garbage_data_len: {garbage_data_len} +input_len: {input_len} +", + ); + } + + prop_assert!(result_was_correct); + + Ok(()) +} + +#[test] +fn base16_round_trip() { + generic_round_trip(Format::Base16); +} + +#[test] +fn 
base2lsbf_round_trip() { + generic_round_trip(Format::Base2Lsbf); +} + +#[test] +fn base2msbf_round_trip() { + generic_round_trip(Format::Base2Msbf); +} + +#[test] +fn base32_round_trip() { + generic_round_trip(Format::Base32); +} + +#[test] +fn base32hex_round_trip() { + generic_round_trip(Format::Base32Hex); +} + +#[test] +fn base64_round_trip() { + generic_round_trip(Format::Base64); +} + +#[test] +fn base64url_round_trip() { + generic_round_trip(Format::Base64Url); +} + +#[test] +fn z85_round_trip() { + generic_round_trip(Format::Z85); +} diff --git a/src/uu/base64/Cargo.toml b/src/uu/base64/Cargo.toml index c65fe5b971e..5afc4283e6d 100644 --- a/src/uu/base64/Cargo.toml +++ b/src/uu/base64/Cargo.toml @@ -17,6 +17,7 @@ readme.workspace = true path = "src/base64.rs" [dependencies] +clap = { workspace = true } uucore = { workspace = true, features = ["encoding"] } uu_base32 = { workspace = true } diff --git a/src/uu/base64/src/base64.rs b/src/uu/base64/src/base64.rs index 6544638bdae..86eb75bf119 100644 --- a/src/uu/base64/src/base64.rs +++ b/src/uu/base64/src/base64.rs @@ -3,13 +3,10 @@ // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. 
+use clap::Command; use uu_base32::base_common; -pub use uu_base32::uu_app; - use uucore::{encoding::Format, error::UResult, help_about, help_usage}; -use std::io::{stdin, Read}; - const ABOUT: &str = help_about!("base64.md"); const USAGE: &str = help_usage!("base64.md"); @@ -17,18 +14,13 @@ const USAGE: &str = help_usage!("base64.md"); pub fn uumain(args: impl uucore::Args) -> UResult<()> { let format = Format::Base64; - let config: base_common::Config = base_common::parse_base_cmd_args(args, ABOUT, USAGE)?; + let config = base_common::parse_base_cmd_args(args, ABOUT, USAGE)?; - // Create a reference to stdin so we can return a locked stdin from - // parse_base_cmd_args - let stdin_raw = stdin(); - let mut input: Box = base_common::get_input(&config, &stdin_raw)?; + let mut input = base_common::get_input(&config)?; + + base_common::handle_input(&mut input, format, config) +} - base_common::handle_input( - &mut input, - format, - config.wrap_cols, - config.ignore_garbage, - config.decode, - ) +pub fn uu_app() -> Command { + base_common::base_app(ABOUT, USAGE) } diff --git a/src/uu/basenc/BENCHMARKING.md b/src/uu/basenc/BENCHMARKING.md new file mode 100644 index 00000000000..8248cbbc53b --- /dev/null +++ b/src/uu/basenc/BENCHMARKING.md @@ -0,0 +1,177 @@ + + +# Benchmarking base32, base64, and basenc + +Note that the functionality of the `base32` and `base64` programs is identical to that of the `basenc` program, using +the "--base32" and "--base64" options, respectively. For that reason, it is only necessary to benchmark `basenc`. + +To compare the runtime performance of the uutils implementation with the GNU Core Utilities implementation, you can +use a benchmarking tool like [hyperfine][0]. + +hyperfine currently does not measure maximum memory usage. Memory usage can be benchmarked using [poop][2], or +[toybox][3]'s "time" subcommand (both are Linux only). 
+ +Build the `basenc` binary using the release profile: + +```Shell +cargo build --package uu_basenc --profile release +``` + +## Expected performance + +uutils' `basenc` performs streaming decoding and encoding, and therefore should perform all operations with a constant +maximum memory usage, regardless of the size of the input. Release builds currently use less than 3 mebibytes of +memory, and memory usage greater than 10 mebibytes should be considered a bug. + +As of September 2024, uutils' `basenc` has runtime performance equal to or superior to GNU Core Utilities' `basenc` in +most scenarios. uutils' `basenc` uses slightly more memory, but given how small these quantities are in absolute +terms (see above), this is highly unlikely to be practically relevant to users. + +## Benchmark results (2024-09-27) + +### Setup + +```Shell +# Use uutils' dd to create a 1 gibibyte in-memory file filled with random bytes (Linux only). +# On other platforms, you can use /tmp instead of /dev/shm, but note that /tmp is not guaranteed to be in-memory. 
+coreutils dd if=/dev/urandom of=/dev/shm/one-random-gibibyte bs=1024 count=1048576 + +# Encode this file for use in decoding performance testing +/usr/bin/basenc --base32hex -- /dev/shm/one-random-gibibyte 1>/dev/shm/one-random-gibibyte-base32hex-encoded +/usr/bin/basenc --z85 -- /dev/shm/one-random-gibibyte 1>/dev/shm/one-random-gibibyte-z85-encoded +``` + +### Programs being tested + +uutils' `basenc`: + +``` +❯ git rev-list HEAD | coreutils head -n 1 -- - +a0718ef0ffd50539a2e2bc0095c9fadcd70ab857 +``` + +GNU Core Utilities' `basenc`: + +``` +❯ /usr/bin/basenc --version | coreutils head -n 1 -- - +basenc (GNU coreutils) 9.4 +``` + +### Encoding performance + +#### "--base64", default line wrapping (76 characters) + +➕ Faster than GNU Core Utilities + +``` +❯ hyperfine \ + --sort \ + command \ + -- \ + '/usr/bin/basenc --base64 -- /dev/shm/one-random-gibibyte 1>/dev/null' \ + './target/release/basenc --base64 -- /dev/shm/one-random-gibibyte 1>/dev/null' + +Benchmark 1: /usr/bin/basenc --base64 -- /dev/shm/one-random-gibibyte 1>/dev/null + Time (mean ± σ): 965.1 ms ± 7.9 ms [User: 766.2 ms, System: 193.4 ms] + Range (min … max): 950.2 ms … 976.9 ms 10 runs + +Benchmark 2: ./target/release/basenc --base64 -- /dev/shm/one-random-gibibyte 1>/dev/null + Time (mean ± σ): 696.6 ms ± 9.1 ms [User: 574.9 ms, System: 117.3 ms] + Range (min … max): 683.1 ms … 713.5 ms 10 runs + +Relative speed comparison + 1.39 ± 0.02 /usr/bin/basenc --base64 -- /dev/shm/one-random-gibibyte 1>/dev/null + 1.00 ./target/release/basenc --base64 -- /dev/shm/one-random-gibibyte 1>/dev/null +``` + +#### "--base16", no line wrapping + +➖ Slower than GNU Core Utilities + +``` +❯ poop \ + '/usr/bin/basenc --base16 --wrap 0 -- /dev/shm/one-random-gibibyte' \ + './target/release/basenc --base16 --wrap 0 -- /dev/shm/one-random-gibibyte' + +Benchmark 1 (6 runs): /usr/bin/basenc --base16 --wrap 0 -- /dev/shm/one-random-gibibyte + measurement mean ± σ min … max outliers delta + wall_time 836ms ± 13.3ms 
822ms … 855ms 0 ( 0%) 0% + peak_rss 2.05MB ± 73.0KB 1.94MB … 2.12MB 0 ( 0%) 0% + cpu_cycles 2.85G ± 32.8M 2.82G … 2.91G 0 ( 0%) 0% + instructions 14.0G ± 58.7 14.0G … 14.0G 0 ( 0%) 0% + cache_references 70.0M ± 6.48M 63.7M … 78.8M 0 ( 0%) 0% + cache_misses 582K ± 172K 354K … 771K 0 ( 0%) 0% + branch_misses 667K ± 4.55K 662K … 674K 0 ( 0%) 0% +Benchmark 2 (6 runs): ./target/release/basenc --base16 --wrap 0 -- /dev/shm/one-random-gibibyte + measurement mean ± σ min … max outliers delta + wall_time 884ms ± 6.38ms 878ms … 895ms 0 ( 0%) 💩+ 5.7% ± 1.6% + peak_rss 2.65MB ± 66.8KB 2.55MB … 2.74MB 0 ( 0%) 💩+ 29.3% ± 4.4% + cpu_cycles 3.15G ± 8.61M 3.14G … 3.16G 0 ( 0%) 💩+ 10.6% ± 1.1% + instructions 10.5G ± 275 10.5G … 10.5G 0 ( 0%) ⚡- 24.9% ± 0.0% + cache_references 93.5M ± 6.10M 87.2M … 104M 0 ( 0%) 💩+ 33.7% ± 11.6% + cache_misses 415K ± 52.3K 363K … 474K 0 ( 0%) - 28.8% ± 28.0% + branch_misses 1.43M ± 4.82K 1.42M … 1.43M 0 ( 0%) 💩+113.9% ± 0.9% +``` + +### Decoding performance + +#### "--base32hex" + +➕ Faster than GNU Core Utilities + +``` +❯ hyperfine \ + --sort \ + command \ + -- \ + '/usr/bin/basenc --base32hex --decode -- /dev/shm/one-random-gibibyte-base32hex-encoded 1>/dev/null' \ + './target/release/basenc --base32hex --decode -- /dev/shm/one-random-gibibyte-base32hex-encoded 1>/dev/null' + +Benchmark 1: /usr/bin/basenc --base32hex --decode -- /dev/shm/one-random-gibibyte-base32hex-encoded 1>/dev/null + Time (mean ± σ): 7.154 s ± 0.082 s [User: 6.802 s, System: 0.323 s] + Range (min … max): 7.051 s … 7.297 s 10 runs + +Benchmark 2: ./target/release/basenc --base32hex --decode -- /dev/shm/one-random-gibibyte-base32hex-encoded 1>/dev/null + Time (mean ± σ): 2.679 s ± 0.025 s [User: 2.446 s, System: 0.221 s] + Range (min … max): 2.649 s … 2.718 s 10 runs + +Relative speed comparison + 2.67 ± 0.04 /usr/bin/basenc --base32hex --decode -- /dev/shm/one-random-gibibyte-base32hex-encoded 1>/dev/null + 1.00 ./target/release/basenc --base32hex --decode -- 
/dev/shm/one-random-gibibyte-base32hex-encoded 1>/dev/null +``` + +#### "--z85", with "--ignore-garbage" + +➕ Faster than GNU Core Utilities + +``` +❯ poop \ + '/usr/bin/basenc --decode --ignore-garbage --z85 -- /dev/shm/one-random-gibibyte-z85-encoded' \ + './target/release/basenc --decode --ignore-garbage --z85 -- /dev/shm/one-random-gibibyte-z85-encoded' + +Benchmark 1 (3 runs): /usr/bin/basenc --decode --ignore-garbage --z85 -- /dev/shm/one-random-gibibyte-z85-encoded + measurement mean ± σ min … max outliers delta + wall_time 14.4s ± 68.4ms 14.3s … 14.4s 0 ( 0%) 0% + peak_rss 1.98MB ± 10.8KB 1.97MB … 1.99MB 0 ( 0%) 0% + cpu_cycles 58.4G ± 211M 58.3G … 58.7G 0 ( 0%) 0% + instructions 74.7G ± 64.0 74.7G … 74.7G 0 ( 0%) 0% + cache_references 41.8M ± 624K 41.2M … 42.4M 0 ( 0%) 0% + cache_misses 693K ± 118K 567K … 802K 0 ( 0%) 0% + branch_misses 1.24G ± 183K 1.24G … 1.24G 0 ( 0%) 0% +Benchmark 2 (3 runs): ./target/release/basenc --decode --ignore-garbage --z85 -- /dev/shm/one-random-gibibyte-z85-encoded + measurement mean ± σ min … max outliers delta + wall_time 2.80s ± 17.9ms 2.79s … 2.82s 0 ( 0%) ⚡- 80.5% ± 0.8% + peak_rss 2.61MB ± 67.4KB 2.57MB … 2.69MB 0 ( 0%) 💩+ 31.9% ± 5.5% + cpu_cycles 10.8G ± 27.9M 10.8G … 10.9G 0 ( 0%) ⚡- 81.5% ± 0.6% + instructions 39.0G ± 353 39.0G … 39.0G 0 ( 0%) ⚡- 47.7% ± 0.0% + cache_references 114M ± 2.43M 112M … 116M 0 ( 0%) 💩+173.3% ± 9.6% + cache_misses 1.06M ± 288K 805K … 1.37M 0 ( 0%) + 52.6% ± 72.0% + branch_misses 1.18M ± 14.7K 1.16M … 1.19M 0 ( 0%) ⚡- 99.9% ± 0.0% +``` + +[0]: https://github.com/sharkdp/hyperfine +[1]: https://github.com/sharkdp/hyperfine?tab=readme-ov-file#installation +[2]: https://github.com/andrewrk/poop +[3]: https://landley.net/toybox/ diff --git a/src/uu/basenc/src/basenc.rs b/src/uu/basenc/src/basenc.rs index ed117b22a0d..2de1223f4a1 100644 --- a/src/uu/basenc/src/basenc.rs +++ b/src/uu/basenc/src/basenc.rs @@ -3,19 +3,15 @@ // For the full copyright and license information, please view the LICENSE 
// file that was distributed with this source code. -//spell-checker:ignore (args) lsbf msbf +// spell-checker:ignore lsbf msbf use clap::{Arg, ArgAction, Command}; use uu_base32::base_common::{self, Config, BASE_CMD_PARSE_ERROR}; - +use uucore::error::UClapError; use uucore::{ encoding::Format, error::{UResult, UUsageError}, }; - -use std::io::{stdin, Read}; -use uucore::error::UClapError; - use uucore::{help_about, help_usage}; const ABOUT: &str = help_about!("basenc.md"); @@ -81,16 +77,8 @@ fn parse_cmd_args(args: impl uucore::Args) -> UResult<(Config, Format)> { #[uucore::main] pub fn uumain(args: impl uucore::Args) -> UResult<()> { let (config, format) = parse_cmd_args(args)?; - // Create a reference to stdin so we can return a locked stdin from - // parse_base_cmd_args - let stdin_raw = stdin(); - let mut input: Box = base_common::get_input(&config, &stdin_raw)?; - base_common::handle_input( - &mut input, - format, - config.wrap_cols, - config.ignore_garbage, - config.decode, - ) + let mut input = base_common::get_input(&config)?; + + base_common::handle_input(&mut input, format, config) } diff --git a/src/uu/cksum/src/cksum.rs b/src/uu/cksum/src/cksum.rs index 7ba9f78de6a..3b1057d7015 100644 --- a/src/uu/cksum/src/cksum.rs +++ b/src/uu/cksum/src/cksum.rs @@ -111,8 +111,7 @@ where OutputFormat::Hexadecimal => sum_hex, OutputFormat::Base64 => match options.algo_name { ALGORITHM_OPTIONS_CRC | ALGORITHM_OPTIONS_SYSV | ALGORITHM_OPTIONS_BSD => sum_hex, - _ => encoding::encode(encoding::Format::Base64, &hex::decode(sum_hex).unwrap()) - .unwrap(), + _ => encoding::for_cksum::BASE64.encode(&hex::decode(sum_hex).unwrap()), }, }; // The BSD checksum output is 5 digit integer diff --git a/src/uu/factor/Cargo.toml b/src/uu/factor/Cargo.toml index 49e836befa3..e28db8e6377 100644 --- a/src/uu/factor/Cargo.toml +++ b/src/uu/factor/Cargo.toml @@ -26,9 +26,6 @@ uucore = { workspace = true } num-bigint = { workspace = true } num-prime = { workspace = true } 
-[dev-dependencies] -quickcheck = "1.0.3" - [[bin]] name = "factor" path = "src/main.rs" diff --git a/src/uucore/Cargo.toml b/src/uucore/Cargo.toml index dd48d1b4fcf..4774180e7eb 100644 --- a/src/uucore/Cargo.toml +++ b/src/uucore/Cargo.toml @@ -77,7 +77,7 @@ default = [] backup-control = [] colors = [] checksum = ["data-encoding", "thiserror", "regex", "sum"] -encoding = ["data-encoding", "data-encoding-macro", "z85", "thiserror"] +encoding = ["data-encoding", "data-encoding-macro", "z85"] entries = ["libc"] fs = ["dunce", "libc", "winapi-util", "windows-sys"] fsext = ["libc", "windows-sys"] diff --git a/src/uucore/src/lib/features/encoding.rs b/src/uucore/src/lib/features/encoding.rs index 6a8e5ba221c..b9150114b8a 100644 --- a/src/uucore/src/lib/features/encoding.rs +++ b/src/uucore/src/lib/features/encoding.rs @@ -3,35 +3,24 @@ // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. -// spell-checker:ignore (strings) ABCDEFGHIJKLMNOPQRSTUVWXYZ ABCDEFGHIJKLMNOPQRSTUV -// spell-checker:ignore (encodings) lsbf msbf hexupper +// spell-checker:ignore (encodings) lsbf msbf +// spell-checker:ignore unpadded -use std::io::{self, Read, Write}; - -use data_encoding::{Encoding, BASE32, BASE32HEX, BASE64, BASE64URL, HEXUPPER}; +use crate::error::{UResult, USimpleError}; +use data_encoding::Encoding; use data_encoding_macro::new_encoding; -#[cfg(feature = "thiserror")] -use thiserror::Error; - -#[derive(Debug, Error)] -pub enum DecodeError { - #[error("{}", _0)] - Decode(#[from] data_encoding::DecodeError), - #[error("{}", _0)] - DecodeZ85(#[from] z85::DecodeError), - #[error("{}", _0)] - Io(#[from] io::Error), -} +use std::collections::VecDeque; -#[derive(Debug)] -pub enum EncodeError { - Z85InputLenNotMultipleOf4, - InvalidInput, +// Re-export for the faster decoding/encoding logic +pub mod for_base_common { + pub use data_encoding::*; } -pub type DecodeResult = Result, DecodeError>; +pub mod for_cksum 
{ + pub use data_encoding::BASE64; +} -#[derive(Clone, Copy)] +#[derive(Clone, Copy, Debug)] pub enum Format { Base64, Base64Url, @@ -42,138 +31,182 @@ pub enum Format { Base2Msbf, Z85, } -use self::Format::*; -const BASE2LSBF: Encoding = new_encoding! { +pub const BASE2LSBF: Encoding = new_encoding! { symbols: "01", bit_order: LeastSignificantFirst, }; -const BASE2MSBF: Encoding = new_encoding! { + +pub const BASE2MSBF: Encoding = new_encoding! { symbols: "01", bit_order: MostSignificantFirst, }; -pub fn encode(f: Format, input: &[u8]) -> Result { - Ok(match f { - Base32 => BASE32.encode(input), - Base64 => BASE64.encode(input), - Base64Url => BASE64URL.encode(input), - Base32Hex => BASE32HEX.encode(input), - Base16 => HEXUPPER.encode(input), - Base2Lsbf => BASE2LSBF.encode(input), - Base2Msbf => BASE2MSBF.encode(input), - Z85 => { - // According to the spec we should not accept inputs whose len is not a multiple of 4. - // However, the z85 crate implements a padded encoding and accepts such inputs. We have to manually check for them. - if input.len() % 4 == 0 { - z85::encode(input) - } else { - return Err(EncodeError::Z85InputLenNotMultipleOf4); - } - } - }) -} +pub struct Z85Wrapper {} -pub fn decode(f: Format, input: &[u8]) -> DecodeResult { - Ok(match f { - Base32 => BASE32.decode(input)?, - Base64 => BASE64.decode(input)?, - Base64Url => BASE64URL.decode(input)?, - Base32Hex => BASE32HEX.decode(input)?, - Base16 => HEXUPPER.decode(input)?, - Base2Lsbf => BASE2LSBF.decode(input)?, - Base2Msbf => BASE2MSBF.decode(input)?, - Z85 => { - // The z85 crate implements a padded encoding by using a leading '#' which is otherwise not allowed. - // We manually check for a leading '#' and return an error ourselves. - if input.starts_with(b"#") { - return Err(z85::DecodeError::InvalidByte(0, b'#').into()); - } else { - z85::decode(input)? 
- } - } - }) +pub struct EncodingWrapper { + pub alphabet: &'static [u8], + pub encoding: Encoding, + pub unpadded_multiple: usize, + pub valid_decoding_multiple: usize, } -pub struct Data { - line_wrap: usize, - ignore_garbage: bool, - input: R, - format: Format, - alphabet: &'static [u8], -} +impl EncodingWrapper { + pub fn new( + encoding: Encoding, + valid_decoding_multiple: usize, + unpadded_multiple: usize, + alphabet: &'static [u8], + ) -> Self { + assert!(valid_decoding_multiple > 0); + + assert!(unpadded_multiple > 0); + + assert!(!alphabet.is_empty()); -impl Data { - pub fn new(input: R, format: Format) -> Self { Self { - line_wrap: 76, - ignore_garbage: false, - input, - format, - alphabet: match format { - Base32 => b"ABCDEFGHIJKLMNOPQRSTUVWXYZ234567=", - Base64 => b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789=+/", - Base64Url => b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789=_-", - Base32Hex => b"0123456789ABCDEFGHIJKLMNOPQRSTUV=", - Base16 => b"0123456789ABCDEF", - Base2Lsbf => b"01", - Base2Msbf => b"01", - Z85 => b"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ.-:+=^!/*?&<>()[]{}@%$#", - }, + alphabet, + encoding, + unpadded_multiple, + valid_decoding_multiple, } } +} - #[must_use] - pub fn line_wrap(mut self, wrap: usize) -> Self { - self.line_wrap = wrap; - self - } +pub trait SupportsFastDecodeAndEncode { + /// Returns the list of characters used by this encoding + fn alphabet(&self) -> &'static [u8]; + + fn decode_into_vec(&self, input: &[u8], output: &mut Vec) -> UResult<()>; + + fn encode_to_vec_deque(&self, input: &[u8], output: &mut VecDeque) -> UResult<()>; + + /// Inputs with a length that is a multiple of this number do not have padding when encoded. 
For instance: + /// + /// "The quick brown" + /// + /// is 15 characters (divisible by 3), so it is encoded in Base64 without padding: + /// + /// "VGhlIHF1aWNrIGJyb3du" + /// + /// While: + /// + /// "The quick brown fox" + /// + /// is 19 characters, which is not divisible by 3, so its Base64 representation has padding: + /// + /// "VGhlIHF1aWNrIGJyb3duIGZveA==" + /// + /// The encoding performed by `fast_encode` depends on this number being correct. + fn unpadded_multiple(&self) -> usize; + + /// Data to decode must be a length that is multiple of this number + /// + /// The decoding performed by `fast_decode` depends on this number being correct. + fn valid_decoding_multiple(&self) -> usize; +} - #[must_use] - pub fn ignore_garbage(mut self, ignore: bool) -> Self { - self.ignore_garbage = ignore; - self +impl SupportsFastDecodeAndEncode for Z85Wrapper { + fn alphabet(&self) -> &'static [u8] { + // Z85 alphabet + b"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ.-:+=^!/*?&<>()[]{}@%$#" } - pub fn decode(&mut self) -> DecodeResult { - let mut buf = vec![]; - self.input.read_to_end(&mut buf)?; - if self.ignore_garbage { - buf.retain(|c| self.alphabet.contains(c)); - } else { - buf.retain(|&c| c != b'\r' && c != b'\n'); + fn decode_into_vec(&self, input: &[u8], output: &mut Vec) -> UResult<()> { + if input.first() == Some(&b'#') { + return Err(USimpleError::new(1, "error: invalid input".to_owned())); + } + + let decode_result = match z85::decode(input) { + Ok(ve) => ve, + Err(_de) => { + return Err(USimpleError::new(1, "error: invalid input".to_owned())); + } }; - decode(self.format, &buf) + + output.extend_from_slice(&decode_result); + + Ok(()) + } + + fn valid_decoding_multiple(&self) -> usize { + 5 } - pub fn encode(&mut self) -> Result { - let mut buf: Vec = vec![]; - match self.input.read_to_end(&mut buf) { - Ok(_) => encode(self.format, buf.as_slice()), - Err(_) => Err(EncodeError::InvalidInput), + fn encode_to_vec_deque(&self, input: &[u8], 
output: &mut VecDeque) -> UResult<()> { + // According to the spec we should not accept inputs whose len is not a multiple of 4. + // However, the z85 crate implements a padded encoding and accepts such inputs. We have to manually check for them. + if input.len() % 4 != 0 { + return Err(USimpleError::new( + 1, + "error: invalid input (length must be multiple of 4 characters)".to_owned(), + )); } + + let string = z85::encode(input); + + output.extend(string.as_bytes()); + + Ok(()) } -} -// NOTE: this will likely be phased out at some point -pub fn wrap_print(data: &Data, res: &str) { - let stdout = io::stdout(); - wrap_write(stdout.lock(), data.line_wrap, res).unwrap(); + fn unpadded_multiple(&self) -> usize { + 4 + } } -pub fn wrap_write(mut writer: W, line_wrap: usize, res: &str) -> io::Result<()> { - use std::cmp::min; +impl SupportsFastDecodeAndEncode for EncodingWrapper { + fn alphabet(&self) -> &'static [u8] { + self.alphabet + } + + // Adapted from `decode` in the "data-encoding" crate + fn decode_into_vec(&self, input: &[u8], output: &mut Vec) -> UResult<()> { + let decode_len_result = match self.encoding.decode_len(input.len()) { + Ok(us) => us, + Err(_de) => { + return Err(USimpleError::new(1, "error: invalid input".to_owned())); + } + }; + + let output_len = output.len(); + + output.resize(output_len + decode_len_result, 0); - if line_wrap == 0 { - return write!(writer, "{res}"); + match self.encoding.decode_mut(input, &mut (output[output_len..])) { + Ok(us) => { + // See: + // https://docs.rs/data-encoding/latest/data_encoding/struct.Encoding.html#method.decode_mut + // "Returns the length of the decoded output. This length may be smaller than the output length if the input contained padding or ignored characters. The output bytes after the returned length are not initialized and should not be read." 
+ output.truncate(output_len + us); + } + Err(_de) => { + return Err(USimpleError::new(1, "error: invalid input".to_owned())); + } + } + + Ok(()) } - let mut start = 0; - while start < res.len() { - let end = min(start + line_wrap, res.len()); - writeln!(writer, "{}", &res[start..end])?; - start = end; + fn valid_decoding_multiple(&self) -> usize { + self.valid_decoding_multiple } - Ok(()) + // Adapted from `encode_append` in the "data-encoding" crate + fn encode_to_vec_deque(&self, input: &[u8], output: &mut VecDeque) -> UResult<()> { + let output_len = output.len(); + + output.resize(output_len + self.encoding.encode_len(input.len()), 0); + + let make_contiguous_result = output.make_contiguous(); + + self.encoding + .encode_mut(input, &mut (make_contiguous_result[output_len..])); + + Ok(()) + } + + fn unpadded_multiple(&self) -> usize { + self.unpadded_multiple + } } diff --git a/tests/by-util/test_base64.rs b/tests/by-util/test_base64.rs index 403fd7db86a..f07da925f5b 100644 --- a/tests/by-util/test_base64.rs +++ b/tests/by-util/test_base64.rs @@ -146,3 +146,77 @@ fn test_base64_file_not_found() { .fails() .stderr_only("base64: a.txt: No such file or directory\n"); } + +#[test] +fn test_no_repeated_trailing_newline() { + new_ucmd!() + .args(&["--wrap", "10", "--", "-"]) + .pipe_in("The quick brown fox jumps over the lazy dog.") + .succeeds() + .stdout_only( + // cSpell:disable + "\ +VGhlIHF1aW +NrIGJyb3du +IGZveCBqdW +1wcyBvdmVy +IHRoZSBsYX +p5IGRvZy4= +", + // cSpell:enable + ); +} + +#[test] +fn test_wrap_default() { + const PIPE_IN: &str = "The quick brown fox jumps over the lazy dog. The quick brown fox jumps over the lazy dog. 
The quick brown fox jumps over the lazy dog."; + + new_ucmd!() + .args(&["--", "-"]) + .pipe_in(PIPE_IN) + .succeeds() + .stdout_only( + // cSpell:disable + "\ +VGhlIHF1aWNrIGJyb3duIGZveCBqdW1wcyBvdmVyIHRoZSBsYXp5IGRvZy4gVGhlIHF1aWNrIGJy +b3duIGZveCBqdW1wcyBvdmVyIHRoZSBsYXp5IGRvZy4gVGhlIHF1aWNrIGJyb3duIGZveCBqdW1w +cyBvdmVyIHRoZSBsYXp5IGRvZy4= +", + // cSpell:enable + ); +} + +// Prevent regression to: +// +// ❯ coreutils manpage base64 | rg --fixed-strings -- 'base32' +// The data are encoded as described for the base32 alphabet in RFC 4648. +// to the bytes of the formal base32 alphabet. Use \-\-ignore\-garbage +// The data are encoded as described for the base32 alphabet in RFC 4648. +// to the bytes of the formal base32 alphabet. Use \-\-ignore\-garbage +#[test] +fn test_manpage() { + use std::process::{Command, Stdio}; + + let test_scenario = TestScenario::new(""); + + let child = Command::new(test_scenario.bin_path) + .arg("manpage") + .arg("base64") + .stdin(Stdio::piped()) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .spawn() + .unwrap(); + + let output = child.wait_with_output().unwrap(); + + assert_eq!(output.status.code().unwrap(), 0); + + assert!(output.stderr.is_empty()); + + let stdout_str = std::str::from_utf8(&output.stdout).unwrap(); + + assert!(stdout_str.contains("base64 alphabet")); + + assert!(!stdout_str.to_ascii_lowercase().contains("base32")); +} diff --git a/tests/by-util/test_basenc.rs b/tests/by-util/test_basenc.rs index 18f0502a1da..85c05ad3ee0 100644 --- a/tests/by-util/test_basenc.rs +++ b/tests/by-util/test_basenc.rs @@ -3,17 +3,23 @@ // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. 
-//spell-checker: ignore (encodings) lsbf msbf +// spell-checker: ignore (encodings) lsbf msbf + use crate::common::util::TestScenario; #[test] -fn test_z85_not_padded() { +fn test_z85_not_padded_decode() { // The z85 crate deviates from the standard in some cases; we have to catch those new_ucmd!() .args(&["--z85", "-d"]) .pipe_in("##########") .fails() .stderr_only("basenc: error: invalid input\n"); +} + +#[test] +fn test_z85_not_padded_encode() { + // The z85 crate deviates from the standard in some cases; we have to catch those new_ucmd!() .args(&["--z85"]) .pipe_in("123") @@ -26,7 +32,7 @@ fn test_invalid_input() { let error_message = if cfg!(windows) { "basenc: .: Permission denied\n" } else { - "basenc: error: invalid input\n" + "basenc: read error: Is a directory\n" }; new_ucmd!() .args(&["--base32", "."]) @@ -40,7 +46,6 @@ fn test_base64() { .arg("--base64") .pipe_in("to>be?") .succeeds() - .no_stderr() .stdout_only("dG8+YmU/\n"); } @@ -50,7 +55,6 @@ fn test_base64_decode() { .args(&["--base64", "-d"]) .pipe_in("dG8+YmU/") .succeeds() - .no_stderr() .stdout_only("to>be?"); } @@ -60,7 +64,6 @@ fn test_base64url() { .arg("--base64url") .pipe_in("to>be?") .succeeds() - .no_stderr() .stdout_only("dG8-YmU_\n"); } @@ -70,7 +73,6 @@ fn test_base64url_decode() { .args(&["--base64url", "-d"]) .pipe_in("dG8-YmU_") .succeeds() - .no_stderr() .stdout_only("to>be?"); } @@ -80,7 +82,6 @@ fn test_base32() { .arg("--base32") .pipe_in("nice>base?") .succeeds() - .no_stderr() .stdout_only("NZUWGZJ6MJQXGZJ7\n"); // spell-checker:disable-line } @@ -90,7 +91,6 @@ fn test_base32_decode() { .args(&["--base32", "-d"]) .pipe_in("NZUWGZJ6MJQXGZJ7") // spell-checker:disable-line .succeeds() - .no_stderr() .stdout_only("nice>base?"); } @@ -100,7 +100,6 @@ fn test_base32hex() { .arg("--base32hex") .pipe_in("nice>base?") .succeeds() - .no_stderr() .stdout_only("DPKM6P9UC9GN6P9V\n"); // spell-checker:disable-line } @@ -110,7 +109,6 @@ fn test_base32hex_decode() { .args(&["--base32hex", 
"-d"]) .pipe_in("DPKM6P9UC9GN6P9V") // spell-checker:disable-line .succeeds() - .no_stderr() .stdout_only("nice>base?"); } @@ -120,7 +118,6 @@ fn test_base16() { .arg("--base16") .pipe_in("Hello, World!") .succeeds() - .no_stderr() .stdout_only("48656C6C6F2C20576F726C6421\n"); } @@ -130,7 +127,6 @@ fn test_base16_decode() { .args(&["--base16", "-d"]) .pipe_in("48656C6C6F2C20576F726C6421") .succeeds() - .no_stderr() .stdout_only("Hello, World!"); } @@ -140,7 +136,6 @@ fn test_base2msbf() { .arg("--base2msbf") .pipe_in("msbf") .succeeds() - .no_stderr() .stdout_only("01101101011100110110001001100110\n"); } @@ -150,7 +145,6 @@ fn test_base2msbf_decode() { .args(&["--base2msbf", "-d"]) .pipe_in("01101101011100110110001001100110") .succeeds() - .no_stderr() .stdout_only("msbf"); } @@ -160,7 +154,6 @@ fn test_base2lsbf() { .arg("--base2lsbf") .pipe_in("lsbf") .succeeds() - .no_stderr() .stdout_only("00110110110011100100011001100110\n"); } @@ -170,7 +163,6 @@ fn test_base2lsbf_decode() { .args(&["--base2lsbf", "-d"]) .pipe_in("00110110110011100100011001100110") .succeeds() - .no_stderr() .stdout_only("lsbf"); } @@ -189,7 +181,6 @@ fn test_choose_last_encoding_z85() { ]) .pipe_in("Hello, World") .succeeds() - .no_stderr() .stdout_only("nm=QNz.92jz/PV8\n"); } @@ -208,7 +199,6 @@ fn test_choose_last_encoding_base64() { ]) .pipe_in("Hello, World!") .succeeds() - .no_stderr() .stdout_only("SGVsbG8sIFdvcmxkIQ==\n"); // spell-checker:disable-line } @@ -227,7 +217,6 @@ fn test_choose_last_encoding_base2lsbf() { ]) .pipe_in("lsbf") .succeeds() - .no_stderr() .stdout_only("00110110110011100100011001100110\n"); } @@ -248,6 +237,18 @@ fn test_base32_decode_repeated() { ]) .pipe_in("NZUWGZJ6MJQXGZJ7") // spell-checker:disable-line .succeeds() - .no_stderr() .stdout_only("nice>base?"); } + +// The restriction that input length has to be divisible by 4 only applies to data being encoded with Z85, not to the +// decoding of Z85-encoded data +#[test] +fn test_z85_length_check() { + 
new_ucmd!() + .args(&["--decode", "--z85"]) + // Input has length 10, not divisible by 4 + // spell-checker:disable-next-line + .pipe_in("f!$Kwh8WxM") + .succeeds() + .stdout_only("12345678"); +}