From 32e1c54c789de5fecc3b3818eeb66475b92eb808 Mon Sep 17 00:00:00 2001 From: Andrew Liebenow Date: Sat, 5 Oct 2024 08:17:10 -0500 Subject: [PATCH] Fix "coreutils manpage base64" bug --- Cargo.lock | 1 + src/uu/base32/src/base32.rs | 19 ++---- src/uu/base32/src/base_common.rs | 83 +++++++++++++------------ src/uu/base64/Cargo.toml | 1 + src/uu/base64/src/base64.rs | 24 +++---- src/uu/basenc/BENCHMARKING.md | 2 +- src/uu/basenc/src/basenc.rs | 22 ++----- src/uucore/src/lib/features/encoding.rs | 4 +- tests/by-util/test_base64.rs | 35 +++++++++++ tests/by-util/test_basenc.rs | 3 +- 10 files changed, 103 insertions(+), 91 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 9514fafc162..5e99683fb47 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2532,6 +2532,7 @@ dependencies = [ name = "uu_base64" version = "0.0.27" dependencies = [ + "clap", "uu_base32", "uucore", ] diff --git a/src/uu/base32/src/base32.rs b/src/uu/base32/src/base32.rs index 09250421c25..46a0361ea4a 100644 --- a/src/uu/base32/src/base32.rs +++ b/src/uu/base32/src/base32.rs @@ -3,13 +3,11 @@ // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. -use std::io::{stdin, Read}; +pub mod base_common; use clap::Command; use uucore::{encoding::Format, error::UResult, help_about, help_usage}; -pub mod base_common; - const ABOUT: &str = help_about!("base32.md"); const USAGE: &str = help_usage!("base32.md"); @@ -17,20 +15,11 @@ const USAGE: &str = help_usage!("base32.md"); pub fn uumain(args: impl uucore::Args) -> UResult<()> { let format = Format::Base32; - let config: base_common::Config = base_common::parse_base_cmd_args(args, ABOUT, USAGE)?; + let config = base_common::parse_base_cmd_args(args, ABOUT, USAGE)?; - // Create a reference to stdin so we can return a locked stdin from - // parse_base_cmd_args - let stdin_raw = stdin(); - let mut input: Box = base_common::get_input(&config, &stdin_raw)?; + let mut input = base_common::get_input(&config)?; - base_common::handle_input( - &mut input, - format, - config.wrap_cols, - config.ignore_garbage, - config.decode, - ) + base_common::handle_input(&mut input, format, config) } pub fn uu_app() -> Command { diff --git a/src/uu/base32/src/base_common.rs b/src/uu/base32/src/base_common.rs index 53f507e0d9e..f6b88f55157 100644 --- a/src/uu/base32/src/base_common.rs +++ b/src/uu/base32/src/base_common.rs @@ -7,11 +7,11 @@ use clap::{crate_version, Arg, ArgAction, Command}; use std::fs::File; -use std::io::{self, ErrorKind, Read, Stdin}; -use std::path::Path; +use std::io::{self, ErrorKind, Read}; +use std::path::{Path, PathBuf}; use uucore::display::Quotable; use uucore::encoding::{ - for_fast_encode::{BASE32, BASE32HEX, BASE64, BASE64URL, HEXUPPER}, + for_base_common::{BASE32, BASE32HEX, BASE64, BASE64URL, HEXUPPER}, Format, Z85Wrapper, BASE2LSBF, BASE2MSBF, }; use uucore::encoding::{EncodingWrapper, SupportsFastDecodeAndEncode}; @@ -31,7 +31,7 @@ pub struct Config { pub decode: bool, pub ignore_garbage: bool, pub wrap_cols: Option, - pub to_read: Option, + pub to_read: Option, } pub mod options { @@ -43,9 +43,10 @@ pub mod options { impl Config { pub fn from(options: &clap::ArgMatches) -> UResult { - let file: Option = match options.get_many::(options::FILE) { + let to_read = match options.get_many::(options::FILE) { Some(mut values) => { let name = values.next().unwrap(); + if let Some(extra_op) = values.next() { return Err(UUsageError::new( BASE_CMD_PARSE_ERROR, @@ -56,19 +57,22 @@ impl Config { if name == "-" { None } else { - if !Path::exists(Path::new(name)) { + let path = Path::new(name); + + if !path.exists() { return Err(USimpleError::new( BASE_CMD_PARSE_ERROR, - format!("{}: No such file or directory", name.maybe_quote()), + format!("{}: No such file or directory", path.maybe_quote()), )); } - Some(name.clone()) + + Some(path.to_owned()) } } None => None, }; - let cols = options + let wrap_cols = options .get_one::(options::WRAP) .map(|num| { num.parse::().map_err(|_| { @@ -83,8 +87,8 @@ impl Config { Ok(Self { decode: options.get_flag(options::DECODE), ignore_garbage: options.get_flag(options::IGNORE_GARBAGE), - wrap_cols: cols, - to_read: file, + wrap_cols, + to_read, }) } } @@ -139,42 +143,43 @@ pub fn base_app(about: &'static str, usage: &str) -> Command { ) } -pub fn get_input<'a>(config: &Config, stdin_ref: &'a Stdin) -> UResult> { +pub fn get_input(config: &Config) -> UResult> { match &config.to_read { - Some(name) => { + Some(path_buf) => { // Do not buffer input, because buffering is handled by `fast_decode` and `fast_encode` - let file_buf = - File::open(Path::new(name)).map_err_context(|| name.maybe_quote().to_string())?; - Ok(Box::new(file_buf)) + let file = + File::open(path_buf).map_err_context(|| path_buf.maybe_quote().to_string())?; + + Ok(Box::new(file)) + } + None => { + let stdin_lock = io::stdin().lock(); + + Ok(Box::new(stdin_lock)) } - None => Ok(Box::new(stdin_ref.lock())), } } -pub fn handle_input( - input: &mut R, - format: Format, - wrap: Option, - ignore_garbage: bool, - decode: bool, -) -> UResult<()> { +pub fn handle_input(input: &mut R, format: Format, config: Config) -> UResult<()> { let supports_fast_decode_and_encode = get_supports_fast_decode_and_encode(format); + let supports_fast_decode_and_encode_ref = supports_fast_decode_and_encode.as_ref(); + let mut stdout_lock = io::stdout().lock(); - if decode { + if config.decode { fast_decode::fast_decode( input, &mut stdout_lock, - supports_fast_decode_and_encode.as_ref(), - ignore_garbage, + supports_fast_decode_and_encode_ref, + config.ignore_garbage, ) } else { fast_encode::fast_encode( input, &mut stdout_lock, - supports_fast_decode_and_encode.as_ref(), - wrap, + supports_fast_decode_and_encode_ref, + config.wrap_cols, ) } } @@ -423,15 +428,15 @@ pub mod fast_encode { }; // Start of buffers - // Data that was read from stdin + // Data that was read from `input` let mut input_buffer = vec![0; INPUT_BUFFER_SIZE]; assert!(!input_buffer.is_empty()); - // Data that was read from stdin but has not been encoded yet + // Data that was read from `input` but has not been encoded yet let mut leftover_buffer = VecDeque::::new(); - // Encoded data that needs to be written to output + // Encoded data that needs to be written to `output` let mut encoded_buffer = VecDeque::::new(); // End of buffers @@ -469,7 +474,7 @@ pub mod fast_encode { assert!(leftover_buffer.len() < encode_in_chunks_of_size); - // Write all data in `encoded_buffer` to output + // Write all data in `encoded_buffer` to `output` write_to_output(&mut line_wrapping, &mut encoded_buffer, &mut output, false)?; } Err(er) => { @@ -511,7 +516,7 @@ pub mod fast_decode { // Start of helper functions fn alphabet_to_table(alphabet: &[u8], ignore_garbage: bool) -> [bool; 256] { - // If "ignore_garbage" is enabled, all characters outside the alphabet are ignored + // If `ignore_garbage` is enabled, all characters outside the alphabet are ignored // If it is not enabled, only '\n' and '\r' are ignored if ignore_garbage { // Note: "false" here @@ -618,12 +623,12 @@ pub mod fast_decode { assert!(decode_in_chunks_of_size > 0); - // Note that it's not worth using "data-encoding"'s ignore functionality if "ignore_garbage" is true, because + // Note that it's not worth using "data-encoding"'s ignore functionality if `ignore_garbage` is true, because // "data-encoding"'s ignore functionality cannot discard non-ASCII bytes. The data has to be filtered before // passing it to "data-encoding", so there is no point in doing any filtering in "data-encoding". This also // allows execution to stay on the happy path in "data-encoding": // https://github.com/ia0/data-encoding/blob/4f42ad7ef242f6d243e4de90cd1b46a57690d00e/lib/src/lib.rs#L754-L756 - // Update: it is not even worth it to use "data-encoding"'s ignore functionality when "ignore_garbage" is + // It is also not worth using "data-encoding"'s ignore functionality when `ignore_garbage` is // false. // Note that the alphabet constants above already include the padding characters // TODO @@ -631,18 +636,18 @@ pub mod fast_decode { let table = alphabet_to_table(alphabet, ignore_garbage); // Start of buffers - // Data that was read from stdin + // Data that was read from `input` let mut input_buffer = vec![0; INPUT_BUFFER_SIZE]; assert!(!input_buffer.is_empty()); - // Data that was read from stdin but has not been decoded yet + // Data that was read from `input` but has not been decoded yet let mut leftover_buffer = Vec::::new(); // Decoded data that needs to be written to `output` let mut decoded_buffer = Vec::::new(); - // Buffer that will be used when "ignore_garbage" is true, and the chunk read from "input" contains garbage + // Buffer that will be used when `ignore_garbage` is true, and the chunk read from `input` contains garbage // data let mut non_garbage_buffer = Vec::::new(); // End of buffers diff --git a/src/uu/base64/Cargo.toml b/src/uu/base64/Cargo.toml index c65fe5b971e..5afc4283e6d 100644 --- a/src/uu/base64/Cargo.toml +++ b/src/uu/base64/Cargo.toml @@ -17,6 +17,7 @@ readme.workspace = true path = "src/base64.rs" [dependencies] +clap = { workspace = true } uucore = { workspace = true, features = ["encoding"] } uu_base32 = { workspace = true } diff --git a/src/uu/base64/src/base64.rs b/src/uu/base64/src/base64.rs index 6544638bdae..86eb75bf119 100644 --- a/src/uu/base64/src/base64.rs +++ b/src/uu/base64/src/base64.rs @@ -3,13 +3,10 @@ // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. +use clap::Command; use uu_base32::base_common; -pub use uu_base32::uu_app; - use uucore::{encoding::Format, error::UResult, help_about, help_usage}; -use std::io::{stdin, Read}; - const ABOUT: &str = help_about!("base64.md"); const USAGE: &str = help_usage!("base64.md"); @@ -17,18 +14,13 @@ const USAGE: &str = help_usage!("base64.md"); pub fn uumain(args: impl uucore::Args) -> UResult<()> { let format = Format::Base64; - let config: base_common::Config = base_common::parse_base_cmd_args(args, ABOUT, USAGE)?; + let config = base_common::parse_base_cmd_args(args, ABOUT, USAGE)?; - // Create a reference to stdin so we can return a locked stdin from - // parse_base_cmd_args - let stdin_raw = stdin(); - let mut input: Box = base_common::get_input(&config, &stdin_raw)?; + let mut input = base_common::get_input(&config)?; + + base_common::handle_input(&mut input, format, config) +} - base_common::handle_input( - &mut input, - format, - config.wrap_cols, - config.ignore_garbage, - config.decode, - ) +pub fn uu_app() -> Command { + base_common::base_app(ABOUT, USAGE) } diff --git a/src/uu/basenc/BENCHMARKING.md b/src/uu/basenc/BENCHMARKING.md index f007b82014b..8248cbbc53b 100644 --- a/src/uu/basenc/BENCHMARKING.md +++ b/src/uu/basenc/BENCHMARKING.md @@ -13,7 +13,7 @@ use a benchmarking tool like [hyperfine][0]. hyperfine currently does not measure maximum memory usage. Memory usage can be benchmarked using [poop][2], or [toybox][3]'s "time" subcommand (both are Linux only). -Next, build the `basenc` binary using the release profile: +Build the `basenc` binary using the release profile: ```Shell cargo build --package uu_basenc --profile release diff --git a/src/uu/basenc/src/basenc.rs b/src/uu/basenc/src/basenc.rs index ed117b22a0d..2de1223f4a1 100644 --- a/src/uu/basenc/src/basenc.rs +++ b/src/uu/basenc/src/basenc.rs @@ -3,19 +3,15 @@ // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. -//spell-checker:ignore (args) lsbf msbf +// spell-checker:ignore lsbf msbf use clap::{Arg, ArgAction, Command}; use uu_base32::base_common::{self, Config, BASE_CMD_PARSE_ERROR}; - +use uucore::error::UClapError; use uucore::{ encoding::Format, error::{UResult, UUsageError}, }; - -use std::io::{stdin, Read}; -use uucore::error::UClapError; - use uucore::{help_about, help_usage}; const ABOUT: &str = help_about!("basenc.md"); @@ -81,16 +77,8 @@ fn parse_cmd_args(args: impl uucore::Args) -> UResult<(Config, Format)> { #[uucore::main] pub fn uumain(args: impl uucore::Args) -> UResult<()> { let (config, format) = parse_cmd_args(args)?; - // Create a reference to stdin so we can return a locked stdin from - // parse_base_cmd_args - let stdin_raw = stdin(); - let mut input: Box = base_common::get_input(&config, &stdin_raw)?; - base_common::handle_input( - &mut input, - format, - config.wrap_cols, - config.ignore_garbage, - config.decode, - ) + let mut input = base_common::get_input(&config)?; + + base_common::handle_input(&mut input, format, config) } diff --git a/src/uucore/src/lib/features/encoding.rs b/src/uucore/src/lib/features/encoding.rs index b27d3a1f9b5..b9150114b8a 100644 --- a/src/uucore/src/lib/features/encoding.rs +++ b/src/uucore/src/lib/features/encoding.rs @@ -11,8 +11,8 @@ use data_encoding::Encoding; use data_encoding_macro::new_encoding; use std::collections::VecDeque; -// Re-export for the faster encoding logic -pub mod for_fast_encode { +// Re-export for the faster decoding/encoding logic +pub mod for_base_common { pub use data_encoding::*; } diff --git a/tests/by-util/test_base64.rs b/tests/by-util/test_base64.rs index b0a89b4c2ae..f07da925f5b 100644 --- a/tests/by-util/test_base64.rs +++ b/tests/by-util/test_base64.rs @@ -185,3 +185,38 @@ cyBvdmVyIHRoZSBsYXp5IGRvZy4= // cSpell:enable ); } + +// Prevent regression to: +// +// ❯ coreutils manpage base64 | rg --fixed-strings -- 'base32' +// The data are encoded as described for the base32 alphabet in RFC 4648. +// to the bytes of the formal base32 alphabet. Use \-\-ignore\-garbage +// The data are encoded as described for the base32 alphabet in RFC 4648. +// to the bytes of the formal base32 alphabet. Use \-\-ignore\-garbage +#[test] +fn test_manpage() { + use std::process::{Command, Stdio}; + + let test_scenario = TestScenario::new(""); + + let child = Command::new(test_scenario.bin_path) + .arg("manpage") + .arg("base64") + .stdin(Stdio::piped()) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .spawn() + .unwrap(); + + let output = child.wait_with_output().unwrap(); + + assert_eq!(output.status.code().unwrap(), 0); + + assert!(output.stderr.is_empty()); + + let stdout_str = std::str::from_utf8(&output.stdout).unwrap(); + + assert!(stdout_str.contains("base64 alphabet")); + + assert!(!stdout_str.to_ascii_lowercase().contains("base32")); +} diff --git a/tests/by-util/test_basenc.rs b/tests/by-util/test_basenc.rs index 437413a26b7..85c05ad3ee0 100644 --- a/tests/by-util/test_basenc.rs +++ b/tests/by-util/test_basenc.rs @@ -3,7 +3,8 @@ // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. -//spell-checker: ignore (encodings) lsbf msbf +// spell-checker: ignore (encodings) lsbf msbf + use crate::common::util::TestScenario; #[test]