Skip to content

Commit

Permalink
Fix "coreutils manpage base64" bug
Browse files Browse the repository at this point in the history
  • Loading branch information
andrewliebenow committed Oct 5, 2024
1 parent 9fa405f commit 32e1c54
Show file tree
Hide file tree
Showing 10 changed files with 103 additions and 91 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

19 changes: 4 additions & 15 deletions src/uu/base32/src/base32.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,34 +3,23 @@
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.

use std::io::{stdin, Read};
pub mod base_common;

use clap::Command;
use uucore::{encoding::Format, error::UResult, help_about, help_usage};

pub mod base_common;

const ABOUT: &str = help_about!("base32.md");
const USAGE: &str = help_usage!("base32.md");

#[uucore::main]
pub fn uumain(args: impl uucore::Args) -> UResult<()> {
let format = Format::Base32;

let config: base_common::Config = base_common::parse_base_cmd_args(args, ABOUT, USAGE)?;
let config = base_common::parse_base_cmd_args(args, ABOUT, USAGE)?;

// Create a reference to stdin so we can return a locked stdin from
// parse_base_cmd_args
let stdin_raw = stdin();
let mut input: Box<dyn Read> = base_common::get_input(&config, &stdin_raw)?;
let mut input = base_common::get_input(&config)?;

base_common::handle_input(
&mut input,
format,
config.wrap_cols,
config.ignore_garbage,
config.decode,
)
base_common::handle_input(&mut input, format, config)
}

pub fn uu_app() -> Command {
Expand Down
83 changes: 44 additions & 39 deletions src/uu/base32/src/base_common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,11 @@

use clap::{crate_version, Arg, ArgAction, Command};
use std::fs::File;
use std::io::{self, ErrorKind, Read, Stdin};
use std::path::Path;
use std::io::{self, ErrorKind, Read};
use std::path::{Path, PathBuf};
use uucore::display::Quotable;
use uucore::encoding::{
for_fast_encode::{BASE32, BASE32HEX, BASE64, BASE64URL, HEXUPPER},
for_base_common::{BASE32, BASE32HEX, BASE64, BASE64URL, HEXUPPER},
Format, Z85Wrapper, BASE2LSBF, BASE2MSBF,
};
use uucore::encoding::{EncodingWrapper, SupportsFastDecodeAndEncode};
Expand All @@ -31,7 +31,7 @@ pub struct Config {
pub decode: bool,
pub ignore_garbage: bool,
pub wrap_cols: Option<usize>,
pub to_read: Option<String>,
pub to_read: Option<PathBuf>,
}

pub mod options {
Expand All @@ -43,9 +43,10 @@ pub mod options {

impl Config {
pub fn from(options: &clap::ArgMatches) -> UResult<Self> {
let file: Option<String> = match options.get_many::<String>(options::FILE) {
let to_read = match options.get_many::<String>(options::FILE) {
Some(mut values) => {
let name = values.next().unwrap();

if let Some(extra_op) = values.next() {
return Err(UUsageError::new(
BASE_CMD_PARSE_ERROR,
Expand All @@ -56,19 +57,22 @@ impl Config {
if name == "-" {
None
} else {
if !Path::exists(Path::new(name)) {
let path = Path::new(name);

if !path.exists() {
return Err(USimpleError::new(
BASE_CMD_PARSE_ERROR,
format!("{}: No such file or directory", name.maybe_quote()),
format!("{}: No such file or directory", path.maybe_quote()),
));
}
Some(name.clone())

Some(path.to_owned())
}
}
None => None,
};

let cols = options
let wrap_cols = options
.get_one::<String>(options::WRAP)
.map(|num| {
num.parse::<usize>().map_err(|_| {
Expand All @@ -83,8 +87,8 @@ impl Config {
Ok(Self {
decode: options.get_flag(options::DECODE),
ignore_garbage: options.get_flag(options::IGNORE_GARBAGE),
wrap_cols: cols,
to_read: file,
wrap_cols,
to_read,
})
}
}
Expand Down Expand Up @@ -139,42 +143,43 @@ pub fn base_app(about: &'static str, usage: &str) -> Command {
)
}

pub fn get_input<'a>(config: &Config, stdin_ref: &'a Stdin) -> UResult<Box<dyn Read + 'a>> {
pub fn get_input(config: &Config) -> UResult<Box<dyn Read>> {
match &config.to_read {
Some(name) => {
Some(path_buf) => {
// Do not buffer input, because buffering is handled by `fast_decode` and `fast_encode`
let file_buf =
File::open(Path::new(name)).map_err_context(|| name.maybe_quote().to_string())?;
Ok(Box::new(file_buf))
let file =
File::open(path_buf).map_err_context(|| path_buf.maybe_quote().to_string())?;

Ok(Box::new(file))
}
None => {
let stdin_lock = io::stdin().lock();

Ok(Box::new(stdin_lock))
}
None => Ok(Box::new(stdin_ref.lock())),
}
}

pub fn handle_input<R: Read>(
input: &mut R,
format: Format,
wrap: Option<usize>,
ignore_garbage: bool,
decode: bool,
) -> UResult<()> {
pub fn handle_input<R: Read>(input: &mut R, format: Format, config: Config) -> UResult<()> {
let supports_fast_decode_and_encode = get_supports_fast_decode_and_encode(format);

let supports_fast_decode_and_encode_ref = supports_fast_decode_and_encode.as_ref();

let mut stdout_lock = io::stdout().lock();

if decode {
if config.decode {
fast_decode::fast_decode(
input,
&mut stdout_lock,
supports_fast_decode_and_encode.as_ref(),
ignore_garbage,
supports_fast_decode_and_encode_ref,
config.ignore_garbage,
)
} else {
fast_encode::fast_encode(
input,
&mut stdout_lock,
supports_fast_decode_and_encode.as_ref(),
wrap,
supports_fast_decode_and_encode_ref,
config.wrap_cols,
)
}
}
Expand Down Expand Up @@ -423,15 +428,15 @@ pub mod fast_encode {
};

// Start of buffers
// Data that was read from stdin
// Data that was read from `input`
let mut input_buffer = vec![0; INPUT_BUFFER_SIZE];

assert!(!input_buffer.is_empty());

// Data that was read from stdin but has not been encoded yet
// Data that was read from `input` but has not been encoded yet
let mut leftover_buffer = VecDeque::<u8>::new();

// Encoded data that needs to be written to output
// Encoded data that needs to be written to `output`
let mut encoded_buffer = VecDeque::<u8>::new();
// End of buffers

Expand Down Expand Up @@ -469,7 +474,7 @@ pub mod fast_encode {

assert!(leftover_buffer.len() < encode_in_chunks_of_size);

// Write all data in `encoded_buffer` to output
// Write all data in `encoded_buffer` to `output`
write_to_output(&mut line_wrapping, &mut encoded_buffer, &mut output, false)?;
}
Err(er) => {
Expand Down Expand Up @@ -511,7 +516,7 @@ pub mod fast_decode {

// Start of helper functions
fn alphabet_to_table(alphabet: &[u8], ignore_garbage: bool) -> [bool; 256] {
// If "ignore_garbage" is enabled, all characters outside the alphabet are ignored
// If `ignore_garbage` is enabled, all characters outside the alphabet are ignored
// If it is not enabled, only '\n' and '\r' are ignored
if ignore_garbage {
// Note: "false" here
Expand Down Expand Up @@ -618,31 +623,31 @@ pub mod fast_decode {

assert!(decode_in_chunks_of_size > 0);

// Note that it's not worth using "data-encoding"'s ignore functionality if "ignore_garbage" is true, because
// Note that it's not worth using "data-encoding"'s ignore functionality if `ignore_garbage` is true, because
// "data-encoding"'s ignore functionality cannot discard non-ASCII bytes. The data has to be filtered before
// passing it to "data-encoding", so there is no point in doing any filtering in "data-encoding". This also
// allows execution to stay on the happy path in "data-encoding":
// https://github.com/ia0/data-encoding/blob/4f42ad7ef242f6d243e4de90cd1b46a57690d00e/lib/src/lib.rs#L754-L756
// Update: it is not even worth it to use "data-encoding"'s ignore functionality when "ignore_garbage" is
// It is also not worth using "data-encoding"'s ignore functionality when `ignore_garbage` is
// false.
// Note that the alphabet constants above already include the padding characters
// TODO
// Precompute this
let table = alphabet_to_table(alphabet, ignore_garbage);

// Start of buffers
// Data that was read from stdin
// Data that was read from `input`
let mut input_buffer = vec![0; INPUT_BUFFER_SIZE];

assert!(!input_buffer.is_empty());

// Data that was read from stdin but has not been decoded yet
// Data that was read from `input` but has not been decoded yet
let mut leftover_buffer = Vec::<u8>::new();

// Decoded data that needs to be written to `output`
let mut decoded_buffer = Vec::<u8>::new();

// Buffer that will be used when "ignore_garbage" is true, and the chunk read from "input" contains garbage
// Buffer that will be used when `ignore_garbage` is true, and the chunk read from `input` contains garbage
// data
let mut non_garbage_buffer = Vec::<u8>::new();
// End of buffers
Expand Down
1 change: 1 addition & 0 deletions src/uu/base64/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ readme.workspace = true
path = "src/base64.rs"

[dependencies]
clap = { workspace = true }
uucore = { workspace = true, features = ["encoding"] }
uu_base32 = { workspace = true }

Expand Down
24 changes: 8 additions & 16 deletions src/uu/base64/src/base64.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,32 +3,24 @@
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.

use clap::Command;
use uu_base32::base_common;
pub use uu_base32::uu_app;

use uucore::{encoding::Format, error::UResult, help_about, help_usage};

use std::io::{stdin, Read};

const ABOUT: &str = help_about!("base64.md");
const USAGE: &str = help_usage!("base64.md");

#[uucore::main]
pub fn uumain(args: impl uucore::Args) -> UResult<()> {
let format = Format::Base64;

let config: base_common::Config = base_common::parse_base_cmd_args(args, ABOUT, USAGE)?;
let config = base_common::parse_base_cmd_args(args, ABOUT, USAGE)?;

// Create a reference to stdin so we can return a locked stdin from
// parse_base_cmd_args
let stdin_raw = stdin();
let mut input: Box<dyn Read> = base_common::get_input(&config, &stdin_raw)?;
let mut input = base_common::get_input(&config)?;

base_common::handle_input(&mut input, format, config)
}

base_common::handle_input(
&mut input,
format,
config.wrap_cols,
config.ignore_garbage,
config.decode,
)
pub fn uu_app() -> Command {
base_common::base_app(ABOUT, USAGE)
}
2 changes: 1 addition & 1 deletion src/uu/basenc/BENCHMARKING.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ use a benchmarking tool like [hyperfine][0].
hyperfine currently does not measure maximum memory usage. Memory usage can be benchmarked using [poop][2], or
[toybox][3]'s "time" subcommand (both are Linux only).

Next, build the `basenc` binary using the release profile:
Build the `basenc` binary using the release profile:

```Shell
cargo build --package uu_basenc --profile release
Expand Down
22 changes: 5 additions & 17 deletions src/uu/basenc/src/basenc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,19 +3,15 @@
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.

//spell-checker:ignore (args) lsbf msbf
// spell-checker:ignore lsbf msbf

use clap::{Arg, ArgAction, Command};
use uu_base32::base_common::{self, Config, BASE_CMD_PARSE_ERROR};

use uucore::error::UClapError;
use uucore::{
encoding::Format,
error::{UResult, UUsageError},
};

use std::io::{stdin, Read};
use uucore::error::UClapError;

use uucore::{help_about, help_usage};

const ABOUT: &str = help_about!("basenc.md");
Expand Down Expand Up @@ -81,16 +77,8 @@ fn parse_cmd_args(args: impl uucore::Args) -> UResult<(Config, Format)> {
#[uucore::main]
pub fn uumain(args: impl uucore::Args) -> UResult<()> {
let (config, format) = parse_cmd_args(args)?;
// Create a reference to stdin so we can return a locked stdin from
// parse_base_cmd_args
let stdin_raw = stdin();
let mut input: Box<dyn Read> = base_common::get_input(&config, &stdin_raw)?;

base_common::handle_input(
&mut input,
format,
config.wrap_cols,
config.ignore_garbage,
config.decode,
)
let mut input = base_common::get_input(&config)?;

base_common::handle_input(&mut input, format, config)
}
4 changes: 2 additions & 2 deletions src/uucore/src/lib/features/encoding.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@ use data_encoding::Encoding;
use data_encoding_macro::new_encoding;
use std::collections::VecDeque;

// Re-export for the faster encoding logic
pub mod for_fast_encode {
// Re-export for the faster decoding/encoding logic
pub mod for_base_common {
pub use data_encoding::*;
}

Expand Down
Loading

0 comments on commit 32e1c54

Please sign in to comment.