Skip to content

Commit

Permalink
fix versionsort chunk split on non-ASCII numerics (#6407)
Browse files Browse the repository at this point in the history
Replaces `char::is_numeric()` with `char::is_ascii_digit()` in
`VersionChunkIter::parse_str_chunk()`.

The text in the Style Guide explicitly mentions that a numeric chunk
is a sequence of ASCII digits.
  • Loading branch information
jessicarod7 authored Dec 1, 2024
1 parent 0a32a02 commit 78aa72f
Show file tree
Hide file tree
Showing 5 changed files with 90 additions and 1 deletion.
11 changes: 10 additions & 1 deletion src/sort.rs
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ impl<'a> VersionChunkIter<'a> {
break;
}

- if !c.is_numeric() {
+ if !c.is_ascii_digit() {
continue;
}

Expand Down Expand Up @@ -283,6 +283,10 @@ mod test {
source: "009"
})
);

// '๙' = U+0E59 THAI DIGIT NINE, General Category Nd
let mut iter = VersionChunkIter::new("x๙v");
assert_eq!(iter.next(), Some(VersionChunk::Str("x๙v")));
}

#[test]
Expand All @@ -297,6 +301,11 @@ mod test {
input.sort_by(|a, b| version_sort(a, b));
assert_eq!(input, expected);

let mut input = vec!["x๙x", "xéx", "x0x"];
let expected = vec!["x0x", "xéx", "x๙x"];
input.sort_by(|a, b| version_sort(a, b));
assert_eq!(input, expected);

let mut input = vec!["applesauce", "apple"];
let expected = vec!["apple", "applesauce"];
input.sort_by(|a, b| version_sort(a, b));
Expand Down
17 changes: 17 additions & 0 deletions tests/source/non_ascii_numerics_import_asciibetically.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
// rustfmt-style_edition: 2015

// ascii-betically sorted
pub use print๙msg;
pub use print0msg;
pub use printémsg;

fn main() {}

/// '๙' = 0E59;THAI DIGIT NINE;Nd; (Non-ASCII Decimal_Number, sorts third)
mod print๙msg {}

/// '0' = 0030;DIGIT ZERO;Nd; (ASCII Decimal_Number, sorts first)
mod print0msg {}

/// 'é' = 00E9;LATIN SMALL LETTER E WITH ACUTE;Ll; (Lowercase_Letter, sorts second)
mod printémsg {}
23 changes: 23 additions & 0 deletions tests/source/non_ascii_numerics_import_versionsort.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
// rustfmt-style_edition: 2024

// versionsorted
pub use print๙msg;
pub use print0msg;
pub use printémsg;

fn main() {}

/// '๙' = 0E59;THAI DIGIT NINE;Nd; (Non-ASCII Decimal_Number, one string chunk)
///
/// U+0E59 > U+00E9, sorts third
mod print๙msg {}

/// '0' = 0030;DIGIT ZERO;Nd; (ASCII Decimal_Number, splits into 3 chunks ("print",0,"msg"))
///
/// shortest chunk "print", sorts first
mod print0msg {}

/// 'é' = 00E9;LATIN SMALL LETTER E WITH ACUTE;Ll; (Lowercase_Letter, one string chunk)
///
/// U+00E9 < U+0E59, sorts second
mod printémsg {}
17 changes: 17 additions & 0 deletions tests/target/non_ascii_numerics_import_asciibetically.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
// rustfmt-style_edition: 2015

// ascii-betically sorted
pub use print0msg;
pub use printémsg;
pub use print๙msg;

fn main() {}

/// '๙' = 0E59;THAI DIGIT NINE;Nd; (Non-ASCII Decimal_Number, sorts third)
mod print๙msg {}

/// '0' = 0030;DIGIT ZERO;Nd; (ASCII Decimal_Number, sorts first)
mod print0msg {}

/// 'é' = 00E9;LATIN SMALL LETTER E WITH ACUTE;Ll; (Lowercase_Letter, sorts second)
mod printémsg {}
23 changes: 23 additions & 0 deletions tests/target/non_ascii_numerics_import_versionsort.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
// rustfmt-style_edition: 2024

// versionsorted
pub use print0msg;
pub use printémsg;
pub use print๙msg;

fn main() {}

/// '๙' = 0E59;THAI DIGIT NINE;Nd; (Non-ASCII Decimal_Number, one string chunk)
///
/// U+0E59 > U+00E9, sorts third
mod print๙msg {}

/// '0' = 0030;DIGIT ZERO;Nd; (ASCII Decimal_Number, splits into 3 chunks ("print",0,"msg"))
///
/// shortest chunk "print", sorts first
mod print0msg {}

/// 'é' = 00E9;LATIN SMALL LETTER E WITH ACUTE;Ll; (Lowercase_Letter, one string chunk)
///
/// U+00E9 < U+0E59, sorts second
mod printémsg {}

0 comments on commit 78aa72f

Please sign in to comment.