diff --git a/crates/toml_edit/src/parser/datetime.rs b/crates/toml_edit/src/parser/datetime.rs index 69c8d7f3..67e5c4a5 100644 --- a/crates/toml_edit/src/parser/datetime.rs +++ b/crates/toml_edit/src/parser/datetime.rs @@ -255,6 +255,7 @@ pub(crate) fn unsigned_digits<'i, const MIN: usize, const MAX: usize>( input: &mut Input<'i>, ) -> PResult<&'i str> { take_while(MIN..=MAX, DIGIT) + // Safety: `DIGIT` only matches ASCII .map(|b: &[u8]| unsafe { from_utf8_unchecked(b, "`is_ascii_digit` filters out on-ASCII") }) .parse_next(input) } diff --git a/crates/toml_edit/src/parser/key.rs b/crates/toml_edit/src/parser/key.rs index ee729fda..5616d3df 100644 --- a/crates/toml_edit/src/parser/key.rs +++ b/crates/toml_edit/src/parser/key.rs @@ -90,6 +90,7 @@ pub(crate) fn simple_key(input: &mut Input<'_>) -> PResult<(RawString, InternalS fn unquoted_key<'i>(input: &mut Input<'i>) -> PResult<&'i str> { trace( "unquoted-key", + // Safety: UNQUOTED_CHAR is only ASCII ranges take_while(1.., UNQUOTED_CHAR) .map(|b| unsafe { from_utf8_unchecked(b, "`is_unquoted_char` filters out on-ASCII") }), ) @@ -101,6 +102,7 @@ pub(crate) fn is_unquoted_char(c: u8) -> bool { UNQUOTED_CHAR.contains_token(c) } +// Safety-usable invariant: UNQUOTED_CHAR is only ASCII ranges const UNQUOTED_CHAR: ( RangeInclusive, RangeInclusive, diff --git a/crates/toml_edit/src/parser/numbers.rs b/crates/toml_edit/src/parser/numbers.rs index dcd96b88..30753938 100644 --- a/crates/toml_edit/src/parser/numbers.rs +++ b/crates/toml_edit/src/parser/numbers.rs @@ -79,6 +79,7 @@ pub(crate) fn dec_int<'i>(input: &mut Input<'i>) -> PResult<&'i str> { )), ) .recognize() + // Safety: DIGIT1_9, digit(), and `_` only cover ASCII ranges .map(|b: &[u8]| unsafe { from_utf8_unchecked(b, "`digit` and `_` filter out non-ASCII") }) @@ -86,6 +87,7 @@ pub(crate) fn dec_int<'i>(input: &mut Input<'i>) -> PResult<&'i str> { ) .parse_next(input) } +/// Safety-usable invariant: DIGIT1_9 is only ASCII ranges const DIGIT1_9: RangeInclusive = 
b'1'..=b'9'; // hex-prefix = %x30.78 ; 0x @@ -114,11 +116,13 @@ pub(crate) fn hex_int<'i>(input: &mut Input<'i>) -> PResult<&'i str> { )) .recognize(), ) + // Safety: HEX_PREFIX, hexdig(), and `_` only cover ASCII ranges .map(|b| unsafe { from_utf8_unchecked(b, "`hexdig` and `_` filter out non-ASCII") }) .context(StrContext::Label("hexadecimal integer")), ) .parse_next(input) } +/// Safety-usable invariant: HEX_PREFIX is ASCII only const HEX_PREFIX: &[u8] = b"0x"; // oct-prefix = %x30.6F ; 0o @@ -147,12 +151,15 @@ pub(crate) fn oct_int<'i>(input: &mut Input<'i>) -> PResult<&'i str> { )) .recognize(), ) + // Safety: DIGIT0_7, OCT_PREFIX, and `_` only cover ASCII ranges .map(|b| unsafe { from_utf8_unchecked(b, "`DIGIT0_7` and `_` filter out non-ASCII") }) .context(StrContext::Label("octal integer")), ) .parse_next(input) } +/// Safety-usable invariant: OCT_PREFIX is ASCII only const OCT_PREFIX: &[u8] = b"0o"; +/// Safety-usable invariant: DIGIT0_7 is ASCII only const DIGIT0_7: RangeInclusive = b'0'..=b'7'; // bin-prefix = %x30.62 ; 0b @@ -181,12 +188,15 @@ pub(crate) fn bin_int<'i>(input: &mut Input<'i>) -> PResult<&'i str> { )) .recognize(), ) + // Safety: DIGIT0_1, BIN_PREFIX, and `_` only cover ASCII ranges .map(|b| unsafe { from_utf8_unchecked(b, "`DIGIT0_1` and `_` filter out non-ASCII") }) .context(StrContext::Label("binary integer")), ) .parse_next(input) } +/// Safety-usable invariant: BIN_PREFIX is ASCII only const BIN_PREFIX: &[u8] = b"0b"; +/// Safety-usable invariant: DIGIT0_1 is ASCII only const DIGIT0_1: RangeInclusive = b'0'..=b'1'; // ;; Float @@ -234,6 +244,7 @@ pub(crate) fn frac<'i>(input: &mut Input<'i>) -> PResult<&'i str> { ) .recognize() .map(|b: &[u8]| unsafe { + // Safety: `.` and `zero_prefixable_int` only handle ASCII from_utf8_unchecked( b, "`.` and `parse_zero_prefixable_int` filter out non-ASCII", @@ -243,6 +254,7 @@ pub(crate) fn frac<'i>(input: &mut Input<'i>) -> PResult<&'i str> { } // zero-prefixable-int = DIGIT *( DIGIT / 
underscore DIGIT ) +/// Safety-usable invariant: only produces ASCII pub(crate) fn zero_prefixable_int<'i>(input: &mut Input<'i>) -> PResult<&'i str> { ( digit, @@ -261,8 +273,10 @@ pub(crate) fn zero_prefixable_int<'i>(input: &mut Input<'i>) -> PResult<&'i str> .map(|()| ()), ) .recognize() + // Safety: `digit()` and `_` are all ASCII .map(|b: &[u8]| unsafe { from_utf8_unchecked(b, "`digit` and `_` filter out non-ASCII") }) .parse_next(input) + // Safety-usable invariant upheld by only using `digit` and `_` in the parser } // exp = "e" float-exp-part @@ -275,6 +289,7 @@ pub(crate) fn exp<'i>(input: &mut Input<'i>) -> PResult<&'i str> { ) .recognize() .map(|b: &[u8]| unsafe { + // Safety: `e`, `E`, `+`, `-`, and `zero_prefixable_int` are all ASCII from_utf8_unchecked( b, "`one_of` and `parse_zero_prefixable_int` filter out non-ASCII", @@ -305,15 +320,20 @@ pub(crate) fn nan(input: &mut Input<'_>) -> PResult { const NAN: &[u8] = b"nan"; // DIGIT = %x30-39 ; 0-9 +/// Safety-usable invariant: only parses ASCII pub(crate) fn digit(input: &mut Input<'_>) -> PResult { + // Safety: DIGIT is all ASCII one_of(DIGIT).parse_next(input) } const DIGIT: RangeInclusive = b'0'..=b'9'; // HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F" +/// Safety-usable invariant: only parses ASCII pub(crate) fn hexdig(input: &mut Input<'_>) -> PResult { + // Safety: HEXDIG is all ASCII one_of(HEXDIG).parse_next(input) } +/// Safety-usable invariant: only ASCII ranges pub(crate) const HEXDIG: (RangeInclusive, RangeInclusive, RangeInclusive) = (DIGIT, b'A'..=b'F', b'a'..=b'f'); diff --git a/crates/toml_edit/src/parser/strings.rs b/crates/toml_edit/src/parser/strings.rs index 0478faa8..9705aa24 100644 --- a/crates/toml_edit/src/parser/strings.rs +++ b/crates/toml_edit/src/parser/strings.rs @@ -138,6 +138,7 @@ fn escape_seq_char(input: &mut Input<'_>) -> PResult { pub(crate) fn hexescape(input: &mut Input<'_>) -> PResult { take_while(0..=N, HEXDIG) .verify(|b: &[u8]| b.len() == N) + // Safety: 
HEXDIG is ASCII-only .map(|b: &[u8]| unsafe { from_utf8_unchecked(b, "`is_ascii_digit` filters out on-ASCII") }) .verify_map(|s| u32::from_str_radix(s, 16).ok()) .try_map(|h| char::from_u32(h).ok_or(CustomError::OutOfRange)) @@ -217,6 +218,8 @@ fn mlb_quotes<'i>( move |input: &mut Input<'i>| { let start = input.checkpoint(); let res = terminated(b"\"\"", peek(term.by_ref())) + // Safety: terminated returns the output of the first parser here, + // which only parses ASCII .map(|b| unsafe { from_utf8_unchecked(b, "`bytes` out non-ASCII") }) .parse_next(input); @@ -224,6 +227,8 @@ fn mlb_quotes<'i>( Err(winnow::error::ErrMode::Backtrack(_)) => { input.reset(&start); terminated(b"\"", peek(term.by_ref())) + // Safety: terminated returns the output of the first parser here, + // which only parses ASCII .map(|b| unsafe { from_utf8_unchecked(b, "`bytes` out non-ASCII") }) .parse_next(input) } @@ -346,6 +351,8 @@ fn mll_quotes<'i>( move |input: &mut Input<'i>| { let start = input.checkpoint(); let res = terminated(b"''", peek(term.by_ref())) + // Safety: terminated returns the output of the first parser here, + // which only parses ASCII .map(|b| unsafe { from_utf8_unchecked(b, "`bytes` out non-ASCII") }) .parse_next(input); @@ -353,6 +360,8 @@ fn mll_quotes<'i>( Err(winnow::error::ErrMode::Backtrack(_)) => { input.reset(&start); terminated(b"'", peek(term.by_ref())) + // Safety: terminated returns the output of the first parser here, + // which only parses ASCII .map(|b| unsafe { from_utf8_unchecked(b, "`bytes` out non-ASCII") }) .parse_next(input) } diff --git a/crates/toml_edit/src/parser/trivia.rs b/crates/toml_edit/src/parser/trivia.rs index c62e2dec..500e1cfb 100644 --- a/crates/toml_edit/src/parser/trivia.rs +++ b/crates/toml_edit/src/parser/trivia.rs @@ -11,6 +11,7 @@ use winnow::token::take_while; use crate::parser::prelude::*; +/// Safety invariant: must be called with valid UTF-8 in `bytes` pub(crate) unsafe fn from_utf8_unchecked<'b>( bytes: &'b [u8], 
safety_justification: &'static str, @@ -27,10 +28,12 @@ pub(crate) unsafe fn from_utf8_unchecked<'b>( // wschar = ( %x20 / ; Space // %x09 ) ; Horizontal tab +/// Safety-usable invariant: WSCHAR is only ASCII values pub(crate) const WSCHAR: (u8, u8) = (b' ', b'\t'); // ws = *wschar pub(crate) fn ws<'i>(input: &mut Input<'i>) -> PResult<&'i str> { + // Safety: WSCHAR only contains ASCII take_while(0.., WSCHAR) .map(|b| unsafe { from_utf8_unchecked(b, "`is_wschar` filters out on-ASCII") }) .parse_next(input) @@ -58,8 +61,10 @@ pub(crate) fn comment<'i>(input: &mut Input<'i>) -> PResult<&'i [u8]> { // newline = ( %x0A / ; LF // %x0D.0A ) ; CRLF +/// Safety-usable invariant: Only returns ASCII bytes pub(crate) fn newline(input: &mut Input<'_>) -> PResult { alt(( + // Safety: CR and LF are ASCII one_of(LF).value(b'\n'), (one_of(CR), one_of(LF)).value(b'\n'), )) @@ -76,6 +81,7 @@ pub(crate) fn ws_newline<'i>(input: &mut Input<'i>) -> PResult<&'i str> { ) .map(|()| ()) .recognize() + // Safety: `newline` and `WSCHAR` are all ASCII .map(|b| unsafe { from_utf8_unchecked(b, "`is_wschar` and `newline` filters out on-ASCII") }) .parse_next(input) } @@ -85,6 +91,7 @@ pub(crate) fn ws_newlines<'i>(input: &mut Input<'i>) -> PResult<&'i str> { (newline, ws_newline) .recognize() .map(|b| unsafe { + // Safety: `newline` and `WSCHAR` are all ASCII from_utf8_unchecked(b, "`is_wschar` and `newline` filters out on-ASCII") }) .parse_next(input)