From b589f86a09c823208cbe95755f0cb0883e28d0de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=AE=B8=E6=9D=B0=E5=8F=8B=20Jieyou=20Xu=20=28Joe=29?= Date: Fri, 9 Aug 2024 05:03:36 +0000 Subject: [PATCH 1/2] tests: add regression test for incorrect `BytePos` manipulation triggering assertion Issue: rust-lang/rust#128717 --- .../ui/typeck/suggest-arg-comma-delete-ice.rs | 19 ++++++++++ .../suggest-arg-comma-delete-ice.stderr | 38 +++++++++++++++++++ 2 files changed, 57 insertions(+) create mode 100644 tests/ui/typeck/suggest-arg-comma-delete-ice.rs create mode 100644 tests/ui/typeck/suggest-arg-comma-delete-ice.stderr diff --git a/tests/ui/typeck/suggest-arg-comma-delete-ice.rs b/tests/ui/typeck/suggest-arg-comma-delete-ice.rs new file mode 100644 index 0000000000000..48d02e13eca09 --- /dev/null +++ b/tests/ui/typeck/suggest-arg-comma-delete-ice.rs @@ -0,0 +1,19 @@ +//! Previously, we tried to remove extra arg commas when providing extra arg removal suggestions. +//! One of the edge cases is having to account for an arg that has a closing delimiter `)` +//! following it. However, the previous suggestion code assumed that the delimiter is in fact +//! exactly the 1-byte `)` character. This assumption was proven incorrect, because we recover +//! from Unicode-confusable delimiters in the parser, which means that the ending delimiter could be +//! a multi-byte codepoint that looks *like* a `)`. Subtracting 1 byte could land us in the middle of +//! a codepoint, triggering a codepoint boundary assertion. +//! +//! issue: rust-lang/rust#128717 + +fn main() { + // The following example has been modified from #128717 to remove irrelevant Unicode as they do + // not otherwise partake in the right delimiter calculation causing the codepoint boundary + // assertion. 
+ main(rahh）; + //~^ ERROR unknown start of token + //~| ERROR this function takes 0 arguments but 1 argument was supplied + //~| ERROR cannot find value `rahh` in this scope +} diff --git a/tests/ui/typeck/suggest-arg-comma-delete-ice.stderr b/tests/ui/typeck/suggest-arg-comma-delete-ice.stderr new file mode 100644 index 0000000000000..53608391f3c89 --- /dev/null +++ b/tests/ui/typeck/suggest-arg-comma-delete-ice.stderr @@ -0,0 +1,38 @@ +error: unknown start of token: \u{ff09} + --> $DIR/suggest-arg-comma-delete-ice.rs:15:14 + | +LL | main(rahh）; + | ^^ + | +help: Unicode character '）' (Fullwidth Right Parenthesis) looks like ')' (Right Parenthesis), but it is not + | +LL | main(rahh); + | ~ + +error[E0425]: cannot find value `rahh` in this scope + --> $DIR/suggest-arg-comma-delete-ice.rs:15:10 + | +LL | main(rahh）; + | ^^^^ not found in this scope + +error[E0061]: this function takes 0 arguments but 1 argument was supplied + --> $DIR/suggest-arg-comma-delete-ice.rs:15:5 + | +LL | main(rahh）; + | ^^^^ ---- unexpected argument + | +note: function defined here + --> $DIR/suggest-arg-comma-delete-ice.rs:11:4 + | +LL | fn main() { + | ^^^^ +help: remove the extra argument + | +LL - main(rahh）; +LL + main(); + | + +error: aborting due to 3 previous errors + +Some errors have detailed explanations: E0061, E0425. +For more information about an error, try `rustc --explain E0061`. From 879bfd7ad0f5f79e7bc90320dfb80dfabe91ac2b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=AE=B8=E6=9D=B0=E5=8F=8B=20Jieyou=20Xu=20=28Joe=29?= Date: Fri, 9 Aug 2024 05:01:27 +0000 Subject: [PATCH 2/2] hir_typeck: use `end_point` over `BytePos` manipulations Parser has error recovery for Unicode-confusables, which includes the right parenthesis `)`. If a multi-byte right parenthesis look-alike reaches the argument removal suggestion diagnostics, it would trigger an assertion because the diagnostics used `- BytePos(1)` which can land within a multi-byte codepoint. 
This is fixed by using `SourceMap::end_point` to find the final right delimiter codepoint, which correctly respects codepoint boundaries. --- compiler/rustc_hir_typeck/src/fn_ctxt/checks.rs | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/compiler/rustc_hir_typeck/src/fn_ctxt/checks.rs b/compiler/rustc_hir_typeck/src/fn_ctxt/checks.rs index cef003e0a43de..89e7227eda2c7 100644 --- a/compiler/rustc_hir_typeck/src/fn_ctxt/checks.rs +++ b/compiler/rustc_hir_typeck/src/fn_ctxt/checks.rs @@ -22,7 +22,7 @@ use rustc_middle::ty::{self, IsSuggestable, Ty, TyCtxt}; use rustc_middle::{bug, span_bug}; use rustc_session::Session; use rustc_span::symbol::{kw, Ident}; -use rustc_span::{sym, BytePos, Span, DUMMY_SP}; +use rustc_span::{sym, Span, DUMMY_SP}; use rustc_trait_selection::error_reporting::infer::{FailureCode, ObligationCauseExt}; use rustc_trait_selection::infer::InferCtxtExt; use rustc_trait_selection::traits::{self, ObligationCauseCode, SelectionContext}; @@ -1140,8 +1140,11 @@ impl<'a, 'tcx> FnCtxt<'a, 'tcx> { .get(arg_idx + 1) .map(|&(_, sp)| sp) .unwrap_or_else(|| { - // Subtract one to move before `)` - call_expr.span.with_lo(call_expr.span.hi() - BytePos(1)) + // Try to move before `)`. Note that `)` here is not necessarily + // the latin right paren, it could be a Unicode-confusable that + // looks like a `)`, so we must not use `- BytePos(1)` + // manipulations here. + self.tcx().sess.source_map().end_point(call_expr.span) }); // Include next comma