From 5613dc601d4eef5cf98fe5261d06dc30fb5b900d Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Sun, 16 Jul 2023 19:11:44 -0600 Subject: [PATCH] utf8_hop_forward: Don't go over edge of buffer even in the presence of malformed UTF-8. This preserves the previous behavior that, if you start at one byte past the edge of the buffer, it returns that position. --- inline.h | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/inline.h b/inline.h index 94b63d5870d7..ccab2db1a7ee 100644 --- a/inline.h +++ b/inline.h @@ -2659,7 +2659,7 @@ start of the next character. C<off> must be non-negative. -C<s> must be before or equal to C<end>. +C<s> must be before or equal to C<end>. If after, the function panics. When moving forward it will not move beyond C<end>. @@ -2677,19 +2677,28 @@ Perl_utf8_hop_forward(const U8 *s, SSize_t off, const U8 *end) * the bitops (especially ~) can create illegal UTF-8. * In other words: in Perl UTF-8 is not just for Unicode. */ - assert(s <= end); assert(off >= 0); + if (UNLIKELY(s >= end)) { + if (s == end) { + return (U8 *) end; + } + + Perl_croak_nocontext("panic: Start of forward hop (0x%p) is %zd bytes" + " beyond legal end position (0x%p)", + s, 1 + s - end, end); + } + if (off && UNLIKELY(UTF8_IS_CONTINUATION(*s))) { /* Get to next non-continuation byte */ do { s++; } - while (UTF8_IS_CONTINUATION(*s)); + while (s < end && UTF8_IS_CONTINUATION(*s)); off--; } - while (off--) { + while (off-- && s < end) { STRLEN skip = UTF8SKIP(s); if ((STRLEN)(end - s) <= skip) { GCC_DIAG_IGNORE(-Wcast-qual)