From 50eeabb1014676f4bcc1fe38a06a5d3ef8806044 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Wed, 19 Jul 2023 13:04:51 +0000 Subject: [PATCH 1/2] Bugfix for escape-encoding, 'stolen' from TIP #671 branch --- generic/tclEncoding.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 539b5206678..deaabaa3486 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -3961,9 +3961,10 @@ EscapeToUtfProc( if ((checked == dataPtr->numSubTables + 2) || (flags & TCL_ENCODING_END)) { if (!PROFILE_STRICT(flags)) { + unsigned skip = longest > left ? left : longest; /* Unknown escape sequence */ dst += Tcl_UniCharToUtf(UNICODE_REPLACE_CHAR, dst); - src += longest; + src += skip; continue; } result = TCL_CONVERT_SYNTAX; From 515bd7c0e7439f19f59c33be1ff8089de8f4417f Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Wed, 19 Jul 2023 14:23:31 +0000 Subject: [PATCH 2/2] Improve error-reporting possibilities for Tcl_ExternalToUtfDStringEx/Tcl_UtfToExternalDStringEx in case no interpreter is available, by setting "errno". --- generic/tclEncoding.c | 21 +++++++++++++++------ generic/tclListObj.c | 4 ++-- 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index deaabaa3486..83510ccce33 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -1290,6 +1290,7 @@ Tcl_ExternalToUtfDStringEx( "Parameter error: TCL_ENCODING_{START,STOP} bits set in flags.", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "ENCODING", "ILLEGALFLAGS", NULL); + errno = EINVAL; return TCL_ERROR; } @@ -1333,17 +1334,20 @@ Tcl_ExternalToUtfDStringEx( /* Caller wants error message on failure */ if (result != TCL_OK && interp != NULL) { char buf[TCL_INTEGER_SPACE]; - snprintf(buf, sizeof(buf), "%u", nBytesProcessed); + snprintf(buf, sizeof(buf), "%" TCL_SIZE_MODIFIER "u", nBytesProcessed); Tcl_SetObjResult( interp, Tcl_ObjPrintf("unexpected byte sequence starting at index %" - "u: '\\x%02X'", + TCL_SIZE_MODIFIER "u: '\\x%02X'", nBytesProcessed, UCHAR(srcStart[nBytesProcessed]))); Tcl_SetErrorCode( interp, "TCL", "ENCODING", "ILLEGALSEQUENCE", buf, NULL); } } + if (result != TCL_OK) { + errno = (result == TCL_CONVERT_NOSPACE) ? ENOMEM : EILSEQ; + } return result; } @@ -1527,7 +1531,7 @@ Tcl_UtfToExternalDString( * The parameter flags controls the behavior, if any of the bytes in * the source buffer are invalid or cannot be represented in the * target encoding. It should be composed by OR-ing the following: - * - *At most one* of TCL_ENCODING_PROFILE{DEFAULT,TCL8,STRICT} + * - *At most one* of TCL_ENCODING_PROFILE_* * - TCL_ENCODING_STOPONERROR: Backward compatibility. Sets the profile * to TCL_ENCODING_PROFILE_STRICT overriding any specified profile flags * @@ -1590,6 +1594,7 @@ Tcl_UtfToExternalDStringEx( "Parameter error: TCL_ENCODING_{START,STOP} bits set in flags.", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "ENCODING", "ILLEGALFLAGS", NULL); + errno = EINVAL; return TCL_ERROR; } @@ -1630,21 +1635,25 @@ Tcl_UtfToExternalDStringEx( } else { /* Caller wants error message on failure */ if (result != TCL_OK && interp != NULL) { - int pos = Tcl_NumUtfChars(srcStart, nBytesProcessed); + Tcl_Size pos = Tcl_NumUtfChars(srcStart, nBytesProcessed); int ucs4; char buf[TCL_INTEGER_SPACE]; TclUtfToUCS4(&srcStart[nBytesProcessed], &ucs4); - snprintf(buf, sizeof(buf), "%u", nBytesProcessed); + snprintf(buf, sizeof(buf), "%" TCL_SIZE_MODIFIER "u", nBytesProcessed); Tcl_SetObjResult( interp, Tcl_ObjPrintf( - "unexpected character at index %d: 'U+%06X'", + "unexpected character at index %" TCL_SIZE_MODIFIER + "u: 'U+%06X'", pos, ucs4)); Tcl_SetErrorCode(interp, "TCL", "ENCODING", "ILLEGALSEQUENCE", buf, NULL); } } + if (result != TCL_OK) { + errno = (result == TCL_CONVERT_NOSPACE) ? ENOMEM : EILSEQ; + } return result; } diff --git a/generic/tclListObj.c b/generic/tclListObj.c index 2f433c5f46c..f1b525882e5 100644 --- a/generic/tclListObj.c +++ b/generic/tclListObj.c @@ -3448,8 +3448,8 @@ UpdateStringOfList( * Mark the list as being canonical; although it will now have a string * rep, it is one we derived through proper "canonical" quoting and so * it's known to be free from nasties relating to [concat] and [eval]. - * However, we only do this if - * + * However, we only do this if + * * (a) the store is not shared as a shared store may be referenced by * multiple lists with different string reps. (see [a366c6efee]), AND *