diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 7ac1c394ec9..38d62c59310 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -34,9 +34,9 @@ typedef struct { Tcl_EncodingFreeProc *freeProc; /* If non-NULL, function to call when this * encoding is deleted. */ - void *clientData; /* Arbitrary value associated with encoding + void *clientData; /* Arbitrary value associated with encoding * type. Passed to conversion functions. */ - Tcl_Size nullSize; /* Number of 0x00 bytes that signify + Tcl_Size nullSize; /* Number of 0x00 bytes that signify * end-of-string in this encoding. This number * is used to determine the source string * length when the srcLen argument is @@ -119,7 +119,8 @@ typedef struct { * entry in this array is 1, otherwise it is * 0. */ int numSubTables; /* Length of following array. */ - EscapeSubTable subTables[TCLFLEXARRAY];/* Information about each EscapeSubTable used + EscapeSubTable subTables[TCLFLEXARRAY]; + /* Information about each EscapeSubTable used * by this encoding type. The actual size is * as large as necessary to hold all * EscapeSubTables. */ @@ -211,9 +212,9 @@ static const struct TclEncodingProfiles { ((ENCODING_PROFILE_GET(flags_) != TCL_ENCODING_PROFILE_REPLACE) && !PROFILE_STRICT(flags_)) #define UNICODE_REPLACE_CHAR 0xFFFD -#define SURROGATE(c_) (((c_) & ~0x7FF) == 0xD800) -#define HIGH_SURROGATE(c_) (((c_) & ~0x3FF) == 0xD800) -#define LOW_SURROGATE(c_) (((c_) & ~0x3FF) == 0xDC00) +#define SURROGATE(c_) (((c_) & ~0x7FF) == 0xD800) +#define HIGH_SURROGATE(c_) (((c_) & ~0x3FF) == 0xD800) +#define LOW_SURROGATE(c_) (((c_) & ~0x3FF) == 0xDC00) /* * The following variable is used in the sparse matrix code for a @@ -258,11 +259,10 @@ static Tcl_EncodingConvertProc UtfToUtfProc; static Tcl_EncodingConvertProc Iso88591FromUtfProc; static Tcl_EncodingConvertProc Iso88591ToUtfProc; - /* - * A Tcl_ObjType for holding a cached Tcl_Encoding in the twoPtrValue.ptr1 field - * of the internalrep. This should help the lifetime of encodings be more useful. - * See concerns raised in [Bug 1077262]. + * A Tcl_ObjType for holding a cached Tcl_Encoding in the twoPtrValue.ptr1 + * field of the internalrep. This should help the lifetime of encodings be more + * useful. See concerns raised in [Bug 1077262]. */ static const Tcl_ObjType encodingType = { @@ -273,21 +273,20 @@ static const Tcl_ObjType encodingType = { NULL }; -#define EncodingSetInternalRep(objPtr, encoding) \ +#define EncodingSetInternalRep(objPtr, encoding) \ do { \ Tcl_ObjInternalRep ir; \ ir.twoPtrValue.ptr1 = (encoding); \ ir.twoPtrValue.ptr2 = NULL; \ - Tcl_StoreInternalRep((objPtr), &encodingType, &ir); \ + Tcl_StoreInternalRep((objPtr), &encodingType, &ir); \ } while (0) -#define EncodingGetInternalRep(objPtr, encoding) \ +#define EncodingGetInternalRep(objPtr, encoding) \ do { \ - const Tcl_ObjInternalRep *irPtr; \ + const Tcl_ObjInternalRep *irPtr; \ irPtr = TclFetchInternalRep ((objPtr), &encodingType); \ - (encoding) = irPtr ? (Tcl_Encoding)irPtr->twoPtrValue.ptr1 : NULL; \ + (encoding) = irPtr ? (Tcl_Encoding)irPtr->twoPtrValue.ptr1 : NULL; \ } while (0) - /* *---------------------------------------------------------------------- @@ -517,13 +516,13 @@ FillEncodingFileMap(void) TclListObjGetElements(NULL, matchFileList, &numFiles, &filev); for (j=0; j internal */ +enum InternalEncodingFlags { + TCL_ENCODING_LE = 0x100, /* Used to distinguish LE/BE variants */ + ENCODING_UTF = 0x200, /* For UTF-8 encoding, allow 4-byte output + * sequences */ + ENCODING_INPUT = 0x400 /* For UTF-8/CESU-8 encoding, means + * external -> internal */ +}; void TclInitEncodingSubsystem(void) @@ -612,30 +615,30 @@ TclInitEncodingSubsystem(void) Tcl_CreateEncoding(&type); type.toUtfProc = Utf16ToUtfProc; - type.fromUtfProc = UtfToUcs2Proc; + type.fromUtfProc = UtfToUcs2Proc; type.freeProc = NULL; type.nullSize = 2; - type.encodingName = "ucs-2le"; + type.encodingName = "ucs-2le"; type.clientData = INT2PTR(TCL_ENCODING_LE); Tcl_CreateEncoding(&type); - type.encodingName = "ucs-2be"; + type.encodingName = "ucs-2be"; type.clientData = NULL; Tcl_CreateEncoding(&type); - type.encodingName = "ucs-2"; + type.encodingName = "ucs-2"; type.clientData = INT2PTR(leFlags); Tcl_CreateEncoding(&type); type.toUtfProc = Utf32ToUtfProc; - type.fromUtfProc = UtfToUtf32Proc; + type.fromUtfProc = UtfToUtf32Proc; type.freeProc = NULL; type.nullSize = 4; - type.encodingName = "utf-32le"; + type.encodingName = "utf-32le"; type.clientData = INT2PTR(TCL_ENCODING_LE); Tcl_CreateEncoding(&type); - type.encodingName = "utf-32be"; + type.encodingName = "utf-32be"; type.clientData = NULL; Tcl_CreateEncoding(&type); - type.encodingName = "utf-32"; + type.encodingName = "utf-32"; type.clientData = INT2PTR(leFlags); Tcl_CreateEncoding(&type); @@ -643,18 +646,18 @@ TclInitEncodingSubsystem(void) type.fromUtfProc = UtfToUtf16Proc; type.freeProc = NULL; type.nullSize = 2; - type.encodingName = "utf-16le"; + type.encodingName = "utf-16le"; type.clientData = INT2PTR(TCL_ENCODING_LE); Tcl_CreateEncoding(&type); - type.encodingName = "utf-16be"; + type.encodingName = "utf-16be"; type.clientData = NULL; Tcl_CreateEncoding(&type); - type.encodingName = "utf-16"; + type.encodingName = "utf-16"; type.clientData = INT2PTR(leFlags); Tcl_CreateEncoding(&type); #ifndef TCL_NO_DEPRECATED - type.encodingName = "unicode"; + type.encodingName = "unicode"; Tcl_CreateEncoding(&type); #endif @@ -1035,7 +1038,7 @@ Tcl_GetEncodingNames( * Tcl_GetEncodingNulLength -- * * Given an encoding, return the number of nul bytes used for the - * string termination. + * string termination. * * Results: * The number of nul bytes used for the string termination. @@ -1158,30 +1161,30 @@ Tcl_CreateEncoding( encodingPtr->refCount = 1; encodingPtr->hPtr = NULL; - if (typePtr->encodingName) { - Tcl_HashEntry *hPtr; - int isNew; - char *name; + if (typePtr->encodingName) { + Tcl_HashEntry *hPtr; + int isNew; + char *name; - Tcl_MutexLock(&encodingMutex); - hPtr = Tcl_CreateHashEntry(&encodingTable, typePtr->encodingName, &isNew); - if (isNew == 0) { - /* - * Remove old encoding from hash table, but don't delete it until last - * reference goes away. - */ + Tcl_MutexLock(&encodingMutex); + hPtr = Tcl_CreateHashEntry(&encodingTable, typePtr->encodingName, &isNew); + if (isNew == 0) { + /* + * Remove old encoding from hash table, but don't delete it until last + * reference goes away. + */ - Encoding *replaceMe = (Encoding *)Tcl_GetHashValue(hPtr); - replaceMe->hPtr = NULL; - } + Encoding *replaceMe = (Encoding *)Tcl_GetHashValue(hPtr); + replaceMe->hPtr = NULL; + } - name = (char *)ckalloc(strlen(typePtr->encodingName) + 1); - encodingPtr->name = strcpy(name, typePtr->encodingName); - encodingPtr->hPtr = hPtr; - Tcl_SetHashValue(hPtr, encodingPtr); + name = (char *)ckalloc(strlen(typePtr->encodingName) + 1); + encodingPtr->name = strcpy(name, typePtr->encodingName); + encodingPtr->hPtr = hPtr; + Tcl_SetHashValue(hPtr, encodingPtr); - Tcl_MutexUnlock(&encodingMutex); - } + Tcl_MutexUnlock(&encodingMutex); + } return (Tcl_Encoding) encodingPtr; } @@ -1220,7 +1223,6 @@ Tcl_ExternalToUtfDString( NULL, encoding, src, srcLen, TCL_ENCODING_PROFILE_TCL8, dstPtr, NULL); return Tcl_DStringValue(dstPtr); } - /* *------------------------------------------------------------------------- @@ -1235,45 +1237,44 @@ Tcl_ExternalToUtfDString( * - *At most one* of TCL_ENCODING_PROFILE{DEFAULT,TCL8,STRICT} * * Results: - * The return value is one of - * TCL_OK: success. Converted string in *dstPtr - * TCL_ERROR: error in passed parameters. Error message in interp - * TCL_CONVERT_MULTIBYTE: source ends in truncated multibyte sequence - * TCL_CONVERT_SYNTAX: source is not conformant to encoding definition - * TCL_CONVERT_UNKNOWN: source contained a character that could not - * be represented in target encoding. + * The return value is one of + * TCL_OK: success. Converted string in *dstPtr + * TCL_ERROR: error in passed parameters. Error message in interp + * TCL_CONVERT_MULTIBYTE: source ends in truncated multibyte sequence + * TCL_CONVERT_SYNTAX: source is not conformant to encoding definition + * TCL_CONVERT_UNKNOWN: source contained a character that could not + * be represented in target encoding. * * Side effects: - * - * TCL_OK: The converted bytes are stored in the DString and NUL - * terminated in an encoding-specific manner. - * TCL_ERROR: an error, message is stored in the interp if not NULL. - * TCL_CONVERT_*: if errorLocPtr is NULL, an error message is stored - * in the interpreter (if not NULL). If errorLocPtr is not NULL, - * no error message is stored as it is expected the caller is - * interested in whatever is decoded so far and not treating this - * as an error condition. - * - * In addition, *dstPtr is always initialized and must be cleared - * by the caller irrespective of the return code. + * TCL_OK: The converted bytes are stored in the DString and NUL + * terminated in an encoding-specific manner. + * TCL_ERROR: an error, message is stored in the interp if not NULL. + * TCL_CONVERT_*: if errorLocPtr is NULL, an error message is stored + * in the interpreter (if not NULL). If errorLocPtr is not NULL, + * no error message is stored as it is expected the caller is + * interested in whatever is decoded so far and not treating this + * as an error condition. + * + * In addition, *dstPtr is always initialized and must be cleared + * by the caller irrespective of the return code. * *------------------------------------------------------------------------- */ int Tcl_ExternalToUtfDStringEx( - Tcl_Interp *interp, /* For error messages. May be NULL. */ + Tcl_Interp *interp, /* For error messages. May be NULL. */ Tcl_Encoding encoding, /* The encoding for the source string, or NULL * for the default system encoding. */ const char *src, /* Source string in specified encoding. */ - Tcl_Size srcLen, /* Source string length in bytes, or < 0 for + Tcl_Size srcLen, /* Source string length in bytes, or < 0 for * encoding-specific string length. */ int flags, /* Conversion control flags. */ Tcl_DString *dstPtr, /* Uninitialized or free DString in which the * converted string is stored. */ - Tcl_Size *errorLocPtr) /* Where to store the error location - (or TCL_INDEX_NONE if no error). May - be NULL. */ + Tcl_Size *errorLocPtr) /* Where to store the error location + * (or TCL_INDEX_NONE if no error). May + * be NULL. */ { char *dst; Tcl_EncodingState state; @@ -1306,11 +1307,11 @@ Tcl_ExternalToUtfDStringEx( while (1) { result = encodingPtr->toUtfProc(encodingPtr->clientData, src, - srcLen, flags, &state, dst, dstLen, - &srcRead, &dstWrote, &dstChars); + srcLen, flags, &state, dst, dstLen, &srcRead, &dstWrote, &dstChars); soFar = dst + dstWrote - Tcl_DStringValue(dstPtr); src += srcRead; + if (result != TCL_CONVERT_NOSPACE) { Tcl_Size nBytesProcessed = (src - srcStart); @@ -1320,20 +1321,21 @@ Tcl_ExternalToUtfDStringEx( * Do not write error message into interpreter if caller * wants to know error location. */ - *errorLocPtr = result == TCL_OK ? TCL_INDEX_NONE : nBytesProcessed; + *errorLocPtr = result == TCL_OK + ? TCL_INDEX_NONE : nBytesProcessed; } else { /* Caller wants error message on failure */ if (result != TCL_OK && interp != NULL) { char buf[TCL_INTEGER_SPACE]; - snprintf(buf, sizeof(buf), "%" TCL_SIZE_MODIFIER "d", nBytesProcessed); - Tcl_SetObjResult( - interp, - Tcl_ObjPrintf("unexpected byte sequence starting at index %" - TCL_SIZE_MODIFIER "d: '\\x%02X'", - nBytesProcessed, - UCHAR(srcStart[nBytesProcessed]))); + snprintf(buf, sizeof(buf), "%" TCL_SIZE_MODIFIER "d", + nBytesProcessed); + Tcl_SetObjResult(interp, Tcl_ObjPrintf( + "unexpected byte sequence starting at index %" + TCL_SIZE_MODIFIER "d: '\\x%02X'", + nBytesProcessed, UCHAR(srcStart[nBytesProcessed]))); Tcl_SetErrorCode( - interp, "TCL", "ENCODING", "ILLEGALSEQUENCE", buf, (char *)NULL); + interp, "TCL", "ENCODING", "ILLEGALSEQUENCE", buf, + (char *)NULL); } } if (result != TCL_OK) { @@ -1378,8 +1380,9 @@ Tcl_ExternalToUtf( Tcl_Encoding encoding, /* The encoding for the source string, or NULL * for the default system encoding. */ const char *src, /* Source string in specified encoding. */ - Tcl_Size srcLen, /* Source string length in bytes, or < 0 for - * encoding-specific string length. */ + Tcl_Size srcLen, /* Source string length in bytes, or + * TCL_INDEX_NONE for encoding-specific string + * length. */ int flags, /* Conversion control flags. */ Tcl_EncodingState *statePtr,/* Place for conversion routine to store state * information used during a piecewise @@ -1416,7 +1419,7 @@ Tcl_ExternalToUtf( if (src == NULL) { srcLen = 0; - } else if (srcLen < 0) { + } else if (srcLen == TCL_INDEX_NONE) { srcLen = encodingPtr->lengthProc(src); } if (statePtr == NULL) { @@ -1511,7 +1514,6 @@ Tcl_UtfToExternalDString( NULL, encoding, src, srcLen, TCL_ENCODING_PROFILE_TCL8, dstPtr, NULL); return Tcl_DStringValue(dstPtr); } - /* *------------------------------------------------------------------------- @@ -1525,34 +1527,33 @@ Tcl_UtfToExternalDString( * - *At most one* of TCL_ENCODING_PROFILE_* * * Results: - * The return value is one of - * TCL_OK: success. Converted string in *dstPtr - * TCL_ERROR: error in passed parameters. Error message in interp - * TCL_CONVERT_MULTIBYTE: source ends in truncated multibyte sequence - * TCL_CONVERT_SYNTAX: source is not conformant to encoding definition - * TCL_CONVERT_UNKNOWN: source contained a character that could not - * be represented in target encoding. + * The return value is one of + * TCL_OK: success. Converted string in *dstPtr + * TCL_ERROR: error in passed parameters. Error message in interp + * TCL_CONVERT_MULTIBYTE: source ends in truncated multibyte sequence + * TCL_CONVERT_SYNTAX: source is not conformant to encoding definition + * TCL_CONVERT_UNKNOWN: source contained a character that could not + * be represented in target encoding. * * Side effects: - * - * TCL_OK: The converted bytes are stored in the DString and NUL - * terminated in an encoding-specific manner - * TCL_ERROR: an error, message is stored in the interp if not NULL. - * TCL_CONVERT_*: if errorLocPtr is NULL, an error message is stored - * in the interpreter (if not NULL). If errorLocPtr is not NULL, - * no error message is stored as it is expected the caller is - * interested in whatever is decoded so far and not treating this - * as an error condition. - * - * In addition, *dstPtr is always initialized and must be cleared - * by the caller irrespective of the return code. + * TCL_OK: The converted bytes are stored in the DString and NUL + * terminated in an encoding-specific manner + * TCL_ERROR: an error, message is stored in the interp if not NULL. + * TCL_CONVERT_*: if errorLocPtr is NULL, an error message is stored + * in the interpreter (if not NULL). If errorLocPtr is not NULL, + * no error message is stored as it is expected the caller is + * interested in whatever is decoded so far and not treating this + * as an error condition. + * + * In addition, *dstPtr is always initialized and must be cleared + * by the caller irrespective of the return code. * *------------------------------------------------------------------------- */ int Tcl_UtfToExternalDStringEx( - Tcl_Interp *interp, /* For error messages. May be NULL. */ + Tcl_Interp *interp, /* For error messages. May be NULL. */ Tcl_Encoding encoding, /* The encoding for the converted string, or * NULL for the default system encoding. */ const char *src, /* Source string in UTF-8. */ @@ -1561,9 +1562,9 @@ Tcl_UtfToExternalDStringEx( int flags, /* Conversion control flags. */ Tcl_DString *dstPtr, /* Uninitialized or free DString in which the * converted string is stored. */ - Tcl_Size *errorLocPtr) /* Where to store the error location - (or TCL_INDEX_NONE if no error). May - be NULL. */ + Tcl_Size *errorLocPtr) /* Where to store the error location + * (or TCL_INDEX_NONE if no error). May + * be NULL. */ { char *dst; Tcl_EncodingState state; @@ -1592,14 +1593,14 @@ Tcl_UtfToExternalDStringEx( flags |= TCL_ENCODING_START | TCL_ENCODING_END; while (1) { result = encodingPtr->fromUtfProc(encodingPtr->clientData, src, - srcLen, flags, &state, dst, dstLen, - &srcRead, &dstWrote, &dstChars); + srcLen, flags, &state, dst, dstLen, &srcRead, &dstWrote, &dstChars); soFar = dst + dstWrote - Tcl_DStringValue(dstPtr); src += srcRead; if (result != TCL_CONVERT_NOSPACE) { Tcl_Size nBytesProcessed = (src - srcStart); - int i = soFar + encodingPtr->nullSize - 1; + Tcl_Size i = soFar + encodingPtr->nullSize - 1; + /* Loop as DStringSetLength only stores one nul byte at a time */ while (i >= soFar) { Tcl_DStringSetLength(dstPtr, i--); } @@ -1608,24 +1609,24 @@ Tcl_UtfToExternalDStringEx( * Do not write error message into interpreter if caller * wants to know error location. */ - *errorLocPtr = result == TCL_OK ? TCL_INDEX_NONE : nBytesProcessed; + *errorLocPtr = result == TCL_OK + ? TCL_INDEX_NONE : nBytesProcessed; } else { /* Caller wants error message on failure */ if (result != TCL_OK && interp != NULL) { Tcl_Size pos = TclNumUtfChars(srcStart, nBytesProcessed); int ucs4; char buf[TCL_INTEGER_SPACE]; + TclUtfToUniChar(&srcStart[nBytesProcessed], &ucs4); - snprintf(buf, sizeof(buf), "%" TCL_SIZE_MODIFIER "d", nBytesProcessed); - Tcl_SetObjResult( - interp, - Tcl_ObjPrintf( + snprintf(buf, sizeof(buf), "%" TCL_SIZE_MODIFIER "d", + nBytesProcessed); + Tcl_SetObjResult(interp, Tcl_ObjPrintf( "unexpected character at index %" TCL_SIZE_MODIFIER "u: 'U+%06X'", - pos, - ucs4)); + pos, ucs4)); Tcl_SetErrorCode(interp, "TCL", "ENCODING", "ILLEGALSEQUENCE", - buf, (char *)NULL); + buf, (char *)NULL); } } if (result != TCL_OK) { @@ -1636,6 +1637,7 @@ Tcl_UtfToExternalDStringEx( flags &= ~TCL_ENCODING_START; srcLen -= srcRead; + if (Tcl_DStringLength(dstPtr) == 0) { Tcl_DStringSetLength(dstPtr, dstLen); } @@ -1669,8 +1671,8 @@ Tcl_UtfToExternal( Tcl_Encoding encoding, /* The encoding for the converted string, or * NULL for the default system encoding. */ const char *src, /* Source string in UTF-8. */ - Tcl_Size srcLen, /* Source string length in bytes, or < 0 for - * strlen(). */ + Tcl_Size srcLen, /* Source string length in bytes, or + * TCL_INDEX_NONE for strlen(). */ int flags, /* Conversion control flags. */ Tcl_EncodingState *statePtr,/* Place for conversion routine to store state * information used during a piecewise @@ -1966,7 +1968,7 @@ LoadEncodingFile( * little-endian systems. * * Results: - * Returns the new Tcl_Encoding, or NULL if it could could + * Returns the new Tcl_Encoding, or NULL if it could * not be created because the file contained invalid data. * * Side effects: @@ -2206,7 +2208,7 @@ LoadTableEncoding( */ for (TclDStringClear(&lineString); - (len = Tcl_Gets(chan, &lineString)) >= 0; + (len = Tcl_Gets(chan, &lineString)) != -1; TclDStringClear(&lineString)) { const unsigned char *p; int to, from; @@ -2335,8 +2337,8 @@ LoadEscapeEncoding( e = (Encoding *) Tcl_GetEncoding(NULL, est.name); if ((e != NULL) && (e->toUtfProc != TableToUtfProc) && (e->toUtfProc != Iso88591ToUtfProc)) { - Tcl_FreeEncoding((Tcl_Encoding) e); - e = NULL; + Tcl_FreeEncoding((Tcl_Encoding) e); + e = NULL; } est.encodingPtr = e; Tcl_DStringAppend(&escapeData, (char *) &est, sizeof(est)); @@ -2535,12 +2537,12 @@ UtfToUtfProc( /* Special sequence \xC0\x80 */ if (!PROFILE_TCL8(profile) && (flags & ENCODING_INPUT)) { if (PROFILE_REPLACE(profile)) { - dst += Tcl_UniCharToUtf(UNICODE_REPLACE_CHAR, dst); - src += 2; + dst += Tcl_UniCharToUtf(UNICODE_REPLACE_CHAR, dst); + src += 2; } else { - /* PROFILE_STRICT */ - result = TCL_CONVERT_SYNTAX; - break; + /* PROFILE_STRICT */ + result = TCL_CONVERT_SYNTAX; + break; } } else { /* @@ -2564,8 +2566,8 @@ UtfToUtfProc( /* Incomplete bytes for modified UTF-8 target */ if (PROFILE_STRICT(profile)) { result = (flags & TCL_ENCODING_CHAR_LIMIT) - ? TCL_CONVERT_MULTIBYTE - : TCL_CONVERT_SYNTAX; + ? TCL_CONVERT_MULTIBYTE + : TCL_CONVERT_SYNTAX; break; } } @@ -2670,7 +2672,7 @@ UtfToUtfProc( static int Utf32ToUtfProc( - void *clientData, /* additional flags, e.g. TCL_ENCODING_LE */ + void *clientData, /* additional flags, e.g. TCL_ENCODING_LE */ const char *src, /* Source string in Unicode. */ int srcLen, /* Source string length in bytes. */ int flags, /* Conversion control flags. */ @@ -2827,7 +2829,7 @@ Utf32ToUtfProc( static int UtfToUtf32Proc( - void *clientData, /* additional flags, e.g. TCL_ENCODING_LE */ + void *clientData, /* additional flags, e.g. TCL_ENCODING_LE */ const char *src, /* Source string in UTF-8. */ int srcLen, /* Source string length in bytes. */ int flags, /* Conversion control flags. */ @@ -2926,7 +2928,7 @@ UtfToUtf32Proc( static int Utf16ToUtfProc( - void *clientData, /* additional flags, e.g. TCL_ENCODING_LE */ + void *clientData, /* additional flags, e.g. TCL_ENCODING_LE */ const char *src, /* Source string in Unicode. */ int srcLen, /* Source string length in bytes. */ int flags, /* Conversion control flags. */ @@ -3105,7 +3107,7 @@ Utf16ToUtfProc( static int UtfToUtf16Proc( - void *clientData, /* additional flags, e.g. TCL_ENCODING_LE */ + void *clientData, /* additional flags, e.g. TCL_ENCODING_LE */ const char *src, /* Source string in UTF-8. */ int srcLen, /* Source string length in bytes. */ int flags, /* Conversion control flags. */ @@ -3166,21 +3168,25 @@ UtfToUtf16Proc( ch = UNICODE_REPLACE_CHAR; } } - src += len; - if (flags & TCL_ENCODING_LE) { - if (ch <= 0xFFFF) { + if (ch <= 0xFFFF) { + if (flags & TCL_ENCODING_LE) { *dst++ = (ch & 0xFF); *dst++ = (ch >> 8); } else { - *dst++ = (((ch - 0x10000) >> 10) & 0xFF); - *dst++ = (((ch - 0x10000) >> 18) & 0x3) | 0xD8; + *dst++ = (ch >> 8); *dst++ = (ch & 0xFF); - *dst++ = ((ch >> 8) & 0x3) | 0xDC; } } else { - if (ch <= 0xFFFF) { - *dst++ = (ch >> 8); + if ((dst+2) > dstEnd) { + /* Surrogates need 2 more bytes! Bug [66da4d4228] */ + result = TCL_CONVERT_NOSPACE; + break; + } + if (flags & TCL_ENCODING_LE) { + *dst++ = (((ch - 0x10000) >> 10) & 0xFF); + *dst++ = (((ch - 0x10000) >> 18) & 0x3) | 0xD8; *dst++ = (ch & 0xFF); + *dst++ = ((ch >> 8) & 0x3) | 0xDC; } else { *dst++ = (((ch - 0x10000) >> 18) & 0x3) | 0xD8; *dst++ = (((ch - 0x10000) >> 10) & 0xFF); @@ -3188,6 +3194,7 @@ UtfToUtf16Proc( *dst++ = (ch & 0xFF); } } + src += len; } *srcReadPtr = src - srcStart; *dstWrotePtr = dst - dstStart; @@ -3213,7 +3220,7 @@ UtfToUtf16Proc( static int UtfToUcs2Proc( - void *clientData, /* additional flags, e.g. TCL_ENCODING_LE */ + void *clientData, /* additional flags, e.g. TCL_ENCODING_LE */ const char *src, /* Source string in UTF-8. */ int srcLen, /* Source string length in bytes. */ int flags, /* Conversion control flags. */ @@ -3317,7 +3324,7 @@ UtfToUcs2Proc( static int TableToUtfProc( - void *clientData, /* TableEncodingData that specifies + void *clientData, /* TableEncodingData that specifies * encoding. */ const char *src, /* Source string in specified encoding. */ int srcLen, /* Source string length in bytes. */ @@ -3445,7 +3452,7 @@ TableToUtfProc( static int TableFromUtfProc( - void *clientData, /* TableEncodingData that specifies + void *clientData, /* TableEncodingData that specifies * encoding. */ const char *src, /* Source string in UTF-8. */ int srcLen, /* Source string length in bytes. */ @@ -3737,7 +3744,7 @@ Iso88591FromUtfProc( static void TableFreeProc( - void *clientData) /* TableEncodingData that specifies + void *clientData) /* TableEncodingData that specifies * encoding. */ { TableEncodingData *dataPtr = (TableEncodingData *)clientData; @@ -3772,7 +3779,7 @@ TableFreeProc( static int EscapeToUtfProc( - void *clientData, /* EscapeEncodingData that specifies + void *clientData, /* EscapeEncodingData that specifies * encoding. */ const char *src, /* Source string in specified encoding. */ int srcLen, /* Source string length in bytes. */ @@ -3985,7 +3992,7 @@ EscapeToUtfProc( static int EscapeFromUtfProc( - void *clientData, /* EscapeEncodingData that specifies + void *clientData, /* EscapeEncodingData that specifies * encoding. */ const char *src, /* Source string in UTF-8. */ int srcLen, /* Source string length in bytes. */ @@ -4071,6 +4078,15 @@ EscapeFromUtfProc( break; } len = TclUtfToUniChar(src, &ch); + if (ch > 0xFFFF) { + /* Bug 201c7a3aa6 crash - tables are 256x256 (64K) */ + if (PROFILE_STRICT(flags)) { + result = TCL_CONVERT_SYNTAX; + break; + } + /* Will be encoded as encoding specific replacement below */ + ch = UNICODE_REPLACE_CHAR; + } word = tableFromUnicode[(ch >> 8)][ch & 0xFF]; if ((word == 0) && (ch != 0)) { @@ -4196,7 +4212,7 @@ EscapeFromUtfProc( static void EscapeFreeProc( - void *clientData) /* EscapeEncodingData that specifies + void *clientData) /* EscapeEncodingData that specifies * encoding. */ { EscapeEncodingData *dataPtr = (EscapeEncodingData *)clientData; @@ -4401,7 +4417,7 @@ int TclEncodingProfileNameToId( Tcl_Interp *interp, /* For error messages. May be NULL */ const char *profileName, /* Name of profile */ - int *profilePtr) /* Output */ + int *profilePtr) /* Output */ { size_t i; size_t numProfiles = sizeof(encodingProfiles) / sizeof(encodingProfiles[0]); @@ -4413,21 +4429,19 @@ TclEncodingProfileNameToId( } } if (interp) { - Tcl_Obj *errorObj; /* This code assumes at least two profiles :-) */ - errorObj = - Tcl_ObjPrintf("bad profile name \"%s\": must be", + Tcl_Obj *errorObj = Tcl_ObjPrintf("bad profile name \"%s\": must be", profileName); for (i = 0; i < (numProfiles - 1); ++i) { Tcl_AppendStringsToObj( - errorObj, " ", encodingProfiles[i].name, ",", (char *)NULL); + errorObj, " ", encodingProfiles[i].name, ",", (char *)NULL); } Tcl_AppendStringsToObj( - errorObj, " or ", encodingProfiles[numProfiles-1].name, (char *)NULL); + errorObj, " or ", encodingProfiles[numProfiles-1].name, (char *)NULL); Tcl_SetObjResult(interp, errorObj); Tcl_SetErrorCode( - interp, "TCL", "ENCODING", "PROFILE", profileName, (char *)NULL); + interp, "TCL", "ENCODING", "PROFILE", profileName, (char *)NULL); } return TCL_ERROR; } @@ -4460,13 +4474,10 @@ TclEncodingProfileIdToName( } } if (interp) { - Tcl_SetObjResult( - interp, - Tcl_ObjPrintf( - "Internal error. Bad profile id \"%d\".", - profileValue)); + Tcl_SetObjResult(interp, Tcl_ObjPrintf( + "Internal error. Bad profile id \"%d\".", profileValue)); Tcl_SetErrorCode( - interp, "TCL", "ENCODING", "PROFILEID", (char *)NULL); + interp, "TCL", "ENCODING", "PROFILEID", (char *)NULL); } return NULL; } @@ -4487,19 +4498,20 @@ TclEncodingProfileIdToName( *------------------------------------------------------------------------ */ void -TclGetEncodingProfiles(Tcl_Interp *interp) +TclGetEncodingProfiles( + Tcl_Interp *interp) { size_t i, n; Tcl_Obj *objPtr; n = sizeof(encodingProfiles) / sizeof(encodingProfiles[0]); objPtr = Tcl_NewListObj(n, NULL); for (i = 0; i < n; ++i) { - Tcl_ListObjAppendElement( - interp, objPtr, Tcl_NewStringObj(encodingProfiles[i].name, TCL_INDEX_NONE)); + Tcl_ListObjAppendElement(interp, objPtr, + Tcl_NewStringObj(encodingProfiles[i].name, TCL_INDEX_NONE)); } Tcl_SetObjResult(interp, objPtr); } - + /* * Local Variables: * mode: c