Skip to content

Commit

Permalink
Fix [66da4d4228] - UTF-16 encoder buffer overflow. More encoding tests
Browse files Browse the repository at this point in the history
  • Loading branch information
apnadkarni committed Oct 21, 2024
2 parents 1b58229 + 770e4ad commit 712e9c0
Show file tree
Hide file tree
Showing 2 changed files with 62 additions and 1 deletion.
12 changes: 11 additions & 1 deletion generic/tclEncoding.c
Original file line number Diff line number Diff line change
Expand Up @@ -3273,12 +3273,16 @@ UtfToUtf16Proc(
ch = UNICODE_REPLACE_CHAR;
}
}
src += len;
if (flags & TCL_ENCODING_LE) {
if (ch <= 0xFFFF) {
*dst++ = (ch & 0xFF);
*dst++ = (ch >> 8);
} else {
if ((dst+2) > dstEnd) {
/* Surrogates need 2 more bytes! Bug [66da4d4228] */
result = TCL_CONVERT_NOSPACE;
break;
}
*dst++ = (((ch - 0x10000) >> 10) & 0xFF);
*dst++ = (((ch - 0x10000) >> 18) & 0x3) | 0xD8;
*dst++ = (ch & 0xFF);
Expand All @@ -3289,12 +3293,18 @@ UtfToUtf16Proc(
*dst++ = (ch >> 8);
*dst++ = (ch & 0xFF);
} else {
if ((dst+2) > dstEnd) {
/* Surrogates need 2 more bytes! Bug [66da4d4228] */
result = TCL_CONVERT_NOSPACE;
break;
}
*dst++ = (((ch - 0x10000) >> 18) & 0x3) | 0xD8;
*dst++ = (((ch - 0x10000) >> 10) & 0xFF);
*dst++ = ((ch >> 8) & 0x3) | 0xDC;
*dst++ = (ch & 0xFF);
}
}
src += len;
}
*srcReadPtr = src - srcStart;
*dstWrotePtr = dst - dstStart;
Expand Down
51 changes: 51 additions & 0 deletions tests/utfext.test
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,52 @@ namespace eval utftest {
}
}

proc testspacelimit {direction enc comment hexin hexout} {
set id $comment-[join $hexin ""]-spacelimit

# Triple the input to avoid pathological short input case where
# whereby nothing is written to output. The test below
# requires $nchars > 0
set hexin $hexin$hexin$hexin
set hexout $hexout$hexout$hexout

set flags [list start end]
set constraints [list testencoding]

set maxchars [llength $hexout]
set in [binary decode hex $hexin]
set out [binary decode hex $hexout]
set dstlen [expr {[string length $out] - 1}]; # Smaller buffer than needed

if {$direction eq "toutf"} {
set cmd Tcl_ExternalToUtf
set str [encoding convertfrom $enc $in]
} else {
set cmd Tcl_UtfToExternal
set str [encoding convertfrom $enc $out]
}

# Note the tests are loose because the some encoding operations will
# stop even there is actually still room in the destination. For example,
# below only one char is written though there is room in the output.
# % testencoding Tcl_ExternalToUtf ascii abc {start end} {} 5 nread nwritten nchars
# nospace {} aÿÿÿ#
# % puts $nread,$nwritten,$nchars
# 1,1,1
#

test $cmd-$enc-$id-[join $flags -] "$cmd - $enc - $hexin - $flags" \
-constraints $constraints \
-body {
lassign [testencoding $cmd $enc $in $flags {} $dstlen nread nwritten nchars] status state buf
list \
$status \
[expr {$nread < [string length $in]}] \
[expr {$nwritten <= $dstlen}] \
[expr {$nchars > 0 && $nchars < [string length $str]}] \
[expr {[string range $out 0 $nwritten-1] eq [string range $buf 0 $nwritten-1]}]
} -result {nospace 1 1 1 1}
}

#
# Basic tests
Expand All @@ -249,9 +295,14 @@ namespace eval utftest {

# Char limits - note no fromutf as Tcl_UtfToExternal does not support it
testcharlimit toutf $enc $comment $hex $utfhex

# Space limits
testspacelimit toutf $enc $comment $hex $utfhex
testspacelimit fromutf $enc $comment $utfhex $hex
}
}


# Special cases - cesu2 high and low surrogates in separate fragments
# This will (correctly) return "ok", not "multibyte" after first frag
testfragment toutf cesu-8 nonbmp-split-surr \
Expand Down

0 comments on commit 712e9c0

Please sign in to comment.