Skip to content

Commit

Permalink
supports endianness
Browse files Browse the repository at this point in the history
Signed-off-by: LiangliangSui <[email protected]>
  • Loading branch information
LiangliangSui committed Apr 28, 2024
1 parent 2be90fa commit 224df86
Show file tree
Hide file tree
Showing 2 changed files with 64 additions and 26 deletions.
18 changes: 14 additions & 4 deletions go/fury/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -129,19 +129,29 @@ func CreateTimeFromUnixMicro(usec int64) time.Time {
return time.Unix(usec/1e6, (usec%1e6)*1e3)
}

// UTF16LEToString Convert UTF16 encoded data to string.
func UTF16LEToString(utf16Data []byte) (string, error) {
// UTF16ToString converts UTF-16 encoded bytes into a Go string.
//
// utf16Data holds a sequence of 16-bit code units, two bytes per unit.
// isLittleEndian selects the byte order of every code unit: true means the
// low-order byte is stored first, false means the high-order byte is stored
// first.
//
// An error is returned when len(utf16Data) is odd, since the input then cannot
// consist of whole code units. Surrogate pairs are combined by utf16.Decode.
func UTF16ToString(utf16Data []byte, isLittleEndian bool) (string, error) {
	dataLen := len(utf16Data)
	if dataLen%2 != 0 {
		return "", fmt.Errorf("%v is not UTF16 encoded data, "+
			"or the data(len: %d) is truncated", utf16Data, dataLen)
	}

	// The byte order is the same for the whole input, so resolve the positions
	// of the low- and high-order bytes once instead of on every iteration.
	lowOffset, highOffset := 1, 0
	if isLittleEndian {
		lowOffset, highOffset = 0, 1
	}

	units := make([]uint16, dataLen/2)
	for i := range units {
		base := i * 2
		units[i] = uint16(utf16Data[base+lowOffset]) | uint16(utf16Data[base+highOffset])<<8
	}

	return string(utf16.Decode(units)), nil
}

72 changes: 50 additions & 22 deletions go/fury/util_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,31 +44,59 @@ func TestTime(t *testing.T) {
require.WithinDuration(t, t1, t2, 1000)
}

func TestUTF16LEToString(t *testing.T) {
// hello世界
utf16Data := []byte{
0b01101000, 0b00000000,
0b01100101, 0b00000000,
0b01101100, 0b00000000,
0b01101100, 0b00000000,
0b01101111, 0b00000000,
0b00010110, 0b01001110,
0b01001100, 0b01110101,
}
strData, err := UTF16LEToString(utf16Data)
require.NoError(t, err)
require.Equal(t, "hello世界", strData)
// UTF16TestBean is one table-driven test case for UTF16ToString.
type UTF16TestBean struct {
// UTF16Data is the raw UTF-16 byte sequence handed to UTF16ToString.
UTF16Data []byte
// expectedValue is the string the decoded result is compared against.
expectedValue string
// isLittleEndian is the byte-order flag passed along with UTF16Data.
isLittleEndian bool
}

utf16ErrorData := []byte{0b01101000, 0b00000000, 0b01100101}
strData, err = UTF16LEToString(utf16ErrorData)
require.Error(t, err)
func TestUTF16ToString(t *testing.T) {
data := []UTF16TestBean{
{
[]byte{
0b01101000, 0b00000000,
0b01100101, 0b00000000,
0b01101100, 0b00000000,
0b01101100, 0b00000000,
0b01101111, 0b00000000,
0b00010110, 0b01001110,
0b01001100, 0b01110101,
},
"hello世界",
true,
},
{
[]byte{
0b00110100, 0b11011000, 0b00011110, 0b11011101,
},
// U+1D11E(UTF16 four bytes encode)
"𝄞",
true,
},
{
[]byte{
0b11011000, 0b00110100, 0b11011101, 0b00011110,
},
// U+1D11E(UTF16 four bytes encode)
"𝄞",
false,
},
}

// 𝄞 U+1D11E(UTF16 four bytes encode)
utf16FourBytesData := []byte{
0b00110100, 0b11011000, 0b00011110, 0b11011101,
for _, value := range data {
check(t, value.UTF16Data, value.expectedValue, value.isLittleEndian)
}
}

specialStr, err := UTF16LEToString(utf16FourBytesData)
// check decodes utf16Data with the given byte order and fails the test unless
// decoding succeeds and the result equals expectedValue.
func check(t *testing.T, utf16Data []byte, expectedValue string, isLittleEndian bool) {
	// Mark this function as a helper so failures are reported at the caller's
	// line rather than inside check.
	t.Helper()

	strData, err := UTF16ToString(utf16Data, isLittleEndian)
	require.NoError(t, err)
	require.Equal(t, expectedValue, strData)
}

// TestUTF16ToStringError verifies that UTF16ToString reports an error for
// input whose length is not a multiple of two bytes.
func TestUTF16ToStringError(t *testing.T) {
	// Three bytes: one complete code unit followed by a dangling byte.
	_, decodeErr := UTF16ToString([]byte{0b01101000, 0b00000000, 0b01100101}, true)
	require.Error(t, decodeErr)
}

0 comments on commit 224df86

Please sign in to comment.