Skip to content

Commit

Permalink
feat(go): Implement utf16 to string conversion function
Browse files Browse the repository at this point in the history
Signed-off-by: LiangliangSui <[email protected]>
  • Loading branch information
LiangliangSui committed Apr 24, 2024
1 parent ca2f873 commit 2be90fa
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 0 deletions.
19 changes: 19 additions & 0 deletions go/fury/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,10 @@ package fury
import (
"bytes"
"encoding/binary"
"fmt"
"reflect"
"time"
"unicode/utf16"
"unsafe"
)

Expand Down Expand Up @@ -126,3 +128,20 @@ func GetUnixMicro(t time.Time) int64 {
// CreateTimeFromUnixMicro returns the time.Time that lies usec microseconds
// away from the Unix epoch (negative values are before it).
func CreateTimeFromUnixMicro(usec int64) time.Time {
	const (
		microsPerSecond int64 = 1e6
		nanosPerMicro   int64 = 1e3
	)

	wholeSeconds := usec / microsPerSecond
	// The remainder carries the sign of usec; time.Unix accepts nanosecond
	// values outside [0, 999999999] and normalizes them.
	leftoverNanos := (usec % microsPerSecond) * nanosPerMicro

	return time.Unix(wholeSeconds, leftoverNanos)
}

// UTF16LEToString decodes utf16Data, a sequence of little-endian UTF-16 code
// units, into a Go string.
//
// An error is returned when len(utf16Data) is odd, because every code unit
// occupies exactly two bytes. Unpaired surrogates are not reported as errors:
// utf16.Decode replaces them with U+FFFD. Empty input yields "" and a nil
// error.
func UTF16LEToString(utf16Data []byte) (string, error) {
	dataLen := len(utf16Data)
	if dataLen%2 != 0 {
		// Report only the length: echoing the payload would make the error
		// message grow with the size of the input.
		return "", fmt.Errorf("invalid UTF-16LE data: odd byte length %d, "+
			"the data may be truncated", dataLen)
	}

	// Reassemble each pair of bytes into one 16-bit code unit.
	codeUnits := make([]uint16, dataLen/2)
	for i := range codeUnits {
		codeUnits[i] = binary.LittleEndian.Uint16(utf16Data[2*i:])
	}

	return string(utf16.Decode(codeUnits)), nil
}
29 changes: 29 additions & 0 deletions go/fury/util_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,3 +43,32 @@ func TestTime(t *testing.T) {
require.Equal(t, t1.Nanosecond()/1000, t2.Nanosecond()/1000)
require.WithinDuration(t, t1, t2, 1000)
}

// TestUTF16LEToString checks UTF16LEToString on text made of single code
// units, on odd-length (truncated) input, and on a character that needs a
// surrogate pair.
func TestUTF16LEToString(t *testing.T) {
	// "hello世界": each character fits in one little-endian code unit.
	utf16Data := []byte{
		0x68, 0x00, // h
		0x65, 0x00, // e
		0x6c, 0x00, // l
		0x6c, 0x00, // l
		0x6f, 0x00, // o
		0x16, 0x4e, // 世 (U+4E16)
		0x4c, 0x75, // 界 (U+754C)
	}
	strData, err := UTF16LEToString(utf16Data)
	require.NoError(t, err)
	require.Equal(t, "hello世界", strData)

	// Three bytes cannot hold a whole number of 16-bit code units.
	utf16ErrorData := []byte{0x68, 0x00, 0x65}
	_, err = UTF16LEToString(utf16ErrorData)
	require.Error(t, err)

	// 𝄞 (U+1D11E) lies outside the BMP, so UTF-16 encodes it in four bytes
	// as the surrogate pair D834 DD1E.
	utf16FourBytesData := []byte{
		0x34, 0xd8, 0x1e, 0xdd,
	}

	specialStr, err := UTF16LEToString(utf16FourBytesData)
	require.NoError(t, err)
	// Expected value first, matching the assertion above.
	require.Equal(t, "𝄞", specialStr)
}

0 comments on commit 2be90fa

Please sign in to comment.