Skip to content

Commit

Permalink
Merge pull request #37 from lukewilliamboswell/fix-unicode
Browse files Browse the repository at this point in the history
Fix unicode
  • Loading branch information
lukewilliamboswell authored Sep 2, 2024
2 parents 459b844 + eebb018 commit 16aedd5
Show file tree
Hide file tree
Showing 5 changed files with 122 additions and 49 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
examples/optional
examples/simple1
examples/simple2
examples/tuple
.DS_Store
generated-docs/
generated-docs/
5 changes: 2 additions & 3 deletions examples/optional.roc
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import cli.Stdout
import json.Json
import json.OptionOrNull exposing [OptionOrNull]

# Record encoded/decoded by this example. `lastName` uses OptionOrNull
# (imported from json.OptionOrNull above) so it can be absent.
Object : { firstName : Str, lastName : OptionOrNull Str }

main =
noneObj : Object
Expand All @@ -32,5 +32,4 @@ main =
# someJson == {"firstName":"Luke","lastName":"Boswell"}
someJson = Encode.toBytes someObj Json.utf8
Stdout.line (someJson |> Str.fromUtf8 |> Result.withDefault "Failed to encode JSON")



74 changes: 37 additions & 37 deletions examples/simple2.roc
Original file line number Diff line number Diff line change
@@ -1,37 +1,37 @@
app [main] {
cli: platform "https://github.com/roc-lang/basic-cli/releases/download/0.15.0/SlwdbJ-3GR7uBWQo6zlmYWNYOxnvo8r6YABXD-45UOw.tar.br",
json: "../package/main.roc", # use release URL (ends in tar.br) for local example, see github.com/lukewilliamboswell/roc-json/releases
}

import cli.Stdout
import json.Json
import "data.json" as requestBody : List U8

main =
decoder = Json.utf8With {}

decoded : Decode.DecodeResult (List DataRequest)
decoded = Decode.fromBytesPartial requestBody decoder

when decoded.result is
Ok list ->
Stdout.line! "Successfully decoded list"

when List.get list 0 is
Ok rec -> Stdout.line! "Name of first person is: $(rec.lastname)"
Err _ -> Stdout.line! "Error occurred in List.get"

Err TooShort -> Stdout.line! "A TooShort error occurred"

DataRequest : {
id : I64,
firstname : Str,
lastname : Str,
email : Str,
gender : Str,
ipaddress : Str,
}

# =>
# Successfully decoded list
# Name of first person is: Penddreth
app [main] {
cli: platform "https://github.com/roc-lang/basic-cli/releases/download/0.15.0/SlwdbJ-3GR7uBWQo6zlmYWNYOxnvo8r6YABXD-45UOw.tar.br",
json: "../package/main.roc", # use release URL (ends in tar.br) for local example, see github.com/lukewilliamboswell/roc-json/releases
}

import cli.Stdout
import json.Json
import "data.json" as requestBody : List U8

# Entry point: decodes the bytes imported from "data.json" (bound to
# `requestBody` above) as a list of DataRequest records and reports the
# outcome on stdout.
main =
# Decoder built with an empty options record.
decoder = Json.utf8With {}

# Partial decode; only the `result` field of the DecodeResult is inspected below.
decoded : Decode.DecodeResult (List DataRequest)
decoded = Decode.fromBytesPartial requestBody decoder

when decoded.result is
Ok list ->
Stdout.line! "Successfully decoded list"

# Print a field of the first record, if the list has one.
# Note: the message says "Name" but the value printed is `lastname`.
when List.get list 0 is
Ok rec -> Stdout.line! "Name of first person is: $(rec.lastname)"
Err _ -> Stdout.line! "Error occurred in List.get"

# TooShort is the only decode error matched here.
Err TooShort -> Stdout.line! "A TooShort error occurred"

# Shape of one record in the decoded list (see the annotation on `decoded`
# in `main`). Field names are all lowercase.
# NOTE(review): presumably these match the keys in data.json — confirm against that file.
DataRequest : {
id : I64,
firstname : Str,
lastname : Str,
email : Str,
gender : Str,
ipaddress : Str,
}

# =>
# Successfully decoded list
# Name of first person is: Penddreth
6 changes: 3 additions & 3 deletions flake.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

83 changes: 78 additions & 5 deletions package/Json.roc
Original file line number Diff line number Diff line change
Expand Up @@ -1142,10 +1142,83 @@ hexToUtf8 = \a, b, c, d ->
k = jsonHexToDecimal c
l = jsonHexToDecimal d

if i == 0 && j == 0 then
[decimalHexToByte k l]
cp = (16 * 16 * 16 * Num.toU32 i) + (16 * 16 * Num.toU32 j) + (16 * Num.toU32 k) + Num.toU32 l
codepointToUtf8 cp

# Copied from https://github.com/roc-lang/unicode/blob/e1162d49e3a2c57ed711ecdee7dc8537a19479d8/
# from package/CodePoint.roc and modified
#
# Encode a Unicode code point as its UTF-8 byte sequence:
#
#   U+0000   .. U+007F    -> 1 byte   0xxxxxxx
#   U+0080   .. U+07FF    -> 2 bytes  110xxxxx 10xxxxxx
#   U+0800   .. U+FFFF    -> 3 bytes  1110xxxx 10xxxxxx 10xxxxxx
#   U+10000  .. U+10FFFF  -> 4 bytes  11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
#
# Values above U+10FFFF are not Unicode code points and cause a crash.
#
# NOTE(review): surrogate values (U+D800..U+DFFF) are not rejected; they are
# encoded with the 3-byte form like any other value in that range. Confirm
# whether callers need to combine or replace \u surrogate pairs beforehand.
codepointToUtf8 : U32 -> List U8
codepointToUtf8 = \u32 ->
    if u32 < 0x80 then
        # ASCII: the code point is the byte.
        [Num.toU8 u32]
    else if u32 < 0x800 then
        # Lead byte carries the top 5 bits.
        byte1 =
            u32
            |> Num.shiftRightBy 6
            |> Num.bitwiseOr 0b11000000
            |> Num.toU8

        # Continuation byte carries the low 6 bits.
        byte2 =
            u32
            |> Num.bitwiseAnd 0b111111
            |> Num.bitwiseOr 0b10000000
            |> Num.toU8

        [byte1, byte2]
    else if u32 < 0x10000 then
        # Lead byte carries the top 4 bits.
        byte1 =
            u32
            |> Num.shiftRightBy 12
            |> Num.bitwiseOr 0b11100000
            |> Num.toU8

        byte2 =
            u32
            |> Num.shiftRightBy 6
            |> Num.bitwiseAnd 0b111111
            |> Num.bitwiseOr 0b10000000
            |> Num.toU8

        byte3 =
            u32
            |> Num.bitwiseAnd 0b111111
            |> Num.bitwiseOr 0b10000000
            |> Num.toU8

        [byte1, byte2, byte3]
    else if u32 < 0x110000 then
        # Supplementary planes: lead byte carries the top 3 bits.
        byte1 =
            u32
            |> Num.shiftRightBy 18
            |> Num.bitwiseOr 0b11110000
            |> Num.toU8

        byte2 =
            u32
            |> Num.shiftRightBy 12
            |> Num.bitwiseAnd 0b111111
            |> Num.bitwiseOr 0b10000000
            |> Num.toU8

        byte3 =
            u32
            |> Num.shiftRightBy 6
            |> Num.bitwiseAnd 0b111111
            |> Num.bitwiseOr 0b10000000
            |> Num.toU8

        byte4 =
            u32
            |> Num.bitwiseAnd 0b111111
            |> Num.bitwiseOr 0b10000000
            |> Num.toU8

        [byte1, byte2, byte3, byte4]
    else
        # u32 >= 0x110000 is outside the Unicode code space and has no
        # UTF-8 encoding.
        crash "Impossible"

# Test for \u0074 == U+74 == 't' in Basic Multilingual Plane
expect
Expand All @@ -1162,10 +1235,10 @@ expect
# Test for \u2c64 == U+2C64 == 'Ɽ' in Latin Extended-C
# U+2C64 lies in U+0800..U+FFFF, so its UTF-8 encoding is three bytes:
# 0xE2 0xB1 0xA4 (not the two raw bytes 0x2C 0x64).
expect
    actual = hexToUtf8 '2' 'C' '6' '4'
    expected = [0xE2, 0xB1, 0xA4]
    actual == expected

# UTF-8 encoding of U+FFFD REPLACEMENT CHARACTER, written out as literal bytes.
# (Hex digits 'f' 'f' 'd' 'd' would denote U+FFDD, a different code point.)
unicodeReplacement = [0xEF, 0xBF, 0xBD]

replaceEscapedChars : { inBytes : List U8, outBytes : List U8 } -> { inBytes : List U8, outBytes : List U8 }
replaceEscapedChars = \{ inBytes, outBytes } ->
Expand Down

0 comments on commit 16aedd5

Please sign in to comment.