Skip to content

Commit

Permalink
Handle non-UTF-8 CLI args (#7)
Browse files Browse the repository at this point in the history
* Handle non-UTF-8 CLI args

* Rename InvalidUnicode tag to InvalidUtf8
  • Loading branch information
smores56 authored Dec 21, 2024
1 parent 7911e8d commit 99dbd5d
Show file tree
Hide file tree
Showing 16 changed files with 1,225 additions and 216 deletions.
6 changes: 2 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,9 @@ import weaver.Opt
import weaver.Cli
import weaver.Param
main! = \{} ->
args = Arg.list! {}
main! = \args ->
data =
Cli.parse_or_display_message cli_parser args
Cli.parse_or_display_message cli_parser args Arg.to_os_raw
|> try Result.onErr! \message ->
try Stdout.line! message
Err (Exit 1 "")
Expand Down
6 changes: 2 additions & 4 deletions examples/basic.roc
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,9 @@ import weaver.Opt
import weaver.Cli
import weaver.Param

main! = \{} ->
args = Arg.list! {}

main! = \args ->
data =
Cli.parse_or_display_message cli_parser args
Cli.parse_or_display_message cli_parser args Arg.to_os_raw
|> try Result.onErr! \message ->
try Stdout.line! message
Err (Exit 1 "")
Expand Down
6 changes: 2 additions & 4 deletions examples/default-values.roc
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,9 @@ import weaver.Opt
import weaver.Cli
import weaver.Param

main! = \{} ->
args = Arg.list! {}

main! = \args ->
data =
Cli.parse_or_display_message cli_parser args
Cli.parse_or_display_message cli_parser args Arg.to_os_raw
|> try Result.onErr! \message ->
try Stdout.line! message
Err (Exit 1 "")
Expand Down
6 changes: 2 additions & 4 deletions examples/single-arg.roc
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,9 @@ import pf.Stdout
import weaver.Opt
import weaver.Cli

main! = \{} ->
args = Arg.list! {}

main! = \args ->
data =
Cli.parse_or_display_message cli_parser args
Cli.parse_or_display_message cli_parser args Arg.to_os_raw
|> try Result.onErr! \message ->
try Stdout.line! message
Err (Exit 1 "")
Expand Down
6 changes: 2 additions & 4 deletions examples/subcommands.roc
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,9 @@ import weaver.Cli
import weaver.Param
import weaver.SubCmd

main! = \{} ->
args = Arg.list! {}

main! = \args ->
data =
Cli.parse_or_display_message cli_parser args
Cli.parse_or_display_message cli_parser args Arg.to_os_raw
|> try Result.onErr! \message ->
try Stdout.line! message
Err (Exit 1 "")
Expand Down
98 changes: 98 additions & 0 deletions package/Arg.roc
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
module [
Arg,
from_raw_arg,
to_raw_arg,
from_str,
to_str,
to_bytes,
display,
]

## An OS-aware representation of a command-line argument.
##
## Though we tend to think of args as Unicode strings, most operating systems
## represent command-line arguments as lists of bytes that aren't necessarily
## UTF-8 encoded. Windows doesn't even use bytes, but U16s.
##
## Most of the time, you will pass these to packages and they will handle the
## encoding for you, but for quick-and-dirty code you can use [display] to
## convert these to [Str].
##
## NB: [display] is intended to become a lossy conversion, but for now it
## crashes on Unix args that are not valid UTF-8 and on all Windows args.
## Because the `Inspect` implementation of this type is built on [display],
## inspecting such an [Arg] crashes as well.
Arg := [Unix (List U8), Windows (List U16)] implements [Eq, Inspect { toInspector: arg_inspector }]

## Build the `Inspect` representation of an [Arg]: the arg is rendered with
## [display] and the resulting [Str] is handed to `Inspect.str`.
arg_inspector : Arg -> Inspector f where f implements InspectFormatter
arg_inspector = \arg ->
    arg
    |> display
    |> Inspect.str

## Wrap a raw, OS-tagged numeric list in the opaque [Arg] type.
##
## `Unix` carries a list of bytes, `Windows` a list of U16s; the payload is
## stored as given, without any validation or re-encoding.
from_raw_arg : [Unix (List U8), Windows (List U16)] -> Arg
from_raw_arg = \raw ->
    @Arg raw

## Expose the raw, OS-tagged numeric list stored inside an [Arg].
##
## This is the inverse of [from_raw_arg], and is a good way to hand [Arg]s
## over to other Roc packages.
to_raw_arg : Arg -> [Unix (List U8), Windows (List U16)]
to_raw_arg = \@Arg inner ->
    inner

## Build an [Arg] from a [Str] by storing its UTF-8 bytes.
##
## The result is always the `Unix` flavor, regardless of the host OS.
from_str : Str -> Arg
from_str = \str ->
    utf8_bytes = Str.toUtf8 str

    @Arg (Unix utf8_bytes)

## Try to decode an [Arg] into a [Str].
##
## Unix args are decoded as UTF-8, and any decoding failure is reported as
## `InvalidUtf8`. Windows args are not decoded yet: they always produce
## `Err InvalidUtf8`, whatever they contain.
to_str : Arg -> Result Str [InvalidUtf8]
to_str = \@Arg arg ->
    # TODO: update when Unicode -> Str conversion is ready:
    # https://github.com/roc-lang/roc/issues/7390
    when arg is
        Windows _code_units -> Err InvalidUtf8
        Unix bytes ->
            when Str.fromUtf8 bytes is
                Ok decoded -> Ok decoded
                # The detailed decoding problem is deliberately discarded.
                Err _problem -> Err InvalidUtf8

## Convert an [Arg] to a list of bytes.
##
## Unix args are returned unchanged. For Windows args, every U16 contributes
## two bytes to the output: first the value shifted right by 8 bits, then the
## value itself, each converted with `Num.toU8`.
to_bytes : Arg -> List U8
to_bytes = \@Arg arg ->
    when arg is
        Unix unix_bytes -> unix_bytes
        Windows code_units ->
            # Reserve room for two bytes per U16 up front, so appending does
            # not trigger the intermediate resizing allocations that
            # `List.join_map` would.
            preallocated = List.withCapacity (2 * List.len code_units)

            List.walk code_units preallocated \acc, code_unit ->
                high = code_unit |> Num.shiftRightBy 8 |> Num.toU8
                low = Num.toU8 code_unit

                acc
                |> List.append high
                |> List.append low

## Render an [Arg] as a [Str] for display purposes.
##
## NB: Only Unix args holding valid UTF-8 are supported right now. A Unix arg
## with invalid UTF-8, or any Windows arg, makes this function crash. Once Roc
## adds support for lossy conversion of Unicode to Str, invalid codepoints
## will instead be replaced with the Unicode replacement character "\uFFFD".
display : Arg -> Str
display = \@Arg arg ->
    # TODO: update when Unicode -> Str conversion is ready:
    # https://github.com/roc-lang/roc/issues/7390
    when arg is
        Windows _windows ->
            crash "Windows args cannot currently be displayed"

        Unix bytes ->
            when Str.fromUtf8 bytes is
                Err err -> crash "Invalid UTF-8 string: $(Inspect.toStr err)"
                Ok text -> text

13 changes: 7 additions & 6 deletions package/Base.roc
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,8 @@ module [
SubcommandsConfig,
]

import Parser exposing [Arg]
import Arg exposing [Arg]
import Parser exposing [ParsedArg]

## The result of attempting to parse args into config data.
ArgParserResult a : [
Expand All @@ -46,11 +47,11 @@ ArgParserResult a : [

## The parameters that an [ArgParser] takes to extract data
## from args.
ArgParserParams : { args : List Arg, subcommand_path : List Str }
ArgParserParams : { args : List ParsedArg, subcommand_path : List Str }

## The intermediate state that an [ArgParser] passes between
## different parsing steps.
ArgParserState a : { data : a, remaining_args : List Arg, subcommand_path : List Str }
ArgParserState a : { data : a, remaining_args : List ParsedArg, subcommand_path : List Str }

## A function that takes command line arguments and a subcommand,
## and attempts to extract configuration data from said arguments.
Expand Down Expand Up @@ -95,7 +96,7 @@ ArgExtractErr : [
MissingParam ParameterConfig,
UnrecognizedShortArg Str,
UnrecognizedLongArg Str,
ExtraParamProvided Str,
ExtraParamProvided Arg,
]

str_type_name = "str"
Expand All @@ -113,12 +114,12 @@ Plurality : [Optional, One, Many]
## The two built-in flags that we parse automatically.
SpecialFlags : { help : Bool, version : Bool }

InvalidValue : [InvalidNumStr, InvalidValue Str]
InvalidValue : [InvalidNumStr, InvalidValue Str, InvalidUtf8]

DefaultValue a : [NoDefault, Value a, Generate ({} -> a)]

## A parser that extracts an argument value from an [Arg].
ValueParser a : Str -> Result a InvalidValue
ValueParser a : Arg -> Result a InvalidValue

OptionConfigBaseParams : {
short ? Str,
Expand Down
15 changes: 8 additions & 7 deletions package/Builder.roc
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ module [
check_for_help_and_version,
]

import Arg
import Base exposing [
ArgParser,
ArgParserState,
Expand All @@ -29,7 +30,7 @@ import Base exposing [
ParameterConfig,
SubcommandConfig,
]
import Parser exposing [Arg]
import Parser exposing [ParsedArg]

GetOptionsAction : { get_options : {} }
GetParamsAction : { get_params : {} }
Expand All @@ -42,7 +43,7 @@ CliBuilder data from_action to_action := {
subcommands : Dict Str SubcommandConfig,
}

from_arg_parser : (List Arg -> Result { data : data, remaining_args : List Arg } ArgExtractErr) -> CliBuilder data from_action to_action
from_arg_parser : (List ParsedArg -> Result { data : data, remaining_args : List ParsedArg } ArgExtractErr) -> CliBuilder data from_action to_action
from_arg_parser = \parser ->
new_parser = \{ args, subcommand_path } ->
when parser args is
Expand Down Expand Up @@ -86,7 +87,7 @@ set_parser = \@CliBuilder builder, parser ->
parser,
}

update_parser : CliBuilder state from_action to_action, ({ data : state, remaining_args : List Arg } -> Result { data : next_state, remaining_args : List Arg } ArgExtractErr) -> CliBuilder next_state from_action to_action
update_parser : CliBuilder state from_action to_action, ({ data : state, remaining_args : List ParsedArg } -> Result { data : next_state, remaining_args : List ParsedArg } ArgExtractErr) -> CliBuilder next_state from_action to_action
update_parser = \@CliBuilder builder, updater ->
new_parser =
on_successful_arg_parse builder.parser \{ data, remaining_args, subcommand_path } ->
Expand Down Expand Up @@ -152,7 +153,7 @@ combine = \@CliBuilder left, @CliBuilder right, combiner ->
subcommands: Dict.insertAll left.subcommands right.subcommands,
}

flag_was_passed : OptionConfig, List Arg -> Bool
flag_was_passed : OptionConfig, List ParsedArg -> Bool
flag_was_passed = \option, args ->
List.any args \arg ->
when arg is
Expand Down Expand Up @@ -188,7 +189,7 @@ expect
|> map Inspected
|> into_parts

out = parser { args: [Parameter "123"], subcommand_path: [] }
out = parser { args: [Parameter (Arg.from_str "123")], subcommand_path: [] }

out
== SuccessfullyParsed {
Expand All @@ -198,7 +199,7 @@ expect
}

expect
args = [Parameter "-h"]
args = [Parameter (Arg.from_str "-h")]

flag_was_passed help_option args |> Bool.not

Expand All @@ -213,6 +214,6 @@ expect
flag_was_passed help_option args

expect
args = [Long { name: "help", value: Ok "123" }]
args = [Long { name: "help", value: Ok (Arg.from_str "123") }]

flag_was_passed help_option args
26 changes: 16 additions & 10 deletions package/Cli.roc
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@
##
## expect
## cliParser
## |> Cli.parse_or_display_message ["example", "-a", "123", "-vvv", "file.txt", "file-2.txt"]
## |> Cli.parse_or_display_message ["example", "-a", "123", "-vvv", "file.txt", "file-2.txt"] Arg.to_os_raw
## == Ok { alpha: 123, verbosity: 3, files: ["file.txt", "file-2.txt"] }
## ```
##
Expand Down Expand Up @@ -110,7 +110,8 @@ import Base exposing [
CliConfigParams,
map_successfully_parsed,
]
import Parser exposing [Arg, parse_args]
import Arg exposing [Arg]
import Parser exposing [ParsedArg, parse_args]
import Builder exposing [CliBuilder]
import Validate exposing [validate_cli, CliValidationErr]
import ErrorFormatter exposing [
Expand All @@ -122,7 +123,7 @@ import Help exposing [help_text, usage_help]
## A parser that interprets command line arguments and returns well-formed data.
CliParser state : {
config : CliConfig,
parser : List Str -> ArgParserResult state,
parser : List Arg -> ArgParserResult state,
text_style : TextStyle,
}

Expand Down Expand Up @@ -171,7 +172,7 @@ weave = \left, right, combiner ->
Builder.combine left right combiner

## Fail the parsing process if any arguments are left over after parsing.
ensure_all_args_were_parsed : List Arg -> Result {} ArgExtractErr
ensure_all_args_were_parsed : List ParsedArg -> Result {} ArgExtractErr
ensure_all_args_were_parsed = \remaining_args ->
when remaining_args is
[] -> Ok {}
Expand Down Expand Up @@ -337,7 +338,7 @@ assert_valid = \result ->
##
## expect
## exampleCli
## |> Cli.parse_or_display_message ["example", "-h"]
## |> Cli.parse_or_display_message ["example", "-h"] Arg.to_os_raw
## == Err
## """
## example v0.1.0
Expand All @@ -356,17 +357,17 @@ assert_valid = \result ->
##
## expect
## exampleCli
## |> Cli.parse_or_display_message ["example", "-V"]
## |> Cli.parse_or_display_message ["example", "-V"] Arg.to_os_raw
## == Err "v0.1.0"
##
## expect
## exampleCli
## |> Cli.parse_or_display_message ["example", "-v"]
## |> Cli.parse_or_display_message ["example", "-v"] Arg.to_os_raw
## == Ok { verbosity: 1 }
##
## expect
## exampleCli
## |> Cli.parse_or_display_message ["example", "-x"]
## |> Cli.parse_or_display_message ["example", "-x"] Arg.to_os_raw
## == Err
## """
## Error: The argument -x was not recognized.
Expand All @@ -375,8 +376,13 @@ assert_valid = \result ->
## example [OPTIONS]
## """
## ```
parse_or_display_message : CliParser data, List Str -> Result data Str
parse_or_display_message = \parser, args ->
parse_or_display_message : CliParser data, List arg, (arg -> [Unix (List U8), Windows (List U16)]) -> Result data Str
parse_or_display_message = \parser, external_args, to_raw_arg ->
args =
external_args
|> List.map to_raw_arg
|> List.map Arg.from_raw_arg

when parser.parser args is
SuccessfullyParsed data -> Ok data
ShowHelp { subcommand_path } -> Err (help_text parser.config subcommand_path parser.text_style)
Expand Down
4 changes: 2 additions & 2 deletions package/CliTest.roc
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ basic_cli =

expect
basic_cli
|> Cli.parse_or_display_message ["basic-cli", "-a", "123"]
|> Cli.parse_or_display_message ["basic-cli", "-a", "123"] \a -> Unix (Str.toUtf8 a)
== Ok (Alpha 123)

expect
Expand All @@ -29,5 +29,5 @@ expect
"""

basic_cli
|> Cli.parse_or_display_message ["basic-cli", "-h"]
|> Cli.parse_or_display_message ["basic-cli", "-h"] \a -> Unix (Str.toUtf8 a)
== Err help_message
Loading

0 comments on commit 99dbd5d

Please sign in to comment.