Skip to content

Commit

Permalink
New --valid-letters option
Browse files Browse the repository at this point in the history
  • Loading branch information
jcburley committed Feb 25, 2021
1 parent 2ea7a31 commit 36435f1
Show file tree
Hide file tree
Showing 3 changed files with 71 additions and 3 deletions.
18 changes: 18 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,24 @@ test.clj:1:1: Parse warning: let form with empty body
```
The output format is as follows: `<filename>:<line>:<column>: <issue type>: <message>`, where `<issue type>` can be `Read error`, `Parse error`, `Parse warning` or `Exception`.

### Valid Symbol (and Keyword) Characters

The `--valid-letters` option specifies valid characters for symbol and keyword "letters", which are the non-special, non-digit characters that make up symbol names.

While Clojure 1.10 allows, for example, em dashes in symbol names (which can lead to confusion), Joker does not; by default, it allows only Unicode letters (category L). E.g. `(def é "hey")` works as expected. `--valid-letters letters` specifies this explicitly.

Earlier versions of Joker allowed any Unicode letter (including the ASCII letters `[A-Za-z]` and Latin-1 letters such as `é`) as well as any Unicode code point beyond MaxLatin1 (255, aka `0xff`), whether or not it is a letter. This is more consistent with Clojure, in that it allows en and em dashes; e.g. `(def a–b "wow")`. Specify `--valid-letters unicode` for this behavior.

Some developers prefer "strict ASCII". Joker supports this for symbol letters (though not for digits, which may be any Unicode digits) via `--valid-letters ascii`.

On the other hand, `--valid-letters any` allows any character (not otherwise special, such as delimiters), including ASCII control characters.

Note that `--valid-letters` affects only how symbols are parsed; `joker.core/symbol` is able to form a symbol from any string, regardless of content.

Also note that, though most closely associated with linting, `--valid-letters` governs how symbols are parsed regardless of mode of operation (linting, running scripts, formatting, etc.).

Finally, the environment variable `JOKER_VALID_LETTERS` may be used to specify the default for `--valid-letters`.

### Integration with editors

- Emacs: [flycheck syntax checker](https://github.com/candid82/flycheck-joker)
Expand Down
24 changes: 21 additions & 3 deletions core/read.go
Original file line number Diff line number Diff line change
Expand Up @@ -387,14 +387,32 @@ func readNumber(reader *Reader) Object {
return scanInt(str, str, 0, reader)
}

// AnyRuneIsValid reports whether r may be used as a symbol "letter" under
// the most permissive policy: every rune is accepted unless isDelimiter
// classifies it as a delimiter.
func AnyRuneIsValid(r rune) bool {
	delimiter := isDelimiter(r)
	return !delimiter
}

// AnyUnicodeIsValid reports whether r may be used as a symbol "letter" under
// the "unicode" policy: r is accepted if it lies beyond the Latin-1 range
// (greater than unicode.MaxLatin1), or if it is a Unicode letter.
func AnyUnicodeIsValid(r rune) bool {
	// Everything above Latin-1 is accepted regardless of its category.
	if r > unicode.MaxLatin1 {
		return true
	}
	// Within Latin-1 (and for negative values) only letters qualify.
	return unicode.IsLetter(r)
}

// AnyUnicodeLetterIsValid reports whether r may be used as a symbol "letter"
// under the "letters" policy: r is accepted only if it belongs to the
// Unicode Letter category (L).
func AnyUnicodeLetterIsValid(r rune) bool {
	return unicode.Is(unicode.Letter, r)
}

func AnyASCIILetterIsValid(r rune) bool {
return r <= unicode.MaxASCII && unicode.IsLetter(r)
}

// IsValidLetterFn is the predicate isSymbolInitial falls back to for runes
// that are not one of the special symbol characters. It defaults to
// AnyUnicodeLetterIsValid and may be reassigned to select a different policy.
var IsValidLetterFn func(rune) bool = AnyUnicodeLetterIsValid

// isSymbolInitial reports whether r may begin a symbol.
//
// A fixed set of punctuation characters is always accepted; '.' is accepted
// unless the current dialect is CLJS. Any other rune is judged by the
// currently installed IsValidLetterFn predicate.
func isSymbolInitial(r rune) bool {
	// The dot is the only character whose validity depends on the dialect.
	if r == '.' {
		return DIALECT != CLJS
	}
	switch r {
	case '*', '+', '!', '-', '_', '?', ':', '=', '<', '>', '&', '%', '$', '|':
		return true
	default:
		// Not a special character: defer to the configurable letter policy.
		return IsValidLetterFn(r)
	}
}

func isSymbolRune(r rune) bool {
Expand Down Expand Up @@ -1208,8 +1226,6 @@ func Read(reader *Reader) (Object, bool) {
return readSymbol(reader, r), false
}
return readArgSymbol(reader), false
case isSymbolInitial(r):
return readSymbol(reader, r), false
case r == '"':
return readString(reader), false
case r == '(':
Expand Down Expand Up @@ -1271,6 +1287,8 @@ func Read(reader *Reader) (Object, bool) {
return readWithMeta(reader), false
case r == '#':
return readDispatch(reader)
case isSymbolInitial(r):
return readSymbol(reader, r), false
case r == EOF:
panic(MakeReadError(reader, "Unexpected end of file"))
}
Expand Down
32 changes: 32 additions & 0 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -369,6 +369,22 @@ func dialectFromArg(arg string) Dialect {
return UNKNOWN
}

func setValidLetters(arg, what string) {
switch strings.ToLower(arg) {
case "ascii":
IsValidLetterFn = AnyASCIILetterIsValid
case "letters":
IsValidLetterFn = AnyUnicodeLetterIsValid
case "unicode":
IsValidLetterFn = AnyUnicodeIsValid
case "any":
IsValidLetterFn = AnyRuneIsValid
default:
fmt.Fprintf(Stderr, "Error: Unrecognized %s '%s'.\n", what, arg)
ExitJoker(18)
}
}

func usage(out io.Writer) {
fmt.Fprintf(out, "Joker - %s\n\n", VERSION)
fmt.Fprintln(out, "Usage: joker [args] [-- <repl-args>] starts a repl")
Expand Down Expand Up @@ -416,6 +432,12 @@ func usage(out io.Writer) {
fmt.Fprintln(out, " --dialect <dialect>")
fmt.Fprintln(out, " Set input dialect (\"clj\", \"cljs\", \"joker\", \"edn\") for linting;")
fmt.Fprintln(out, " default is inferred from <filename> suffix, if any.")
fmt.Fprintln(out, " --valid-letters <runes>")
fmt.Fprintln(out, " Set valid runes for non-special/non-digit characters in symbol names; <runes> is:")
fmt.Fprintln(out, " \"ascii\", denoting only [A-Za-z]")
fmt.Fprintln(out, " \"letters\" (default), any Unicode Letter (category L)")
fmt.Fprintln(out, " \"unicode\", [A-Za-z] or any Unicode code point beyond ASCII/Latin1 (255)")
fmt.Fprintln(out, " \"any\", any character (that has no other specific meaning in Joker)")
fmt.Fprintln(out, " --hashmap-threshold <n>")
fmt.Fprintln(out, " Set HASHMAP_THRESHOLD accordingly (internal magic of some sort).")
fmt.Fprintln(out, " --profiler <type>")
Expand Down Expand Up @@ -488,6 +510,9 @@ func parseArgs(args []string) {
} else {
classPath = ""
}
if v, ok := os.LookupEnv("JOKER_VALID_LETTERS"); ok {
setValidLetters(v, "JOKER_VALID_LETTERS environment-variable value")
}
var i int
for i = 1; i < length; i++ { // shift
if debugOut != nil {
Expand Down Expand Up @@ -567,6 +592,13 @@ func parseArgs(args []string) {
} else {
missing = true
}
case "--valid-letters":
if i < length-1 && notOption(args[i+1]) {
i += 1 // shift
setValidLetters(args[i], "--valid-letters argument")
} else {
missing = true
}
case "--hashmap-threshold":
if i < length-1 && notOption(args[i+1]) {
i += 1 // shift
Expand Down

0 comments on commit 36435f1

Please sign in to comment.