Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support integer literals of different radices and decimal numeric literals #199

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
104 changes: 102 additions & 2 deletions shared/src/main/scala/mlscript/NewLexer.scala
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,12 @@ class NewLexer(origin: Origin, raise: Diagnostic => Unit, dbg: Bool) {
c.isLetter || c === '_' || c === '\''
/** Whether `c` may occur inside an identifier: anything accepted by
  * `isIdentFirstChar`, a decimal digit, or a prime (`'`).
  */
def isIdentChar(c: Char): Bool =
  if (isIdentFirstChar(c)) true
  else isDigit(c) || c === '\''
/** Whether `c` is a hexadecimal digit: `0`-`9`, `a`-`f` or `A`-`F`. */
def isHexDigit(c: Char): Bool = {
  val lowerHex = 'a' <= c && c <= 'f'
  val upperHex = 'A' <= c && c <= 'F'
  isDigit(c) || lowerHex || upperHex
}
/** Whether `c` is an octal digit, i.e. in the range `0`-`7`. */
def isOctDigit(c: Char): Bool =
  '0' <= c && c <= '7'
/** Whether `c` is a binary digit, i.e. `0` or `1`. */
def isBinDigit(c: Char): Bool = c match {
  case '0' | '1' => true
  case _ => false
}
/** Whether `c` is an ASCII decimal digit, i.e. in the range `0`-`9`. */
def isDigit(c: Char): Bool =
  '0' <= c && c <= '9'

Expand Down Expand Up @@ -59,15 +65,109 @@ class NewLexer(origin: Origin, raise: Diagnostic => Unit, dbg: Bool) {
if (i < length && pred(bytes(i))) takeWhile(i + 1, bytes(i) :: cur)(pred)
else (cur.reverseIterator.mkString, i)

/** Lexes a numeric literal starting at index `i`.
  *
  * Forms handled below:
  *  - `0x…`, `0o…` and `0b…` integer literals (radix 16, 8 and 2);
  *  - decimal integer literals;
  *  - decimal literals with a fraction part and/or an exponent part.
  *
  * Digit sequences may contain `_` separators. A separator before the first
  * digit or after the last digit of a sequence is reported as a warning.
  * Malformed input is reported through `raise`, and a best-effort literal is
  * still returned so that lexing can continue.
  *
  * @param i index of the first character of the literal
  * @return the literal and the index of the first character after it
  */
final def num(i: Int): (Lit, Int) = {
  /** Whether `pos` is within the input and the character there satisfies `p`. */
  def test(pos: Int, p: Char => Bool): Bool = pos < length && p(bytes(pos))
  def zero: IntLit = IntLit(BigInt(0))
  /** Take a sequence of digits interleaved with underscores.
    * Returns the digits without separators (`N` if there were none) and the
    * index of the first character that is neither a digit nor a separator.
    */
  def takeDigits(start: Int, pred: Char => Bool): (Opt[Str], Int) = {
    @tailrec def rec(pos: Int, acc: Ls[Char], firstSep: Bool, lastSep: Bool): (Str, Bool, Bool, Int) =
      if (pos < length) {
        val c = bytes(pos)
        if (pred(c)) rec(pos + 1, c :: acc, firstSep, false)
        // `firstSep` is sticky: a later separator (seen when `acc` is no longer
        // empty) must not erase the fact that the sequence began with one.
        else if (c === '_') rec(pos + 1, acc, firstSep || acc.isEmpty, true)
        else (acc.reverseIterator.mkString, firstSep, lastSep, pos)
      } else (acc.reverseIterator.mkString, firstSep, lastSep, pos)
    val (str, firstSep, lastSep, end) = rec(start, Nil, false, false)
    if (firstSep)
      // A leading separator can only sit at `start`, so point the warning there.
      raise(WarningReport(
        msg"Leading separator is not allowed" -> S(loc(start, start + 1)) :: Nil,
        newDefs = true, source = Lexing))
    if (lastSep)
      // `end` is one past the last consumed character, which is the separator.
      raise(WarningReport(
        msg"Trailing separator is not allowed" -> S(loc(end - 1, end)) :: Nil,
        newDefs = true, source = Lexing))
    (if (str.isEmpty) N else S(str), end)
  }
  /** Take an integer and converts to `BigInt`. Also checks if it is empty. */
  def integer(start: Int, radix: Int, desc: Str, pred: Char => Bool): (IntLit, Int) =
    takeDigits(start, pred) match {
      case (N, end) =>
        raise(ErrorReport(msg"Expect at least one $desc digit" -> S(loc(start, start + 2)) :: Nil,
          newDefs = true, source = Lexing))
        (zero, end)
      case (S(digits), end) => (IntLit(BigInt(digits, radix)), end)
    }
  def isDecimalStart(ch: Char) = ch === '.' || ch === 'e' || ch === 'E'
  /** Take a fraction part with an optional exponent part. Call at periods
    * (or directly at the exponent marker when there is no fraction part).
    *
    * @param dot index of the `.`, `e` or `E` following the integral part
    * @param integral digits of the integral part, without separators
    */
  def decimal(dot: Int, integral: Str): (DecLit, Int) = {
    val (fraction, afterFraction) = if (test(dot, _ === '.')) {
      takeDigits(dot + 1, isDigit) match {
        case (N, end) =>
          raise(ErrorReport(msg"Expect at least one digit after the decimal point" -> S(loc(dot + 1, dot + 2)) :: Nil,
            newDefs = true, source = Lexing))
          ("", end)
        case (S(digits), end) => ("." + digits, end)
      }
    } else ("", dot)
    val (exponent, afterExponent) = if (test(afterFraction, ch => ch === 'e' || ch === 'E')) {
      // An explicit sign is optional; default to `+` when it is absent.
      val (sign, digitsStart) =
        if (test(afterFraction + 1, ch => ch === '+' || ch === '-')) (bytes(afterFraction + 1), afterFraction + 2)
        else ('+', afterFraction + 1)
      takeDigits(digitsStart, isDigit) match {
        case (N, end) =>
          raise(ErrorReport(msg"Expect at least one digit after the exponent sign" -> S(loc(end - 1, end)) :: Nil,
            newDefs = true, source = Lexing))
          ("", end)
        case (S(digits), end) => ("E" + sign + digits, end)
      }
    } else {
      ("", afterFraction)
    }
    // The text is well-formed by construction, but `BigDecimal` still rejects
    // exponents it cannot represent (e.g. `1e99999999999`). Report that as a
    // lexical error instead of letting the exception escape the lexer.
    val value =
      try BigDecimal(integral + fraction + exponent)
      catch {
        case _: NumberFormatException =>
          raise(ErrorReport(msg"Exponent of the decimal literal is out of range" -> S(loc(i, afterExponent)) :: Nil,
            newDefs = true, source = Lexing))
          BigDecimal(0)
      }
    (DecLit(value), afterExponent)
  }
  if (i < length) {
    bytes(i) match {
      case '0' if i + 1 < length => bytes(i + 1) match {
        case 'x' => integer(i + 2, 16, "hexadecimal", isHexDigit)
        case 'o' => integer(i + 2, 8, "octal", isOctDigit)
        case 'b' => integer(i + 2, 2, "binary", isBinDigit)
        case '.' | 'E' | 'e' => decimal(i + 1, "0")
        // NOTE(review): a leading `0` followed by more digits (e.g. `01.5`) is
        // lexed as an integer only; no fraction/exponent is attempted here.
        // Confirm whether that is intended.
        case _ => integer(i, 10, "decimal", isDigit)
      }
      case '0' => (zero, i + 1)
      case _ => takeDigits(i, isDigit) match {
        case (N, _) =>
          raise(ErrorReport(msg"Expect a numeric literal" -> S(loc(i, i + 1)) :: Nil,
            newDefs = true, source = Lexing))
          (zero, i)
        case (S(integral), end) =>
          if (end < length && isDecimalStart(bytes(end))) decimal(end, integral)
          else (IntLit(BigInt(integral)), end)
      }
    }
  } else {
    raise(ErrorReport(msg"Expect a numeric literal instead of end of input" -> S(loc(i, i + 1)) :: Nil,
      newDefs = true, source = Lexing))
    (zero, i)
  }
}

@tailrec final
def str(i: Int, escapeMode: Bool, cur: Ls[Char] = Nil): (Str, Int) =
if (escapeMode)
if (i < length)
bytes(i) match {
case '\\' => str(i + 1, false, '\\' :: cur)
case '"' => str(i + 1, false, '"' :: cur)
case 'n' => str(i + 1, false, '\n' :: cur)
case 't' => str(i + 1, false, '\t' :: cur)
case 'r' => str(i + 1, false, '\r' :: cur)
case 'b' => str(i + 1, false, '\b' :: cur)
case 'f' => str(i + 1, false, '\f' :: cur)
case ch =>
raise(WarningReport(msg"Found invalid escape character" -> S(loc(i, i + 1)) :: Nil,
newDefs = true, source = Lexing))
Expand Down Expand Up @@ -190,9 +290,9 @@ class NewLexer(origin: Origin, raise: Diagnostic => Unit, dbg: Bool) {
// else go(j, if (isSymKeyword.contains(n)) KEYWORD(n) else IDENT(n, true))
else lex(j, ind, next(j, if (isSymKeyword.contains(n)) KEYWORD(n) else IDENT(n, true)))
case _ if isDigit(c) =>
val (str, j) = takeWhile(i)(isDigit)
val (lit, j) = num(i)
// go(j, LITVAL(IntLit(BigInt(str))))
lex(j, ind, next(j, LITVAL(IntLit(BigInt(str)))))
lex(j, ind, next(j, LITVAL(lit)))
case _ =>
pe(msg"unexpected character '${escapeChar(c)}'")
// go(i + 1, ERROR)
Expand Down
139 changes: 139 additions & 0 deletions shared/src/test/diff/nu/DecLit.mls
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
:NewDefs

// Real Numbers
// ============

[0.5, 1.0, 3.14159]
//│ [0.5, 1.0, 3.14159]
//│ res
//│ = [ 0.5, 1, 3.14159 ]

[1e100, 1E100, 1e+100, 1E+100, 1E-11, 1e-10, 1E-2, 1e-2]
//│ [1E+100, 1E+100, 1E+100, 1E+100, 1E-11, 1E-10, 0.01, 0.01]
//│ res
//│ = [
//│ 1e+100, 1e+100,
//│ 1e+100, 1e+100,
//│ 1e-11, 1e-10,
//│ 0.01, 0.01
//│ ]

[3.14e-10, 3.14E-10, 3.14e+10, 3.14E+10]
//│ [3.14E-10, 3.14E-10, 3.14E+10, 3.14E+10]
//│ res
//│ = [ 3.14e-10, 3.14e-10, 31400000000, 31400000000 ]

[0.5e-10, 0.5E-10, 0.5e+10, 0.5E+10]
//│ [5E-11, 5E-11, 5E+9, 5E+9]
//│ res
//│ = [ 5e-11, 5e-11, 5000000000, 5000000000 ]

// Separators in integral, fractional, and exponent parts.
[12_34_56.0, 12_34_56.78_90]
[1_2.3_4e-1_0, 1_2.3_4e+1_0, 1_2.3_4e1_0]
[1_2.3_4E-1_0, 1_2.3_4E+1_0, 1_2.3_4E1_0]
//│ [1.234E-9, 1.234E+11, 1.234E+11]
//│ res
//│ = [ 123456, 123456.789 ]
//│ res
//│ = [ 1.234e-9, 123400000000, 123400000000 ]
//│ res
//│ = [ 1.234e-9, 123400000000, 123400000000 ]

// Conflict with tuple index selection.
:pe
.1
//│ ╔══[PARSE ERROR] Unexpected selector in expression position
//│ ║ l.45: .1
//│ ╙── ^^
//│ ╔══[PARSE ERROR] Unexpected end of input; an expression was expected here
//│ ║ l.45: .1
//│ ╙── ^
//│ ()
//│ res
//│ = undefined

// Corner cases.
:pe
0.E10
//│ ╔══[LEXICAL ERROR] Expect at least one digit after the decimal point
//│ ║ l.58: 0.E10
//│ ╙── ^
//│ 0E+10
//│ res
//│ = 0

:pe
0.0E
//│ ╔══[LEXICAL ERROR] Expect at least one digit after the exponent sign
//│ ║ l.67: 0.0E
//│ ╙── ^
//│ 0.0
//│ res
//│ = 0

:pe
0.0E+
//│ ╔══[LEXICAL ERROR] Expect at least one digit after the exponent sign
//│ ║ l.76: 0.0E+
//│ ╙── ^
//│ 0.0
//│ res
//│ = 0

:pe
0E
//│ ╔══[LEXICAL ERROR] Expect at least one digit after the exponent sign
//│ ║ l.85: 0E
//│ ╙── ^
//│ 0
//│ res
//│ = 0

:pe
0E+
//│ ╔══[LEXICAL ERROR] Expect at least one digit after the exponent sign
//│ ║ l.94: 0E+
//│ ╙── ^
//│ 0
//│ res
//│ = 0

:pe
1234E
//│ ╔══[LEXICAL ERROR] Expect at least one digit after the exponent sign
//│ ║ l.103: 1234E
//│ ╙── ^
//│ 1234
//│ res
//│ = 1234

:pe
4378.
//│ ╔══[LEXICAL ERROR] Expect at least one digit after the decimal point
//│ ║ l.112: 4378.
//│ ╙── ^
//│ 4378
//│ res
//│ = 4378

:pe
5.
//│ ╔══[LEXICAL ERROR] Expect at least one digit after the decimal point
//│ ║ l.121: 5.
//│ ╙── ^
//│ 5
//│ res
//│ = 5

:pe
789.E
//│ ╔══[LEXICAL ERROR] Expect at least one digit after the decimal point
//│ ║ l.130: 789.E
//│ ╙── ^
//│ ╔══[LEXICAL ERROR] Expect at least one digit after the exponent sign
//│ ║ l.130: 789.E
//│ ╙── ^
//│ 789
//│ res
//│ = 789
Loading
Loading