Skip to content

Commit

Permalink
Merge pull request #393 from iamtakingiteasy/master
Browse files Browse the repository at this point in the history
Make current token kind public and accessible via Lexer.CurrentToken
  • Loading branch information
rvasily authored Jan 9, 2024
2 parents a209843 + 3bd36b7 commit 141f9c7
Show file tree
Hide file tree
Showing 2 changed files with 78 additions and 50 deletions.
113 changes: 63 additions & 50 deletions jlexer/lexer.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,21 +19,21 @@ import (
"github.com/josharian/intern"
)

// tokenKind determines type of a token.
type tokenKind byte
// TokenKind determines type of a token.
type TokenKind byte

const (
tokenUndef tokenKind = iota // No token.
tokenDelim // Delimiter: one of '{', '}', '[' or ']'.
tokenString // A string literal, e.g. "abc\u1234"
tokenNumber // Number literal, e.g. 1.5e5
tokenBool // Boolean literal: true or false.
tokenNull // null keyword.
TokenUndef TokenKind = iota // No token.
TokenDelim // Delimiter: one of '{', '}', '[' or ']'.
TokenString // A string literal, e.g. "abc\u1234"
TokenNumber // Number literal, e.g. 1.5e5
TokenBool // Boolean literal: true or false.
TokenNull // null keyword.
)

// token describes a single token: type, position in the input and value.
type token struct {
kind tokenKind // Type of a token.
kind TokenKind // Type of a token.

boolValue bool // Value if a boolean literal token.
byteValueCloned bool // true if byteValue was allocated and does not refer to original json body
Expand All @@ -47,7 +47,7 @@ type Lexer struct {

start int // Start of the current token.
pos int // Current unscanned position in the input stream.
token token // Last scanned token, if token.kind != tokenUndef.
token token // Last scanned token, if token.kind != TokenUndef.

firstElement bool // Whether current element is the first in array or an object.
wantSep byte // A comma or a colon character, which need to occur before a token.
Expand All @@ -59,7 +59,7 @@ type Lexer struct {

// FetchToken scans the input for the next token.
func (r *Lexer) FetchToken() {
r.token.kind = tokenUndef
r.token.kind = TokenUndef
r.start = r.pos

// Check if r.Data has r.pos element
Expand Down Expand Up @@ -90,7 +90,7 @@ func (r *Lexer) FetchToken() {
r.errSyntax()
}

r.token.kind = tokenString
r.token.kind = TokenString
r.fetchString()
return

Expand All @@ -99,7 +99,7 @@ func (r *Lexer) FetchToken() {
r.errSyntax()
}
r.firstElement = true
r.token.kind = tokenDelim
r.token.kind = TokenDelim
r.token.delimValue = r.Data[r.pos]
r.pos++
return
Expand All @@ -109,7 +109,7 @@ func (r *Lexer) FetchToken() {
r.errSyntax()
}
r.wantSep = 0
r.token.kind = tokenDelim
r.token.kind = TokenDelim
r.token.delimValue = r.Data[r.pos]
r.pos++
return
Expand All @@ -118,7 +118,7 @@ func (r *Lexer) FetchToken() {
if r.wantSep != 0 {
r.errSyntax()
}
r.token.kind = tokenNumber
r.token.kind = TokenNumber
r.fetchNumber()
return

Expand All @@ -127,7 +127,7 @@ func (r *Lexer) FetchToken() {
r.errSyntax()
}

r.token.kind = tokenNull
r.token.kind = TokenNull
r.fetchNull()
return

Expand All @@ -136,7 +136,7 @@ func (r *Lexer) FetchToken() {
r.errSyntax()
}

r.token.kind = tokenBool
r.token.kind = TokenBool
r.token.boolValue = true
r.fetchTrue()
return
Expand All @@ -146,7 +146,7 @@ func (r *Lexer) FetchToken() {
r.errSyntax()
}

r.token.kind = tokenBool
r.token.kind = TokenBool
r.token.boolValue = false
r.fetchFalse()
return
Expand Down Expand Up @@ -391,7 +391,7 @@ func (r *Lexer) fetchString() {

// scanToken scans the next token if no token is currently available in the lexer.
func (r *Lexer) scanToken() {
if r.token.kind != tokenUndef || r.fatalError != nil {
if r.token.kind != TokenUndef || r.fatalError != nil {
return
}

Expand All @@ -400,7 +400,7 @@ func (r *Lexer) scanToken() {

// consume resets the current token to allow scanning the next one.
func (r *Lexer) consume() {
r.token.kind = tokenUndef
r.token.kind = TokenUndef
r.token.byteValueCloned = false
r.token.delimValue = 0
}
Expand Down Expand Up @@ -443,10 +443,10 @@ func (r *Lexer) errInvalidToken(expected string) {
switch expected {
case "[":
r.token.delimValue = ']'
r.token.kind = tokenDelim
r.token.kind = TokenDelim
case "{":
r.token.delimValue = '}'
r.token.kind = tokenDelim
r.token.kind = TokenDelim
}
r.addNonfatalError(&LexerError{
Reason: fmt.Sprintf("expected %s", expected),
Expand Down Expand Up @@ -475,7 +475,7 @@ func (r *Lexer) GetPos() int {

// Delim consumes a token and verifies that it is the given delimiter.
func (r *Lexer) Delim(c byte) {
if r.token.kind == tokenUndef && r.Ok() {
if r.token.kind == TokenUndef && r.Ok() {
r.FetchToken()
}

Expand All @@ -489,34 +489,34 @@ func (r *Lexer) Delim(c byte) {

// IsDelim returns true if there was no scanning error and next token is the given delimiter.
func (r *Lexer) IsDelim(c byte) bool {
if r.token.kind == tokenUndef && r.Ok() {
if r.token.kind == TokenUndef && r.Ok() {
r.FetchToken()
}
return !r.Ok() || r.token.delimValue == c
}

// Null verifies that the next token is null and consumes it.
func (r *Lexer) Null() {
if r.token.kind == tokenUndef && r.Ok() {
if r.token.kind == TokenUndef && r.Ok() {
r.FetchToken()
}
if !r.Ok() || r.token.kind != tokenNull {
if !r.Ok() || r.token.kind != TokenNull {
r.errInvalidToken("null")
}
r.consume()
}

// IsNull returns true if the next token is a null keyword.
func (r *Lexer) IsNull() bool {
if r.token.kind == tokenUndef && r.Ok() {
if r.token.kind == TokenUndef && r.Ok() {
r.FetchToken()
}
return r.Ok() && r.token.kind == tokenNull
return r.Ok() && r.token.kind == TokenNull
}

// Skip skips a single token.
func (r *Lexer) Skip() {
if r.token.kind == tokenUndef && r.Ok() {
if r.token.kind == TokenUndef && r.Ok() {
r.FetchToken()
}
r.consume()
Expand Down Expand Up @@ -621,10 +621,10 @@ func (r *Lexer) Consumed() {
}

func (r *Lexer) unsafeString(skipUnescape bool) (string, []byte) {
if r.token.kind == tokenUndef && r.Ok() {
if r.token.kind == TokenUndef && r.Ok() {
r.FetchToken()
}
if !r.Ok() || r.token.kind != tokenString {
if !r.Ok() || r.token.kind != TokenString {
r.errInvalidToken("string")
return "", nil
}
Expand Down Expand Up @@ -664,10 +664,10 @@ func (r *Lexer) UnsafeFieldName(skipUnescape bool) string {

// String reads a string literal.
func (r *Lexer) String() string {
if r.token.kind == tokenUndef && r.Ok() {
if r.token.kind == TokenUndef && r.Ok() {
r.FetchToken()
}
if !r.Ok() || r.token.kind != tokenString {
if !r.Ok() || r.token.kind != TokenString {
r.errInvalidToken("string")
return ""
}
Expand All @@ -687,10 +687,10 @@ func (r *Lexer) String() string {

// StringIntern reads a string literal, and performs string interning on it.
func (r *Lexer) StringIntern() string {
if r.token.kind == tokenUndef && r.Ok() {
if r.token.kind == TokenUndef && r.Ok() {
r.FetchToken()
}
if !r.Ok() || r.token.kind != tokenString {
if !r.Ok() || r.token.kind != TokenString {
r.errInvalidToken("string")
return ""
}
Expand All @@ -705,10 +705,10 @@ func (r *Lexer) StringIntern() string {

// Bytes reads a string literal and base64 decodes it into a byte slice.
func (r *Lexer) Bytes() []byte {
if r.token.kind == tokenUndef && r.Ok() {
if r.token.kind == TokenUndef && r.Ok() {
r.FetchToken()
}
if !r.Ok() || r.token.kind != tokenString {
if !r.Ok() || r.token.kind != TokenString {
r.errInvalidToken("string")
return nil
}
Expand All @@ -731,10 +731,10 @@ func (r *Lexer) Bytes() []byte {

// Bool reads a true or false boolean keyword.
func (r *Lexer) Bool() bool {
if r.token.kind == tokenUndef && r.Ok() {
if r.token.kind == TokenUndef && r.Ok() {
r.FetchToken()
}
if !r.Ok() || r.token.kind != tokenBool {
if !r.Ok() || r.token.kind != TokenBool {
r.errInvalidToken("bool")
return false
}
Expand All @@ -744,10 +744,10 @@ func (r *Lexer) Bool() bool {
}

func (r *Lexer) number() string {
if r.token.kind == tokenUndef && r.Ok() {
if r.token.kind == TokenUndef && r.Ok() {
r.FetchToken()
}
if !r.Ok() || r.token.kind != tokenNumber {
if !r.Ok() || r.token.kind != TokenNumber {
r.errInvalidToken("number")
return ""
}
Expand Down Expand Up @@ -1151,7 +1151,7 @@ func (r *Lexer) GetNonFatalErrors() []*LexerError {
// JsonNumber fetches a json.Number from the 'encoding/json' package.
// Ints, floats, and strings containing them are all valid values.
func (r *Lexer) JsonNumber() json.Number {
if r.token.kind == tokenUndef && r.Ok() {
if r.token.kind == TokenUndef && r.Ok() {
r.FetchToken()
}
if !r.Ok() {
Expand All @@ -1160,11 +1160,11 @@ func (r *Lexer) JsonNumber() json.Number {
}

switch r.token.kind {
case tokenString:
case TokenString:
return json.Number(r.String())
case tokenNumber:
case TokenNumber:
return json.Number(r.Raw())
case tokenNull:
case TokenNull:
r.Null()
return json.Number("")
default:
Expand All @@ -1175,21 +1175,21 @@ func (r *Lexer) JsonNumber() json.Number {

// Interface fetches an interface{} analogous to the 'encoding/json' package.
func (r *Lexer) Interface() interface{} {
if r.token.kind == tokenUndef && r.Ok() {
if r.token.kind == TokenUndef && r.Ok() {
r.FetchToken()
}

if !r.Ok() {
return nil
}
switch r.token.kind {
case tokenString:
case TokenString:
return r.String()
case tokenNumber:
case TokenNumber:
return r.Float64()
case tokenBool:
case TokenBool:
return r.Bool()
case tokenNull:
case TokenNull:
r.Null()
return nil
}
Expand Down Expand Up @@ -1242,3 +1242,16 @@ func (r *Lexer) WantColon() {
r.wantSep = ':'
r.firstElement = false
}

// CurrentToken reports the kind of the token the lexer is currently positioned
// on without consuming it. If no token is pending and the lexer is Ok, one is
// scanned first. TokenUndef is returned whenever the lexer is not Ok, including
// when that scan leaves the lexer in a non-Ok state.
func (r *Lexer) CurrentToken() TokenKind {
	// Scan lazily, the same way the other accessors in this file do.
	if r.token.kind == TokenUndef && r.Ok() {
		r.FetchToken()
	}

	if r.Ok() {
		return r.token.kind
	}
	return TokenUndef
}
15 changes: 15 additions & 0 deletions jlexer/lexer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -373,3 +373,18 @@ func TestFetchStringUnterminatedString(t *testing.T) {
}
}
}

func TestCurrentToken(t *testing.T) {
data := []byte(`{"foo"`)
tokens := []TokenKind{TokenDelim, TokenString, TokenUndef}
l := Lexer{Data: data}

for _, want := range tokens {
got := l.CurrentToken()
if got != want {
t.Errorf("CurrentToken() = %v; want %v (err %s)", got, want, l.Error())
}

l.Skip()
}
}

0 comments on commit 141f9c7

Please sign in to comment.