From 3bd36b7ac9e3174500af6886480f45543b3a30eb Mon Sep 17 00:00:00 2001 From: Alexander Tumin Date: Sat, 6 Jan 2024 18:16:38 +0300 Subject: [PATCH] Make current token kind public and accessible via Lexer.CurrentToken Updated implementation of #308 --- jlexer/lexer.go | 113 ++++++++++++++++++++++++------------------- jlexer/lexer_test.go | 15 ++++++ 2 files changed, 78 insertions(+), 50 deletions(-) diff --git a/jlexer/lexer.go b/jlexer/lexer.go index b5f5e261..a27705b1 100644 --- a/jlexer/lexer.go +++ b/jlexer/lexer.go @@ -19,21 +19,21 @@ import ( "github.com/josharian/intern" ) -// tokenKind determines type of a token. -type tokenKind byte +// TokenKind determines type of a token. +type TokenKind byte const ( - tokenUndef tokenKind = iota // No token. - tokenDelim // Delimiter: one of '{', '}', '[' or ']'. - tokenString // A string literal, e.g. "abc\u1234" - tokenNumber // Number literal, e.g. 1.5e5 - tokenBool // Boolean literal: true or false. - tokenNull // null keyword. + TokenUndef TokenKind = iota // No token. + TokenDelim // Delimiter: one of '{', '}', '[' or ']'. + TokenString // A string literal, e.g. "abc\u1234" + TokenNumber // Number literal, e.g. 1.5e5 + TokenBool // Boolean literal: true or false. + TokenNull // null keyword. ) // token describes a single token: type, position in the input and value. type token struct { - kind tokenKind // Type of a token. + kind TokenKind // Type of a token. boolValue bool // Value if a boolean literal token. byteValueCloned bool // true if byteValue was allocated and does not refer to original json body @@ -47,7 +47,7 @@ type Lexer struct { start int // Start of the current token. pos int // Current unscanned position in the input stream. - token token // Last scanned token, if token.kind != tokenUndef. + token token // Last scanned token, if token.kind != TokenUndef. firstElement bool // Whether current element is the first in array or an object. wantSep byte // A comma or a colon character, which need to occur before a token. @@ -59,7 +59,7 @@ type Lexer struct { // FetchToken scans the input for the next token. func (r *Lexer) FetchToken() { - r.token.kind = tokenUndef + r.token.kind = TokenUndef r.start = r.pos // Check if r.Data has r.pos element @@ -90,7 +90,7 @@ func (r *Lexer) FetchToken() { r.errSyntax() } - r.token.kind = tokenString + r.token.kind = TokenString r.fetchString() return @@ -99,7 +99,7 @@ func (r *Lexer) FetchToken() { r.errSyntax() } r.firstElement = true - r.token.kind = tokenDelim + r.token.kind = TokenDelim r.token.delimValue = r.Data[r.pos] r.pos++ return @@ -109,7 +109,7 @@ func (r *Lexer) FetchToken() { r.errSyntax() } r.wantSep = 0 - r.token.kind = tokenDelim + r.token.kind = TokenDelim r.token.delimValue = r.Data[r.pos] r.pos++ return @@ -118,7 +118,7 @@ func (r *Lexer) FetchToken() { if r.wantSep != 0 { r.errSyntax() } - r.token.kind = tokenNumber + r.token.kind = TokenNumber r.fetchNumber() return @@ -127,7 +127,7 @@ func (r *Lexer) FetchToken() { r.errSyntax() } - r.token.kind = tokenNull + r.token.kind = TokenNull r.fetchNull() return @@ -136,7 +136,7 @@ func (r *Lexer) FetchToken() { r.errSyntax() } - r.token.kind = tokenBool + r.token.kind = TokenBool r.token.boolValue = true r.fetchTrue() return @@ -146,7 +146,7 @@ func (r *Lexer) FetchToken() { r.errSyntax() } - r.token.kind = tokenBool + r.token.kind = TokenBool r.token.boolValue = false r.fetchFalse() return @@ -391,7 +391,7 @@ func (r *Lexer) fetchString() { // scanToken scans the next token if no token is currently available in the lexer. func (r *Lexer) scanToken() { - if r.token.kind != tokenUndef || r.fatalError != nil { + if r.token.kind != TokenUndef || r.fatalError != nil { return } @@ -400,7 +400,7 @@ func (r *Lexer) scanToken() { // consume resets the current token to allow scanning the next one. func (r *Lexer) consume() { - r.token.kind = tokenUndef + r.token.kind = TokenUndef r.token.byteValueCloned = false r.token.delimValue = 0 } @@ -443,10 +443,10 @@ func (r *Lexer) errInvalidToken(expected string) { switch expected { case "[": r.token.delimValue = ']' - r.token.kind = tokenDelim + r.token.kind = TokenDelim case "{": r.token.delimValue = '}' - r.token.kind = tokenDelim + r.token.kind = TokenDelim } r.addNonfatalError(&LexerError{ Reason: fmt.Sprintf("expected %s", expected), @@ -475,7 +475,7 @@ func (r *Lexer) GetPos() int { // Delim consumes a token and verifies that it is the given delimiter. func (r *Lexer) Delim(c byte) { - if r.token.kind == tokenUndef && r.Ok() { + if r.token.kind == TokenUndef && r.Ok() { r.FetchToken() } @@ -489,7 +489,7 @@ func (r *Lexer) Delim(c byte) { // IsDelim returns true if there was no scanning error and next token is the given delimiter. func (r *Lexer) IsDelim(c byte) bool { - if r.token.kind == tokenUndef && r.Ok() { + if r.token.kind == TokenUndef && r.Ok() { r.FetchToken() } return !r.Ok() || r.token.delimValue == c @@ -497,10 +497,10 @@ func (r *Lexer) IsDelim(c byte) bool { // Null verifies that the next token is null and consumes it. func (r *Lexer) Null() { - if r.token.kind == tokenUndef && r.Ok() { + if r.token.kind == TokenUndef && r.Ok() { r.FetchToken() } - if !r.Ok() || r.token.kind != tokenNull { + if !r.Ok() || r.token.kind != TokenNull { r.errInvalidToken("null") } r.consume() @@ -508,15 +508,15 @@ func (r *Lexer) Null() { // IsNull returns true if the next token is a null keyword. func (r *Lexer) IsNull() bool { - if r.token.kind == tokenUndef && r.Ok() { + if r.token.kind == TokenUndef && r.Ok() { r.FetchToken() } - return r.Ok() && r.token.kind == tokenNull + return r.Ok() && r.token.kind == TokenNull } // Skip skips a single token. func (r *Lexer) Skip() { - if r.token.kind == tokenUndef && r.Ok() { + if r.token.kind == TokenUndef && r.Ok() { r.FetchToken() } r.consume() @@ -621,10 +621,10 @@ func (r *Lexer) Consumed() { } func (r *Lexer) unsafeString(skipUnescape bool) (string, []byte) { - if r.token.kind == tokenUndef && r.Ok() { + if r.token.kind == TokenUndef && r.Ok() { r.FetchToken() } - if !r.Ok() || r.token.kind != tokenString { + if !r.Ok() || r.token.kind != TokenString { r.errInvalidToken("string") return "", nil } @@ -664,10 +664,10 @@ func (r *Lexer) UnsafeFieldName(skipUnescape bool) string { // String reads a string literal. func (r *Lexer) String() string { - if r.token.kind == tokenUndef && r.Ok() { + if r.token.kind == TokenUndef && r.Ok() { r.FetchToken() } - if !r.Ok() || r.token.kind != tokenString { + if !r.Ok() || r.token.kind != TokenString { r.errInvalidToken("string") return "" } @@ -687,10 +687,10 @@ func (r *Lexer) String() string { // StringIntern reads a string literal, and performs string interning on it. func (r *Lexer) StringIntern() string { - if r.token.kind == tokenUndef && r.Ok() { + if r.token.kind == TokenUndef && r.Ok() { r.FetchToken() } - if !r.Ok() || r.token.kind != tokenString { + if !r.Ok() || r.token.kind != TokenString { r.errInvalidToken("string") return "" } @@ -705,10 +705,10 @@ func (r *Lexer) StringIntern() string { // Bytes reads a string literal and base64 decodes it into a byte slice. func (r *Lexer) Bytes() []byte { - if r.token.kind == tokenUndef && r.Ok() { + if r.token.kind == TokenUndef && r.Ok() { r.FetchToken() } - if !r.Ok() || r.token.kind != tokenString { + if !r.Ok() || r.token.kind != TokenString { r.errInvalidToken("string") return nil } @@ -731,10 +731,10 @@ func (r *Lexer) Bytes() []byte { // Bool reads a true or false boolean keyword. func (r *Lexer) Bool() bool { - if r.token.kind == tokenUndef && r.Ok() { + if r.token.kind == TokenUndef && r.Ok() { r.FetchToken() } - if !r.Ok() || r.token.kind != tokenBool { + if !r.Ok() || r.token.kind != TokenBool { r.errInvalidToken("bool") return false } @@ -744,10 +744,10 @@ func (r *Lexer) Bool() bool { } func (r *Lexer) number() string { - if r.token.kind == tokenUndef && r.Ok() { + if r.token.kind == TokenUndef && r.Ok() { r.FetchToken() } - if !r.Ok() || r.token.kind != tokenNumber { + if !r.Ok() || r.token.kind != TokenNumber { r.errInvalidToken("number") return "" } @@ -1151,7 +1151,7 @@ func (r *Lexer) GetNonFatalErrors() []*LexerError { // JsonNumber fetches and json.Number from 'encoding/json' package. // Both int, float or string, contains them are valid values func (r *Lexer) JsonNumber() json.Number { - if r.token.kind == tokenUndef && r.Ok() { + if r.token.kind == TokenUndef && r.Ok() { r.FetchToken() } if !r.Ok() { @@ -1160,11 +1160,11 @@ func (r *Lexer) JsonNumber() json.Number { } switch r.token.kind { - case tokenString: + case TokenString: return json.Number(r.String()) - case tokenNumber: + case TokenNumber: return json.Number(r.Raw()) - case tokenNull: + case TokenNull: r.Null() return json.Number("") default: @@ -1175,7 +1175,7 @@ func (r *Lexer) JsonNumber() json.Number { // Interface fetches an interface{} analogous to the 'encoding/json' package. func (r *Lexer) Interface() interface{} { - if r.token.kind == tokenUndef && r.Ok() { + if r.token.kind == TokenUndef && r.Ok() { r.FetchToken() } @@ -1183,13 +1183,13 @@ func (r *Lexer) Interface() interface{} { return nil } switch r.token.kind { - case tokenString: + case TokenString: return r.String() - case tokenNumber: + case TokenNumber: return r.Float64() - case tokenBool: + case TokenBool: return r.Bool() - case tokenNull: + case TokenNull: r.Null() return nil } @@ -1242,3 +1242,16 @@ func (r *Lexer) WantColon() { r.wantSep = ':' r.firstElement = false } + +// CurrentToken returns current token kind if there were no errors and TokenUndef otherwise +func (r *Lexer) CurrentToken() TokenKind { + if r.token.kind == TokenUndef && r.Ok() { + r.FetchToken() + } + + if !r.Ok() { + return TokenUndef + } + + return r.token.kind +} diff --git a/jlexer/lexer_test.go b/jlexer/lexer_test.go index 0cee611d..b2bbad76 100644 --- a/jlexer/lexer_test.go +++ b/jlexer/lexer_test.go @@ -373,3 +373,18 @@ func TestFetchStringUnterminatedString(t *testing.T) { } } } + +func TestCurrentToken(t *testing.T) { + data := []byte(`{"foo"`) + tokens := []TokenKind{TokenDelim, TokenString, TokenUndef} + l := Lexer{Data: data} + + for _, want := range tokens { + got := l.CurrentToken() + if got != want { + t.Errorf("CurrentToken() = %v; want %v (err %s)", got, want, l.Error()) + } + + l.Skip() + } +}