Skip to content

Commit

Permalink
Add enum to IDL and Schema (#54)
Browse files Browse the repository at this point in the history
It is now possible to declare enums in the IDL. We will use
the enums in a future PR to generate enum code.
  • Loading branch information
tigrannajaryan authored Feb 28, 2025
1 parent c5c038a commit 7588e1e
Show file tree
Hide file tree
Showing 5 changed files with 226 additions and 10 deletions.
66 changes: 62 additions & 4 deletions go/pkg/idl/lexer.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"errors"
"fmt"
"io"
"strconv"
"unicode"
)

Expand All @@ -23,8 +24,9 @@ type Lexer struct {
curPos Pos
prevPos Pos

identRunes []rune
tokenRunes []rune
ident string
uintNumber uint64
}

// Pos indicates a position in the input stream.
Expand All @@ -46,6 +48,7 @@ const (
tStruct
tOneof
tMultimap
tEnum

tOptional
tRoot
Expand All @@ -60,6 +63,9 @@ const (
tString
tBytes

tIntNumber

tAssign = '='
tLBracket = '['
tRBracket = ']'
tLParen = '('
Expand Down Expand Up @@ -88,6 +94,7 @@ var keywords = map[string]Token{
"struct": tStruct,
"oneof": tOneof,
"multimap": tMultimap,
"enum": tEnum,
"optional": tOptional,
"root": tRoot,
"dict": tDict,
Expand Down Expand Up @@ -151,6 +158,8 @@ func (l *Lexer) Next() {
}

switch l.nextRune {
case tAssign:
l.token = tAssign
case tLParen:
l.token = tLParen
case tRParen:
Expand All @@ -168,6 +177,10 @@ func (l *Lexer) Next() {
// This is a letter. It must be the start of an identifier or keyword.
l.readIdentOrKeyword()
return
} else if isDigit(l.nextRune) {
// This is a digit. It must be a number.
l.readUint64Number()
return
}
l.token = tError
l.errMsg = fmt.Sprintf("invalid character: %c", l.nextRune)
Expand Down Expand Up @@ -234,12 +247,12 @@ func (l *Lexer) readNextRune() {
}

func (l *Lexer) readIdentOrKeyword() Token {
l.identRunes = l.identRunes[:0]
l.tokenRunes = l.tokenRunes[:0]

// The first character is already read. Subsequent characters must be
// letters, digits or underscore.
for (unicode.IsLetter(l.nextRune) || unicode.IsDigit(l.nextRune) || l.nextRune == '_') && !l.isError {
l.identRunes = append(l.identRunes, l.nextRune)
l.tokenRunes = append(l.tokenRunes, l.nextRune)
l.readNextRune()
if l.isEOF {
break
Expand All @@ -250,7 +263,7 @@ func (l *Lexer) readIdentOrKeyword() Token {
}
}

l.ident = string(l.identRunes)
l.ident = string(l.tokenRunes)

// Check if it is a keyword.
if token, ok := keywords[l.ident]; ok {
Expand All @@ -273,3 +286,48 @@ func (l *Lexer) Ident() string {
// TokenStartPos returns the position held in the lexer's prevPos field.
// NOTE(review): per the method name this is presumably where the most
// recently read token begins — confirm against where prevPos is assigned.
func (l *Lexer) TokenStartPos() Pos {
return l.prevPos
}

// Uint64Number returns the value of the last successfully lexed integer
// literal. The value is stored by readUint64Number at the same time the
// current token is set to tIntNumber, so it is only meaningful while
// Token() reports tIntNumber.
func (l *Lexer) Uint64Number() uint64 {
return l.uintNumber
}

// isDigit reports whether r is one of the ASCII decimal digits '0'..'9'.
// Non-ASCII Unicode digits are deliberately not matched.
func isDigit(r rune) bool {
	if r < '0' {
		return false
	}
	return r <= '9'
}

// isNumberContinuation reports whether r may appear after the first digit of
// an integer literal. The accepted set is:
//
//   - ASCII decimal digits '0'..'9'
//   - hexadecimal digits 'a'..'f' and 'A'..'F' (so that literals such as 0xFF
//     are collected in full instead of being cut off after the prefix)
//   - the base prefix letters 'x', 'o' and their upper-case forms ('b'/'B' is
//     already covered by the hexadecimal range)
//   - the digit separator '_'
//
// This function only decides which runes belong to the literal; whether the
// collected text is a well-formed number is validated later by the caller
// when it converts the text.
func isNumberContinuation(r rune) bool {
	switch {
	case r >= '0' && r <= '9':
		return true
	case r >= 'a' && r <= 'f', r >= 'A' && r <= 'F':
		return true
	}

	switch r {
	case '_', 'x', 'X', 'o', 'O':
		return true
	}
	return false
}

// readUint64Number lexes an unsigned integer literal whose first rune is
// already in l.nextRune.
//
// Runes are accumulated into l.tokenRunes until EOF is reached or the next
// rune is rejected by isNumberContinuation. The accumulated text is then
// converted with strconv.ParseUint using base 0, so the radix is taken from
// the literal's prefix.
//
// On success the value is stored in l.uintNumber and the token becomes
// tIntNumber. If the lexer is in the error state the token becomes tError; if
// the text is not a valid number the token becomes tError and l.errMsg
// describes the offending text.
func (l *Lexer) readUint64Number() {
	l.tokenRunes = l.tokenRunes[:0]

	// Consume the literal one rune at a time; the first rune was read by the
	// caller.
	for done := false; !done; {
		if l.isError {
			l.token = tError
			return
		}
		l.tokenRunes = append(l.tokenRunes, l.nextRune)
		l.readNextRune()
		done = l.isEOF || !isNumberContinuation(l.nextRune)
	}

	// Base 0 lets ParseUint pick the radix from the literal's prefix.
	text := string(l.tokenRunes)
	parsed, err := strconv.ParseUint(text, 0, 64)
	if err != nil {
		l.token = tError
		l.errMsg = fmt.Sprintf("invalid number: %s", text)
		return
	}

	l.token = tIntNumber
	l.uintNumber = parsed
}

// ErrMsg returns the message recorded in the lexer's errMsg field. The lexer
// fills it in when it produces a tError token for an invalid character or an
// invalid number.
func (l *Lexer) ErrMsg() string {
return l.errMsg
}
125 changes: 121 additions & 4 deletions go/pkg/idl/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ func (p *Parser) Parse() error {
p.schema = &schema.Schema{
Structs: map[string]*schema.Struct{},
Multimaps: map[string]*schema.Multimap{},
Enums: map[string]*schema.Enum{},
}

if err := p.parsePackage(); err != nil {
Expand All @@ -63,6 +64,8 @@ func (p *Parser) Parse() error {
err = p.parseOneof()
case tMultimap:
err = p.parseMultimap()
case tEnum:
err = p.parseEnum()
default:
return p.error("expected struct, oneof or multimap")
}
Expand All @@ -76,13 +79,22 @@ func (p *Parser) Parse() error {
return p.resolveFieldTypes()
}

// isTopLevelNameUsed reports whether name is already registered in the schema
// being built, as a key with a non-nil entry in the Structs, Multimaps or
// Enums map.
func (p *Parser) isTopLevelNameUsed(name string) bool {
	s := p.schema
	switch {
	case s.Structs[name] != nil:
		return true
	case s.Multimaps[name] != nil:
		return true
	default:
		return s.Enums[name] != nil
	}
}

func (p *Parser) parseStruct() (*schema.Struct, error) {
p.lexer.Next() // skip "struct"

if p.lexer.Token() != tIdent {
return nil, p.error("struct name expected")
}
structName := p.lexer.Ident()

if p.isTopLevelNameUsed(structName) {
return nil, p.error("duplicate top-level identifier: " + structName)
}

p.lexer.Next()

str := &schema.Struct{
Expand Down Expand Up @@ -126,6 +138,11 @@ func (p *Parser) parseMultimap() error {
return p.error("multimap name expected")
}
multimapName := p.lexer.Ident()

if p.isTopLevelNameUsed(multimapName) {
return p.error("duplicate top-level identifier: " + multimapName)
}

p.lexer.Next()

mm := &schema.Multimap{
Expand Down Expand Up @@ -387,11 +404,36 @@ func (p *Parser) resolveFieldTypes() error {
}

func (p *Parser) resolveFieldType(fieldType *schema.FieldType) error {
if fieldType.Struct != "" {
_, ok := p.schema.Multimaps[fieldType.Struct]
if ok {
fieldType.MultiMap = fieldType.Struct
typeName := fieldType.Struct
if typeName != "" {
matches := 0
_, isStruct := p.schema.Structs[typeName]
if isStruct {
matches++
}

_, isMultimap := p.schema.Multimaps[typeName]
if isMultimap {
fieldType.MultiMap = typeName
fieldType.Struct = ""
matches++
}

_, isEnum := p.schema.Enums[typeName]
if isEnum {
// All enums are uint64.
t := schema.PrimitiveTypeUint64
fieldType.Primitive = &t
fieldType.Enum = typeName
fieldType.Struct = ""
matches++
}

if matches == 0 {
return p.error("unknown type: " + typeName)
}
if matches > 1 {
return p.error("ambiguous type: " + typeName)
}
}
return nil
Expand All @@ -409,3 +451,78 @@ func (p *Parser) parsePackage() error {
}
return nil
}

// parseEnum parses an enum declaration of the form
//
//	enum Name { Field = value ... }
//
// It is entered with the lexer positioned on the "enum" keyword. The enum is
// registered in p.schema.Enums under its name before its body is parsed.
// An error is returned if the name is missing, if the name is already taken
// by another top-level declaration, or if the body is malformed.
func (p *Parser) parseEnum() error {
	// Step over the "enum" keyword; an identifier naming the enum must follow.
	p.lexer.Next()
	if tok := p.lexer.Token(); tok != tIdent {
		return p.error("enum name expected")
	}

	name := p.lexer.Ident()
	if p.isTopLevelNameUsed(name) {
		return p.error("duplicate top-level identifier: " + name)
	}
	p.lexer.Next()

	decl := &schema.Enum{Name: name}
	p.schema.Enums[name] = decl

	// Body: '{' fields '}'.
	if err := p.eat(tLBrace); err != nil {
		return err
	}
	if err := p.parseEnumFields(decl); err != nil {
		return err
	}
	if err := p.eat(tRBrace); err != nil {
		return err
	}
	return nil
}

// parseEnumFields parses enum members one after another, adding them to enum,
// until parseEnumField reports that no further member is present. The first
// error encountered is returned immediately.
func (p *Parser) parseEnumFields(enum *schema.Enum) error {
	for more := true; more; {
		var err error
		err, more = p.parseEnumField(enum)
		if err != nil {
			return err
		}
	}
	return nil
}

// parseEnumField parses a single enum member of the form
//
//	Name = <unsigned integer>
//
// and appends it to enum.Fields.
//
// The result is one of:
//   - (nil, true): a member was parsed and appended;
//   - (nil, false): the current token is not an identifier, which the caller
//     treats as the end of the member list;
//   - (err, false): the member is malformed or its name repeats an earlier
//     member of the same enum.
//
// The member is appended only after it has been fully validated, so a failed
// parse never leaves a half-initialized entry in enum.Fields.
func (p *Parser) parseEnumField(enum *schema.Enum) (error, bool) {
	if p.lexer.Token() != tIdent {
		return nil, false
	}
	fieldName := p.lexer.Ident()

	// Member names must be unique within an enum; report the duplicate while
	// the lexer is still positioned on the offending name.
	for i := range enum.Fields {
		if enum.Fields[i].Name == fieldName {
			return p.error("duplicate enum field name: " + fieldName), false
		}
	}

	p.lexer.Next() // skip field name

	if err := p.eat(tAssign); err != nil {
		return err, false
	}

	if p.lexer.Token() != tIntNumber {
		errMsg := "enum field value expected"
		if p.lexer.Token() == tError {
			// Surface the lexer's own diagnostic (e.g. a malformed number).
			errMsg += ": " + p.lexer.ErrMsg()
		}
		return p.error(errMsg), false
	}

	enum.Fields = append(enum.Fields, schema.EnumField{
		Name:  fieldName,
		Value: p.lexer.Uint64Number(),
	})
	p.lexer.Next() // skip field value

	return nil, true
}
22 changes: 21 additions & 1 deletion go/pkg/idl/parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,22 @@ func TestParserErrors(t *testing.T) {
input: "package abc\nstruct MyStruct {\nField []struct",
err: "test.stef:3:10: type specifier expected after []",
},
{
input: "package abc\nstruct MyStruct {\nField UnknownType }",
err: "test.stef:3:20: unknown type: UnknownType",
},
{
input: "package abc oneof A {} struct A {}",
err: "test.stef:1:31: duplicate top-level identifier: A",
},
{
input: "package abc enum {}",
err: "test.stef:1:18: enum name expected",
},
{
input: "package abc enum Enum { Value = }",
err: "test.stef:1:33: enum field value expected",
},
}

for _, test := range tests {
Expand Down Expand Up @@ -79,7 +95,11 @@ func TestParserOtelSTEF(t *testing.T) {
jsonBytes, err := os.ReadFile("testdata/oteltef.wire.json")
require.NoError(t, err)

var schem schema.Schema
schem := schema.Schema{
Structs: map[string]*schema.Struct{},
Multimaps: map[string]*schema.Multimap{},
Enums: map[string]*schema.Enum{},
}
err = json.Unmarshal(jsonBytes, &schem)
require.NoError(t, err)

Expand Down
9 changes: 9 additions & 0 deletions go/pkg/idl/testdata/example.stef
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,18 @@ struct Book {
Title string // The title of the book.
PublishedOn Date // When was it published.
Publisher string dict(Publisher) // Publishers name, encoded with a dict.
Category Category
Authors []Person // Zero or more authors of the book.
}

enum Category {
Fiction = 1
NonFiction = 2
HexMystery = 0x3
OctalMystery = 0o4
BinaryMystery = 0b101
}

// BookEvent describes either a checkout or a checkin event.
oneof BookEvent {
Checkout CheckoutEvent
Expand Down
14 changes: 13 additions & 1 deletion go/pkg/schema/schema.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ type Schema struct {
PackageName string `json:"package,omitempty"`
Structs map[string]*Struct `json:"structs"`
Multimaps map[string]*Multimap `json:"multimaps"`
Enums map[string]*Enum
}

type Compatibility int
Expand Down Expand Up @@ -301,7 +302,8 @@ type FieldType struct {
Array *FieldType `json:"array,omitempty"`
Struct string `json:"struct,omitempty"`
MultiMap string `json:"multimap,omitempty"`
DictName string `json:"dict,omitempty"`
Enum string
DictName string `json:"dict,omitempty"`
}

type MultimapField struct {
Expand All @@ -313,3 +315,13 @@ type Multimap struct {
Key MultimapField `json:"key"`
Value MultimapField `json:"value"`
}

// Enum describes an enum type declared in the schema.
//
// The JSON field names are lowercase to match the tagging convention used by
// the other schema types in this file.
type Enum struct {
	// Name is the enum's type name.
	Name string `json:"name"`
	// Fields lists the enum's members.
	Fields []EnumField `json:"fields"`
}

// EnumField is a single named member of an Enum together with its numeric
// value.
type EnumField struct {
	// Name is the member's identifier.
	Name string `json:"name"`
	// Value is the member's unsigned 64-bit value.
	Value uint64 `json:"value"`
}

0 comments on commit 7588e1e

Please sign in to comment.