From 7588e1e3606bc1fffa9f58cbc418505e8fed54ab Mon Sep 17 00:00:00 2001 From: Tigran Najaryan <4194920+tigrannajaryan@users.noreply.github.com> Date: Thu, 27 Feb 2025 20:08:36 -0500 Subject: [PATCH] Add enum to IDL and Schema (#54) It is now possible to declare enums in the IDL. We will use the enums in a future PR to generate enum code. --- go/pkg/idl/lexer.go | 66 +++++++++++++++- go/pkg/idl/parser.go | 125 ++++++++++++++++++++++++++++++- go/pkg/idl/parser_test.go | 22 +++++- go/pkg/idl/testdata/example.stef | 9 +++ go/pkg/schema/schema.go | 14 +++- 5 files changed, 226 insertions(+), 10 deletions(-) diff --git a/go/pkg/idl/lexer.go b/go/pkg/idl/lexer.go index d097b6e..faff4c8 100644 --- a/go/pkg/idl/lexer.go +++ b/go/pkg/idl/lexer.go @@ -5,6 +5,7 @@ import ( "errors" "fmt" "io" + "strconv" "unicode" ) @@ -23,8 +24,9 @@ type Lexer struct { curPos Pos prevPos Pos - identRunes []rune + tokenRunes []rune ident string + uintNumber uint64 } // Pos indicates a position in the input stream. @@ -46,6 +48,7 @@ const ( tStruct tOneof tMultimap + tEnum tOptional tRoot @@ -60,6 +63,9 @@ const ( tString tBytes + tIntNumber + + tAssign = '=' tLBracket = '[' tRBracket = ']' tLParen = '(' @@ -88,6 +94,7 @@ var keywords = map[string]Token{ "struct": tStruct, "oneof": tOneof, "multimap": tMultimap, + "enum": tEnum, "optional": tOptional, "root": tRoot, "dict": tDict, @@ -151,6 +158,8 @@ func (l *Lexer) Next() { } switch l.nextRune { + case tAssign: + l.token = tAssign case tLParen: l.token = tLParen case tRParen: @@ -168,6 +177,10 @@ func (l *Lexer) Next() { // This is a letter. It must a start of an identifier or keyword. l.readIdentOrKeyword() return + } else if isDigit(l.nextRune) { + // This is a digit. It must be a number. + l.readUint64Number() + return } l.token = tError l.errMsg = fmt.Sprintf("invalid character: %c", l.nextRune) @@ -234,12 +247,12 @@ func (l *Lexer) readNextRune() { } func (l *Lexer) readIdentOrKeyword() Token { - l.identRunes = l.identRunes[:0] + l.tokenRunes = l.tokenRunes[:0] // The first character is already read. Subsequent characters must be // letters, digits or underscore. for (unicode.IsLetter(l.nextRune) || unicode.IsDigit(l.nextRune) || l.nextRune == '_') && !l.isError { - l.identRunes = append(l.identRunes, l.nextRune) + l.tokenRunes = append(l.tokenRunes, l.nextRune) l.readNextRune() if l.isEOF { break @@ -250,7 +263,7 @@ func (l *Lexer) readIdentOrKeyword() Token { } } - l.ident = string(l.identRunes) + l.ident = string(l.tokenRunes) // Check if it is a keyword. if token, ok := keywords[l.ident]; ok { @@ -273,3 +286,48 @@ func (l *Lexer) Ident() string { func (l *Lexer) TokenStartPos() Pos { return l.prevPos } + +func (l *Lexer) Uint64Number() uint64 { + return l.uintNumber +} + +func isDigit(r rune) bool { + return r >= '0' && r <= '9' +} + +func isNumberContinuation(r rune) bool { + return isDigit(r) || r == '_' || r == 'b' || r == 'x' || r == 'o' || r == 'B' || r == 'X' || r == 'O' +} + +func (l *Lexer) readUint64Number() { + l.tokenRunes = l.tokenRunes[:0] + + // The first character is already read. + + for { + if l.isError { + l.token = tError + return + } + l.tokenRunes = append(l.tokenRunes, l.nextRune) + l.readNextRune() + if l.isEOF || !isNumberContinuation(l.nextRune) { + break + } + } + + // This correctly parses decimal, hexadecimal, octal and binary numbers. + val, err := strconv.ParseUint(string(l.tokenRunes), 0, 64) + if err != nil { + l.token = tError + l.errMsg = fmt.Sprintf("invalid number: %s", string(l.tokenRunes)) + return + } + + l.uintNumber = val + l.token = tIntNumber +} + +func (l *Lexer) ErrMsg() string { + return l.errMsg +} diff --git a/go/pkg/idl/parser.go b/go/pkg/idl/parser.go index 5f78bc3..06eac4b 100644 --- a/go/pkg/idl/parser.go +++ b/go/pkg/idl/parser.go @@ -48,6 +48,7 @@ func (p *Parser) Parse() error { p.schema = &schema.Schema{ Structs: map[string]*schema.Struct{}, Multimaps: map[string]*schema.Multimap{}, + Enums: map[string]*schema.Enum{}, } if err := p.parsePackage(); err != nil { @@ -63,6 +64,8 @@ func (p *Parser) Parse() error { err = p.parseOneof() case tMultimap: err = p.parseMultimap() + case tEnum: + err = p.parseEnum() default: return p.error("expected struct, oneof or multimap") } @@ -76,6 +79,10 @@ func (p *Parser) Parse() error { return p.resolveFieldTypes() } +func (p *Parser) isTopLevelNameUsed(name string) bool { + return p.schema.Structs[name] != nil || p.schema.Multimaps[name] != nil || p.schema.Enums[name] != nil +} + func (p *Parser) parseStruct() (*schema.Struct, error) { p.lexer.Next() // skip "struct" @@ -83,6 +90,11 @@ func (p *Parser) parseStruct() (*schema.Struct, error) { return nil, p.error("struct name expected") } structName := p.lexer.Ident() + + if p.isTopLevelNameUsed(structName) { + return nil, p.error("duplicate top-level identifier: " + structName) + } + p.lexer.Next() str := &schema.Struct{ @@ -126,6 +138,11 @@ func (p *Parser) parseMultimap() error { return p.error("multimap name expected") } multimapName := p.lexer.Ident() + + if p.isTopLevelNameUsed(multimapName) { + return p.error("duplicate top-level identifier: " + multimapName) + } + p.lexer.Next() mm := &schema.Multimap{ @@ -387,11 +404,36 @@ func (p *Parser) resolveFieldTypes() error { } func (p *Parser) resolveFieldType(fieldType *schema.FieldType) error { - if fieldType.Struct != "" { - _, ok := p.schema.Multimaps[fieldType.Struct] - if ok { - fieldType.MultiMap = fieldType.Struct + typeName := fieldType.Struct + if typeName != "" { + matches := 0 + _, isStruct := p.schema.Structs[typeName] + if isStruct { + matches++ + } + + _, isMultimap := p.schema.Multimaps[typeName] + if isMultimap { + fieldType.MultiMap = typeName + fieldType.Struct = "" + matches++ + } + + _, isEnum := p.schema.Enums[typeName] + if isEnum { + // All enums are uint64. + t := schema.PrimitiveTypeUint64 + fieldType.Primitive = &t + fieldType.Enum = typeName fieldType.Struct = "" + matches++ + } + + if matches == 0 { + return p.error("unknown type: " + typeName) + } + if matches > 1 { + return p.error("ambiguous type: " + typeName) } } return nil @@ -409,3 +451,78 @@ func (p *Parser) parsePackage() error { } return nil } + +func (p *Parser) parseEnum() error { + p.lexer.Next() // skip "enum" + + if p.lexer.Token() != tIdent { + return p.error("enum name expected") + } + enumName := p.lexer.Ident() + + if p.isTopLevelNameUsed(enumName) { + return p.error("duplicate top-level identifier: " + enumName) + } + + p.lexer.Next() + + enum := &schema.Enum{ + Name: enumName, + } + p.schema.Enums[enum.Name] = enum + + if err := p.eat(tLBrace); err != nil { + return err + } + + if err := p.parseEnumFields(enum); err != nil { + return err + } + + if err := p.eat(tRBrace); err != nil { + return err + } + + return nil +} + +func (p *Parser) parseEnumFields(enum *schema.Enum) error { + for { + err, ok := p.parseEnumField(enum) + if err != nil { + return err + } + if !ok { + break + } + } + return nil +} + +func (p *Parser) parseEnumField(enum *schema.Enum) (error, bool) { + if p.lexer.Token() != tIdent { + return nil, false + } + + enum.Fields = append(enum.Fields, schema.EnumField{Name: p.lexer.Ident()}) + field := &enum.Fields[len(enum.Fields)-1] + + p.lexer.Next() // skip field name + + if err := p.eat(tAssign); err != nil { + return err, false + } + + if p.lexer.Token() != tIntNumber { + errMsg := "enum field value expected" + if p.lexer.Token() == tError { + errMsg += ": " + p.lexer.ErrMsg() + } + return p.error(errMsg), false + } + + field.Value = p.lexer.Uint64Number() + p.lexer.Next() + + return nil, true +} diff --git a/go/pkg/idl/parser_test.go b/go/pkg/idl/parser_test.go index ecdbb92..b3b35f4 100644 --- a/go/pkg/idl/parser_test.go +++ b/go/pkg/idl/parser_test.go @@ -44,6 +44,22 @@ func TestParserErrors(t *testing.T) { input: "package abc\nstruct MyStruct {\nField []struct", err: "test.stef:3:10: type specifier expected after []", }, + { + input: "package abc\nstruct MyStruct {\nField UnknownType }", + err: "test.stef:3:20: unknown type: UnknownType", + }, + { + input: "package abc oneof A {} struct A {}", + err: "test.stef:1:31: duplicate top-level identifier: A", + }, + { + input: "package abc enum {}", + err: "test.stef:1:18: enum name expected", + }, + { + input: "package abc enum Enum { Value = }", + err: "test.stef:1:33: enum field value expected", + }, } for _, test := range tests { @@ -79,7 +95,11 @@ func TestParserOtelSTEF(t *testing.T) { jsonBytes, err := os.ReadFile("testdata/oteltef.wire.json") require.NoError(t, err) - var schem schema.Schema + schem := schema.Schema{ + Structs: map[string]*schema.Struct{}, + Multimaps: map[string]*schema.Multimap{}, + Enums: map[string]*schema.Enum{}, + } err = json.Unmarshal(jsonBytes, &schem) require.NoError(t, err) diff --git a/go/pkg/idl/testdata/example.stef b/go/pkg/idl/testdata/example.stef index 632e8fd..1f794b7 100644 --- a/go/pkg/idl/testdata/example.stef +++ b/go/pkg/idl/testdata/example.stef @@ -8,9 +8,18 @@ struct Book { Title string // The title of the book. PublishedOn Date // When was it published. Publisher string dict(Publisher) // Publishers name, encoded with a dict. + Category Category Authors []Person // Zero or more authors of the book. } +enum Category { + Fiction = 1 + NonFiction = 2 + HexMystery = 0x3 + OctalMystery = 0o4 + BinaryMystery = 0b101 +} + // BookEvent describes either a checkout or a checkin event. oneof BookEvent { Checkout CheckoutEvent diff --git a/go/pkg/schema/schema.go b/go/pkg/schema/schema.go index 1156182..43d101b 100644 --- a/go/pkg/schema/schema.go +++ b/go/pkg/schema/schema.go @@ -9,6 +9,7 @@ type Schema struct { PackageName string `json:"package,omitempty"` Structs map[string]*Struct `json:"structs"` Multimaps map[string]*Multimap `json:"multimaps"` + Enums map[string]*Enum } type Compatibility int @@ -301,7 +302,8 @@ type FieldType struct { Array *FieldType `json:"array,omitempty"` Struct string `json:"struct,omitempty"` MultiMap string `json:"multimap,omitempty"` - DictName string `json:"dict,omitempty"` + Enum string + DictName string `json:"dict,omitempty"` } type MultimapField struct { @@ -313,3 +315,13 @@ type Multimap struct { Key MultimapField `json:"key"` Value MultimapField `json:"value"` } + +type Enum struct { + Name string + Fields []EnumField +} + +type EnumField struct { + Name string + Value uint64 +}