Add enum to IDL and Schema (#54)

It is now possible to declare enums in the IDL. We will use the enums in a future PR to generate enum code.
splunk · Feb 28, 2025 · 7588e1e · 7588e1e
1 parent c5c038a
commit 7588e1e
Show file tree

Hide file tree

Showing 5 changed files with 226 additions and 10 deletions.
diff --git a/go/pkg/idl/lexer.go b/go/pkg/idl/lexer.go
@@ -5,6 +5,7 @@ import (
 	"errors"
 	"fmt"
 	"io"
+	"strconv"
 	"unicode"
 )
 
@@ -23,8 +24,9 @@ type Lexer struct {
 	curPos  Pos
 	prevPos Pos
 
-	identRunes []rune
+	tokenRunes []rune
 	ident      string
+	uintNumber uint64
 }
 
 // Pos indicates a position in the input stream.
@@ -46,6 +48,7 @@ const (
 	tStruct
 	tOneof
 	tMultimap
+	tEnum
 
 	tOptional
 	tRoot
@@ -60,6 +63,9 @@ const (
 	tString
 	tBytes
 
+	tIntNumber
+
+	tAssign   = '='
 	tLBracket = '['
 	tRBracket = ']'
 	tLParen   = '('
@@ -88,6 +94,7 @@ var keywords = map[string]Token{
 	"struct":   tStruct,
 	"oneof":    tOneof,
 	"multimap": tMultimap,
+	"enum":     tEnum,
 	"optional": tOptional,
 	"root":     tRoot,
 	"dict":     tDict,
@@ -151,6 +158,8 @@ func (l *Lexer) Next() {
 	}
 
 	switch l.nextRune {
+	case tAssign:
+		l.token = tAssign
 	case tLParen:
 		l.token = tLParen
 	case tRParen:
@@ -168,6 +177,10 @@ func (l *Lexer) Next() {
 			// This is a letter. It must a start of an identifier or keyword.
 			l.readIdentOrKeyword()
 			return
+		} else if isDigit(l.nextRune) {
+			// This is a digit. It must be a number.
+			l.readUint64Number()
+			return
 		}
 		l.token = tError
 		l.errMsg = fmt.Sprintf("invalid character: %c", l.nextRune)
@@ -234,12 +247,12 @@ func (l *Lexer) readNextRune() {
 }
 
 func (l *Lexer) readIdentOrKeyword() Token {
-	l.identRunes = l.identRunes[:0]
+	l.tokenRunes = l.tokenRunes[:0]
 
 	// The first character is already read. Subsequent characters must be
 	// letters, digits or underscore.
 	for (unicode.IsLetter(l.nextRune) || unicode.IsDigit(l.nextRune) || l.nextRune == '_') && !l.isError {
-		l.identRunes = append(l.identRunes, l.nextRune)
+		l.tokenRunes = append(l.tokenRunes, l.nextRune)
 		l.readNextRune()
 		if l.isEOF {
 			break
@@ -250,7 +263,7 @@ func (l *Lexer) readIdentOrKeyword() Token {
 		}
 	}
 
-	l.ident = string(l.identRunes)
+	l.ident = string(l.tokenRunes)
 
 	// Check if it is a keyword.
 	if token, ok := keywords[l.ident]; ok {
@@ -273,3 +286,48 @@ func (l *Lexer) Ident() string {
 func (l *Lexer) TokenStartPos() Pos {
 	return l.prevPos
 }
+
+// Uint64Number returns the value of the most recently lexed integer literal.
+// The value is stored by readUint64Number together with the tIntNumber token.
+// NOTE(review): presumably only meaningful while Token() is tIntNumber —
+// confirm that nothing else relies on the stale value.
+func (l *Lexer) Uint64Number() uint64 {
+	return l.uintNumber
+}
+
+// isDigit reports whether r is one of the ASCII decimal digits '0' through '9'.
+// Digits from other scripts are deliberately not matched.
+func isDigit(r rune) bool {
+	return '0' <= r && r <= '9'
+}
+
+// isNumberContinuation reports whether r may appear after the first digit of
+// an integer literal. It accepts decimal digits, hexadecimal digits, the base
+// prefix letters (b, o, x in either case) and the '_' digit separator.
+//
+// The check is intentionally permissive: it only decides where the literal
+// ends. Validation of the collected text is left to strconv.ParseUint.
+func isNumberContinuation(r rune) bool {
+	switch {
+	case r >= '0' && r <= '9':
+		// Decimal digits.
+	case r >= 'a' && r <= 'f', r >= 'A' && r <= 'F':
+		// Hexadecimal digits, without which a literal such as 0xFF would be
+		// cut short to "0x". This range also covers the 'b'/'B' binary prefix.
+	case r == 'x', r == 'X', r == 'o', r == 'O':
+		// Hexadecimal and octal base prefixes.
+	case r == '_':
+		// Digit separator.
+	default:
+		return false
+	}
+	return true
+}
+
+// readUint64Number lexes an unsigned integer literal whose first rune is
+// already in l.nextRune. It collects runes for as long as
+// isNumberContinuation accepts them and converts the text with
+// strconv.ParseUint using base 0, so the base is taken from the literal's
+// prefix.
+//
+// On success the token becomes tIntNumber and the value is stored in
+// l.uintNumber. If the text does not parse as a 64-bit unsigned integer the
+// token becomes tError and l.errMsg describes the offending text. If the lexer
+// is already in the error state the token becomes tError and l.errMsg is left
+// untouched.
+func (l *Lexer) readUint64Number() {
+	l.tokenRunes = l.tokenRunes[:0]
+
+	// Consume the current rune, then keep going while the next one can still
+	// belong to the literal.
+	for more := true; more; {
+		if l.isError {
+			l.token = tError
+			return
+		}
+		l.tokenRunes = append(l.tokenRunes, l.nextRune)
+		l.readNextRune()
+		more = !l.isEOF && isNumberContinuation(l.nextRune)
+	}
+
+	text := string(l.tokenRunes)
+
+	// Base 0 lets ParseUint pick the base from the literal's prefix.
+	parsed, err := strconv.ParseUint(text, 0, 64)
+	if err != nil {
+		l.token = tError
+		l.errMsg = fmt.Sprintf("invalid number: %s", text)
+		return
+	}
+
+	l.token = tIntNumber
+	l.uintNumber = parsed
+}
+
+// ErrMsg returns the message recorded by the lexer for its most recent error,
+// for example an invalid character or an invalid number.
+// NOTE(review): presumably only meaningful while Token() is tError — confirm.
+func (l *Lexer) ErrMsg() string {
+	return l.errMsg
+}
diff --git a/go/pkg/idl/parser.go b/go/pkg/idl/parser.go
@@ -48,6 +48,7 @@ func (p *Parser) Parse() error {
 	p.schema = &schema.Schema{
 		Structs:   map[string]*schema.Struct{},
 		Multimaps: map[string]*schema.Multimap{},
+		Enums:     map[string]*schema.Enum{},
 	}
 
 	if err := p.parsePackage(); err != nil {
@@ -63,6 +64,8 @@ func (p *Parser) Parse() error {
 			err = p.parseOneof()
 		case tMultimap:
 			err = p.parseMultimap()
+		case tEnum:
+			err = p.parseEnum()
 		default:
 			return p.error("expected struct, oneof or multimap")
 		}
@@ -76,13 +79,22 @@ func (p *Parser) Parse() error {
 	return p.resolveFieldTypes()
 }
 
+// isTopLevelNameUsed reports whether name is already registered in the schema
+// as a struct, a multimap or an enum. An entry whose stored value is nil is
+// treated as unused.
+func (p *Parser) isTopLevelNameUsed(name string) bool {
+	s := p.schema
+	switch {
+	case s.Structs[name] != nil:
+		return true
+	case s.Multimaps[name] != nil:
+		return true
+	default:
+		return s.Enums[name] != nil
+	}
+}
+
 func (p *Parser) parseStruct() (*schema.Struct, error) {
 	p.lexer.Next() // skip "struct"
 
 	if p.lexer.Token() != tIdent {
 		return nil, p.error("struct name expected")
 	}
 	structName := p.lexer.Ident()
+
+	if p.isTopLevelNameUsed(structName) {
+		return nil, p.error("duplicate top-level identifier: " + structName)
+	}
+
 	p.lexer.Next()
 
 	str := &schema.Struct{
@@ -126,6 +138,11 @@ func (p *Parser) parseMultimap() error {
 		return p.error("multimap name expected")
 	}
 	multimapName := p.lexer.Ident()
+
+	if p.isTopLevelNameUsed(multimapName) {
+		return p.error("duplicate top-level identifier: " + multimapName)
+	}
+
 	p.lexer.Next()
 
 	mm := &schema.Multimap{
@@ -387,11 +404,36 @@ func (p *Parser) resolveFieldTypes() error {
 }
 
 func (p *Parser) resolveFieldType(fieldType *schema.FieldType) error {
-	if fieldType.Struct != "" {
-		_, ok := p.schema.Multimaps[fieldType.Struct]
-		if ok {
-			fieldType.MultiMap = fieldType.Struct
+	typeName := fieldType.Struct
+	if typeName != "" {
+		matches := 0
+		_, isStruct := p.schema.Structs[typeName]
+		if isStruct {
+			matches++
+		}
+
+		_, isMultimap := p.schema.Multimaps[typeName]
+		if isMultimap {
+			fieldType.MultiMap = typeName
+			fieldType.Struct = ""
+			matches++
+		}
+
+		_, isEnum := p.schema.Enums[typeName]
+		if isEnum {
+			// All enums are uint64.
+			t := schema.PrimitiveTypeUint64
+			fieldType.Primitive = &t
+			fieldType.Enum = typeName
 			fieldType.Struct = ""
+			matches++
+		}
+
+		if matches == 0 {
+			return p.error("unknown type: " + typeName)
+		}
+		if matches > 1 {
+			return p.error("ambiguous type: " + typeName)
 		}
 	}
 	return nil
@@ -409,3 +451,78 @@ func (p *Parser) parsePackage() error {
 	}
 	return nil
 }
+
+// parseEnum parses a complete enum declaration of the form
+//
+//	enum Name { Field = value ... }
+//
+// with the lexer positioned on the "enum" keyword. The enum is registered in
+// p.schema.Enums as soon as its name has been accepted, before the body is
+// parsed. An error is returned if the name is missing, if the name is already
+// used by another top-level declaration, or if the body is malformed.
+func (p *Parser) parseEnum() error {
+	p.lexer.Next() // consume the "enum" keyword
+
+	if p.lexer.Token() != tIdent {
+		return p.error("enum name expected")
+	}
+
+	// Check for a clash while the lexer is still on the name, so that the
+	// reported position points at the offending identifier.
+	name := p.lexer.Ident()
+	if p.isTopLevelNameUsed(name) {
+		return p.error("duplicate top-level identifier: " + name)
+	}
+	p.lexer.Next() // consume the enum name
+
+	decl := &schema.Enum{Name: name}
+	p.schema.Enums[decl.Name] = decl
+
+	// Body: '{' fields '}'.
+	if err := p.eat(tLBrace); err != nil {
+		return err
+	}
+	if err := p.parseEnumFields(decl); err != nil {
+		return err
+	}
+	if err := p.eat(tRBrace); err != nil {
+		return err
+	}
+
+	return nil
+}
+
+// parseEnumFields parses zero or more "Name = value" entries and appends them
+// to enum.Fields. It stops at the first token that is not an identifier and
+// leaves that token for the caller. The first parse error is returned.
+//
+// NOTE(review): duplicate field names and duplicate values are not rejected
+// here — confirm whether that validation belongs in the parser.
+func (p *Parser) parseEnumFields(enum *schema.Enum) error {
+	for {
+		ok, err := p.parseEnumField(enum)
+		if err != nil {
+			return err
+		}
+		if !ok {
+			return nil
+		}
+	}
+}
+
+// parseEnumField parses a single "Name = value" entry.
+//
+// It returns (true, nil) after appending the parsed field to enum.Fields,
+// (false, nil) when the current token is not an identifier, meaning there are
+// no more fields, and (false, err) when the entry is malformed. The field is
+// appended only once it has been parsed completely, so a failed parse does not
+// leave a half-initialized entry behind.
+func (p *Parser) parseEnumField(enum *schema.Enum) (bool, error) {
+	if p.lexer.Token() != tIdent {
+		return false, nil
+	}
+
+	// Capture the name before advancing; the lexer reuses its identifier slot.
+	name := p.lexer.Ident()
+	p.lexer.Next() // skip field name
+
+	if err := p.eat(tAssign); err != nil {
+		return false, err
+	}
+
+	if p.lexer.Token() != tIntNumber {
+		errMsg := "enum field value expected"
+		if p.lexer.Token() == tError {
+			// Surface the lexer's own diagnostic, e.g. an invalid number.
+			errMsg += ": " + p.lexer.ErrMsg()
+		}
+		return false, p.error(errMsg)
+	}
+
+	enum.Fields = append(enum.Fields, schema.EnumField{
+		Name:  name,
+		Value: p.lexer.Uint64Number(),
+	})
+	p.lexer.Next() // skip field value
+
+	return true, nil
+}
diff --git a/go/pkg/idl/parser_test.go b/go/pkg/idl/parser_test.go
@@ -44,6 +44,22 @@ func TestParserErrors(t *testing.T) {
 			input: "package abc\nstruct MyStruct {\nField []struct",
 			err:   "test.stef:3:10: type specifier expected after []",
 		},
+		{
+			input: "package abc\nstruct MyStruct {\nField UnknownType }",
+			err:   "test.stef:3:20: unknown type: UnknownType",
+		},
+		{
+			input: "package abc oneof A {} struct A {}",
+			err:   "test.stef:1:31: duplicate top-level identifier: A",
+		},
+		{
+			input: "package abc enum {}",
+			err:   "test.stef:1:18: enum name expected",
+		},
+		{
+			input: "package abc enum Enum { Value = }",
+			err:   "test.stef:1:33: enum field value expected",
+		},
 	}
 
 	for _, test := range tests {
@@ -79,7 +95,11 @@ func TestParserOtelSTEF(t *testing.T) {
 	jsonBytes, err := os.ReadFile("testdata/oteltef.wire.json")
 	require.NoError(t, err)
 
-	var schem schema.Schema
+	schem := schema.Schema{
+		Structs:   map[string]*schema.Struct{},
+		Multimaps: map[string]*schema.Multimap{},
+		Enums:     map[string]*schema.Enum{},
+	}
 	err = json.Unmarshal(jsonBytes, &schem)
 	require.NoError(t, err)
 

diff --git a/go/pkg/idl/testdata/example.stef b/go/pkg/idl/testdata/example.stef
@@ -8,9 +8,18 @@ struct Book {
     Title string                     // The title of the book.
     PublishedOn Date                 // When was it published.
     Publisher string dict(Publisher) // Publishers name, encoded with a dict.
+    Category Category
     Authors []Person                 // Zero or more authors of the book.
 }
 
+// Category classifies a book. The last three values are written as
+// hexadecimal, octal and binary literals.
+enum Category {
+    Fiction = 1
+    NonFiction = 2
+    HexMystery = 0x3
+    OctalMystery = 0o4
+    BinaryMystery = 0b101
+}
+
 // BookEvent describes either a checkout or a checkin event.
 oneof BookEvent {
     Checkout CheckoutEvent

diff --git a/go/pkg/schema/schema.go b/go/pkg/schema/schema.go
@@ -9,6 +9,7 @@ type Schema struct {
 	PackageName string               `json:"package,omitempty"`
 	Structs     map[string]*Struct   `json:"structs"`
 	Multimaps   map[string]*Multimap `json:"multimaps"`
+	Enums       map[string]*Enum
 }
 
 type Compatibility int
@@ -301,7 +302,8 @@ type FieldType struct {
 	Array     *FieldType          `json:"array,omitempty"`
 	Struct    string              `json:"struct,omitempty"`
 	MultiMap  string              `json:"multimap,omitempty"`
-	DictName  string              `json:"dict,omitempty"`
+	Enum      string
+	DictName  string `json:"dict,omitempty"`
 }
 
 type MultimapField struct {
@@ -313,3 +315,13 @@ type Multimap struct {
 	Key   MultimapField `json:"key"`
 	Value MultimapField `json:"value"`
 }
+
+// Enum describes an enum declared in the schema.
+// NOTE(review): unlike Multimap, the fields carry no json tags — confirm
+// whether the default (capitalized) JSON keys are intended.
+type Enum struct {
+	// Name is the enum's identifier.
+	Name string
+	// Fields lists the enum's named values.
+	Fields []EnumField
+}
+
+// EnumField is a single named value of an Enum.
+type EnumField struct {
+	// Name is the field's identifier.
+	Name string
+	// Value is the unsigned 64-bit number assigned to the field.
+	Value uint64
+}