diff --git a/go/otel/oteltef/spans.go b/go/otel/oteltef/spans.go index a419b2b..19524fa 100644 --- a/go/otel/oteltef/spans.go +++ b/go/otel/oteltef/spans.go @@ -422,7 +422,7 @@ func (d *SpansDecoder) Init(state *ReaderState, columns *pkg.ReadColumnSet) erro d.column = columns.Column() - d.lastVal.init(nil, 0) + d.lastVal.Init() d.lastValPtr = &d.lastVal var err error diff --git a/go/pkg/idl/lexer.go b/go/pkg/idl/lexer.go new file mode 100644 index 0000000..d097b6e --- /dev/null +++ b/go/pkg/idl/lexer.go @@ -0,0 +1,275 @@ +package idl + +import ( + "bufio" + "errors" + "fmt" + "io" + "unicode" +) + +// Lexer splits a UTF8-encoded input into tokens. +type Lexer struct { + input *bufio.Reader + token Token + + nextRune rune + prevWasCR bool + + isEOF bool + isError bool + errMsg string + + curPos Pos + prevPos Pos + + identRunes []rune + ident string +} + +// Pos indicates a position in the input stream. +type Pos struct { + ByteOfs uint + Line uint + Col uint +} + +type Token uint + +const ( + tError Token = iota + tEOF + + tPackage + tIdent + + tStruct + tOneof + tMultimap + + tOptional + tRoot + tDict + tKey + tValue + + tBool + tInt64 + tUint64 + tFloat64 + tString + tBytes + + tLBracket = '[' + tRBracket = ']' + tLParen = '(' + tRParen = ')' + tLBrace = '{' + tRBrace = '}' +) + +func (t Token) String() string { + str, ok := keywordsReverse[t] + if ok { + return str + } + switch t { + case tEOF: + return "EOF" + case tIdent: + return "identifier" + default: + return string(byte(t)) + } +} + +var keywords = map[string]Token{ + "package": tPackage, + "struct": tStruct, + "oneof": tOneof, + "multimap": tMultimap, + "optional": tOptional, + "root": tRoot, + "dict": tDict, + "key": tKey, + "value": tValue, + "bool": tBool, + "int64": tInt64, + "uint64": tUint64, + "float64": tFloat64, + "string": tString, + "bytes": tBytes, +} + +var keywordsReverse = func() map[Token]string { + m := make(map[Token]string) + for k, v := range keywords { + m[v] = k + } + return m +}() + 
+// NewLexer creates a Lexer that reads UTF8-encoded text from input.
+// The first token is read eagerly, so Token() is valid right after the call.
+func NewLexer(input io.Reader) *Lexer {
+	l := &Lexer{
+		input: bufio.NewReader(input),
+		curPos: Pos{
+			ByteOfs: 0,
+			Line:    1,
+			Col:     1,
+		},
+	}
+	// Fetch the first rune.
+	l.readNextRune()
+
+	// Fetch the first token.
+	l.Next()
+	return l
+}
+
+// Token returns the token that was read by the most recent Next() call.
+func (l *Lexer) Token() Token {
+	return l.token
+}
+
+// Next reads the input for the next token. After that Token() will return
+// the token that was read.
+//
+// If Lexer input is at EOF then the next Token() call will return tEOF.
+// If reading failed (e.g. if the input is not valid UTF8) or a comment is
+// malformed (a '/' that is not followed by a second '/') the next Token() call
+// will return tError.
+func (l *Lexer) Next() {
+	l.prevPos = l.curPos
+
+	l.skipWhiteSpaceOrComment()
+
+	// Check the error flag before the EOF flag: a lone '/' at the very end of
+	// the input raises both, and it must not be reported as a clean EOF.
+	if l.isError {
+		l.token = tError
+		l.isError = false
+		return
+	}
+	if l.isEOF {
+		l.token = tEOF
+		return
+	}
+
+	switch l.nextRune {
+	case tLParen:
+		l.token = tLParen
+	case tRParen:
+		l.token = tRParen
+	case tLBracket:
+		l.token = tLBracket
+	case tRBracket:
+		l.token = tRBracket
+	case tRBrace:
+		l.token = tRBrace
+	case tLBrace:
+		l.token = tLBrace
+	default:
+		if unicode.IsLetter(l.nextRune) {
+			// This is a letter. It must be the start of an identifier or keyword.
+			l.readIdentOrKeyword()
+			return
+		}
+		l.token = tError
+		l.errMsg = fmt.Sprintf("invalid character: %c", l.nextRune)
+	}
+	// Consume the single-rune token (or the offending rune) and read ahead.
+	l.readNextRune()
+}
+
+// skipWhiteSpaceOrComment advances past whitespace and "//" comments.
+// It stops at the first other rune, at EOF, or when an error is recorded.
+func (l *Lexer) skipWhiteSpaceOrComment() {
+	for !l.isEOF && !l.isError {
+		if unicode.IsSpace(l.nextRune) {
+			l.readNextRune()
+		} else if l.nextRune == '/' {
+			l.skipComment()
+		} else {
+			break
+		}
+	}
+}
+
+// skipComment consumes a "//" comment up to, but not including, the line
+// terminator. It is called with nextRune holding the first '/'.
+//
+// If the first '/' is not followed by a second '/', the failure is recorded in
+// isError and errMsg so that Next() reports tError.
+func (l *Lexer) skipComment() {
+	l.readNextRune()
+	if l.isError {
+		// The read itself failed; readNextRune already recorded the error.
+		return
+	}
+	if l.isEOF || l.nextRune != '/' {
+		// Raise isError rather than assigning l.token: Next() chooses the token
+		// from the flags after skipping, so a token set here would be overwritten.
+		l.isError = true
+		l.errMsg = "expected start of comment"
+		return
+	}
+
+	for !l.isEOF && !l.isError && l.nextRune != '\r' && l.nextRune != '\n' {
+		l.readNextRune()
+	}
+}
+
+// readNextRune reads one rune of lookahead into nextRune and advances curPos.
+//
+// At the end of the input it sets isEOF; on any other read error it sets
+// isError and errMsg. In both cases nextRune and curPos are left unchanged.
+func (l *Lexer) readNextRune() {
+	nextRune, size, err := l.input.ReadRune()
+	if err != nil {
+		if errors.Is(err, io.EOF) {
+			l.isEOF = true
+		} else {
+			l.isError = true
+			l.errMsg = "invalid character"
+		}
+		return
+	}
+	l.nextRune = nextRune
+	l.curPos.ByteOfs += uint(size)
+	l.curPos.Col++
+
+	// Handle any of CR,LF,CRLF as a new line.
+	const cCR = '\r'
+	const cLF = '\n'
+	if l.nextRune == cCR {
+		l.curPos.Line++
+		l.curPos.Col = 1
+		l.prevWasCR = true
+	} else if l.nextRune == cLF {
+		// The LF of a CRLF pair was already counted when the CR was read.
+		if !l.prevWasCR {
+			l.curPos.Line++
+			l.curPos.Col = 1
+		}
+		l.prevWasCR = false
+	} else {
+		l.prevWasCR = false
+	}
+}
+
+// readIdentOrKeyword reads an identifier starting at nextRune, stores its text
+// in l.ident and sets l.token to the matching keyword token, or to tIdent if
+// the text is not a keyword. If a read error occurs it sets and returns tError.
+func (l *Lexer) readIdentOrKeyword() Token {
+	l.identRunes = l.identRunes[:0]
+
+	// The first character is already read. Subsequent characters must be
+	// letters, digits or underscore.
+	for (unicode.IsLetter(l.nextRune) || unicode.IsDigit(l.nextRune) || l.nextRune == '_') && !l.isError {
+		l.identRunes = append(l.identRunes, l.nextRune)
+		l.readNextRune()
+		if l.isEOF {
+			// nextRune still holds the rune just appended; stop here so it
+			// is not appended again.
+			break
+		}
+		if l.isError {
+			l.token = tError
+			return tError
+		}
+	}
+
+	l.ident = string(l.identRunes)
+
+	// Check if it is a keyword.
+	if token, ok := keywords[l.ident]; ok {
+		l.token = token
+		return token
+	}
+
+	l.token = tIdent
+	return tIdent
+}
+
+// Ident will return the identifier if the current token is tIdent.
+// Use Token() first. +func (l *Lexer) Ident() string { + return l.ident +} + +// TokenStartPos will return the starting position of the last read +// token after Next() call. +func (l *Lexer) TokenStartPos() Pos { + return l.prevPos +} diff --git a/go/pkg/idl/lexer_test.go b/go/pkg/idl/lexer_test.go new file mode 100644 index 0000000..1362cba --- /dev/null +++ b/go/pkg/idl/lexer_test.go @@ -0,0 +1,52 @@ +package idl + +import ( + "bytes" + "os" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestLexer(t *testing.T) { + l := NewLexer(bytes.NewBufferString("struct abc {}")) + + tokens := []Token{tStruct, tIdent, tLBrace, tRBrace, tEOF} + i := 0 + for { + token := l.Token() + assert.Equal(t, tokens[i], token, i) + i++ + if token == tEOF { + break + } + l.Next() + } +} + +func FuzzLexer(f *testing.F) { + f.Add([]byte(nil)) + f.Add([]byte("")) + f.Add([]byte("struct abc {}")) + + testFiles := []string{"testdata/example.stef", "testdata/otel.stef"} + for _, file := range testFiles { + content, err := os.ReadFile(file) + require.NoError(f, err) + f.Add(content) + } + + f.Fuzz( + func(t *testing.T, content []byte) { + l := NewLexer(bytes.NewBuffer(content)) + for { + token := l.Token() + if token == tEOF || token == tError { + break + } + l.Next() + } + }, + ) +} diff --git a/go/pkg/idl/parser.go b/go/pkg/idl/parser.go new file mode 100644 index 0000000..5f78bc3 --- /dev/null +++ b/go/pkg/idl/parser.go @@ -0,0 +1,411 @@ +package idl + +import ( + "fmt" + + "github.com/splunk/stef/go/pkg/schema" +) + +// Parser parses a STEF IDL input into Schema. +// +// This is a recursive descent parser with separate lexer for tokenization. +type Parser struct { + lexer *Lexer + schema *schema.Schema + fileName string +} + +// Error represents a parsing error. 
+type Error struct {
+	Msg      string // Description of the problem.
+	Filename string // File name, as passed to NewParser.
+	Pos      Pos    // Position in the input that the error refers to.
+}
+
+// Error formats the error as "filename:line:col: message".
+func (e *Error) Error() string {
+	return fmt.Sprintf("%s:%d:%d: %s", e.Filename, e.Pos.Line, e.Pos.Col, e.Msg)
+}
+
+var _ error = (*Error)(nil)
+
+// NewParser creates a new parser with specified lexer as the input.
+// fileName is used for composing error messages (if any).
+func NewParser(lexer *Lexer, fileName string) *Parser {
+	p := &Parser{fileName: fileName}
+	p.lexer = lexer
+	p.schema = &schema.Schema{}
+	return p
+}
+
+// Schema returns the parsed Schema, assuming Parse() returned nil.
+func (p *Parser) Schema() *schema.Schema {
+	return p.schema
+}
+
+// Parse an IDL input into Schema.
+// Will return an error if the input syntax is invalid.
+//
+// The input is an optional package declaration followed by one or more
+// struct, oneof or multimap declarations.
+func (p *Parser) Parse() error {
+	p.schema = &schema.Schema{
+		Structs:   map[string]*schema.Struct{},
+		Multimaps: map[string]*schema.Multimap{},
+	}
+
+	if err := p.parsePackage(); err != nil {
+		return err
+	}
+
+	for {
+		var err error
+		switch p.lexer.Token() {
+		case tStruct:
+			_, err = p.parseStruct()
+		case tOneof:
+			err = p.parseOneof()
+		case tMultimap:
+			err = p.parseMultimap()
+		default:
+			return p.error("expected struct, oneof or multimap")
+		}
+		if err != nil {
+			return err
+		}
+		if p.lexer.Token() == tEOF {
+			break
+		}
+	}
+	// Type names may be forward references, so they are resolved only after
+	// all declarations are read.
+	return p.resolveFieldTypes()
+}
+
+// parseStruct parses a struct declaration (name, modifiers and the fields in
+// braces) and registers it in the schema under its name.
+// It is called with the current token being the "struct" or "oneof" keyword.
+func (p *Parser) parseStruct() (*schema.Struct, error) {
+	p.lexer.Next() // skip "struct"
+
+	if p.lexer.Token() != tIdent {
+		return nil, p.error("struct name expected")
+	}
+	structName := p.lexer.Ident()
+	p.lexer.Next()
+
+	str := &schema.Struct{
+		Name: structName,
+	}
+	p.schema.Structs[str.Name] = str
+
+	if err := p.parseStructModifiers(str); err != nil {
+		return nil, err
+	}
+
+	if err := p.eat(tLBrace); err != nil {
+		return nil, err
+	}
+
+	if err := p.parseStructFields(str); err != nil {
+		return nil, err
+	}
+
+	if err := p.eat(tRBrace); err != nil {
+		return nil, err
+	}
+
+	return str, nil
+}
+
+// parseOneof parses a oneof declaration and registers it in the schema as a
+// struct with the OneOf flag set.
+func (p *Parser) parseOneof() error {
+	// "oneof" syntax is identical to struct, except we need to set "OneOf" flag.
+	str, err := p.parseStruct()
+	if err != nil {
+		return err
+	}
+	str.OneOf = true
+	return nil
+}
+
+// parseMultimap parses a multimap declaration: a name followed by a "key"
+// field and a "value" field in braces. The multimap is registered in the
+// schema under its name.
+func (p *Parser) parseMultimap() error {
+	p.lexer.Next() // skip "multimap"
+
+	if p.lexer.Token() != tIdent {
+		return p.error("multimap name expected")
+	}
+	multimapName := p.lexer.Ident()
+	p.lexer.Next()
+
+	mm := &schema.Multimap{
+		Name: multimapName,
+	}
+	p.schema.Multimaps[mm.Name] = mm
+
+	if err := p.eat(tLBrace); err != nil {
+		return err
+	}
+
+	// Parse the key.
+	if err := p.eat(tKey); err != nil {
+		return err
+	}
+	if err := p.parseMultimapField(&mm.Key); err != nil {
+		return err
+	}
+
+	// Parse the value.
+	if err := p.eat(tValue); err != nil {
+		return err
+	}
+	if err := p.parseMultimapField(&mm.Value); err != nil {
+		return err
+	}
+
+	if err := p.eat(tRBrace); err != nil {
+		return err
+	}
+
+	return nil
+}
+
+// error creates an *Error with the given message, the parser's file name and
+// the position reported by the lexer's TokenStartPos().
+func (p *Parser) error(msg string) error {
+	return &Error{
+		Msg:      msg,
+		Filename: p.fileName,
+		Pos:      p.lexer.TokenStartPos(),
+	}
+}
+
+// parseStructModifiers parses zero or more struct modifiers that follow the
+// struct name.
+func (p *Parser) parseStructModifiers(str *schema.Struct) error {
+	for {
+		err, ok := p.parseStructModifier(str)
+		if err != nil {
+			return err
+		}
+		if !ok {
+			break
+		}
+	}
+	return nil
+}
+
+// parseStructModifier parses a single struct modifier, either "dict(name)" or
+// "root", and applies it to str.
+//
+// The returned bool is true if a modifier was consumed and false if the
+// current token does not start a modifier (nothing is consumed in that case).
+func (p *Parser) parseStructModifier(str *schema.Struct) (error, bool) {
+	switch p.lexer.Token() {
+	case tDict:
+		dictName, err := p.parseDictModifier()
+		if err != nil {
+			return err, false
+		}
+		str.DictName = dictName
+	case tRoot:
+		str.IsRoot = true
+		p.lexer.Next()
+	default:
+		return nil, false
+	}
+	// A modifier was consumed. Report it so that parseStructModifiers keeps
+	// looking for further modifiers, e.g. "dict(Name) root".
+	return nil, true
+}
+
+// parseDictModifier parses "dict(name)" and returns the dict name.
+// It is called with the current token being the "dict" keyword.
+func (p *Parser) parseDictModifier() (string, error) {
+	p.lexer.Next() // skip "dict"
+
+	if err := p.eat(tLParen); err != nil {
+		return "", err
+	}
+
+	if p.lexer.Token() != tIdent {
+		return "", p.error("dict name expected")
+	}
+	dictName := p.lexer.Ident()
+	p.lexer.Next()
+
+	if err := p.eat(tRParen); err != nil {
+		return "", err
+	}
+	return dictName, nil
+}
+
+// eat checks that the current token is the expected one and skips it.
+func (p *Parser) eat(token Token) error {
+	if p.lexer.Token() != token {
+		return p.error(fmt.Sprintf("expected %s but got %s", token, p.lexer.Token()))
+	}
+	p.lexer.Next()
+	return nil
+}
+
+// parseStructFields parses zero or more field declarations and appends them
+// to str.Fields.
+func (p *Parser) parseStructFields(str *schema.Struct) error {
+	for {
+		err, ok := p.parseStructField(str)
+		if err != nil {
+			return err
+		}
+		if !ok {
+			break
+		}
+	}
+	return nil
+}
+
+// parseStructField parses one field declaration (name, type, modifiers) and
+// appends it to str.Fields.
+//
+// The returned bool is false if the current token is not an identifier, i.e.
+// there are no more fields.
+func (p *Parser) parseStructField(str *schema.Struct) (error, bool) {
+	if p.lexer.Token() != tIdent {
+		return nil, false
+	}
+
+	// Append first and then fill in the element in place.
+	str.Fields = append(str.Fields, schema.StructField{Name: p.lexer.Ident()})
+	field := &str.Fields[len(str.Fields)-1]
+
+	p.lexer.Next()
+
+	if err := p.parseFieldType(&field.FieldType); err != nil {
+		return err, false
+	}
+	if err := p.parseStructFieldModifiers(field); err != nil {
+		return err, false
+	}
+
+	return nil, true
+}
+
+// parseFieldType parses a field type: an optional "[]" array prefix followed
+// by a primitive type keyword or the name of a user-defined type. The parsed
+// type is stored in field.
+func (p *Parser) parseFieldType(field *schema.FieldType) error {
+	isArray := false
+	if p.lexer.Token() == tLBracket {
+		isArray = true
+		p.lexer.Next()
+		// We expect a matching right bracket.
+		if err := p.eat(tRBracket); err != nil {
+			return err
+		}
+	}
+
+	ft := schema.FieldType{}
+	switch p.lexer.Token() {
+	case tIdent:
+		// Temporarily store in "Struct", but this may also be a oneof or multimap.
+		// We will resolve to the correct type later, after all input is read,
+		// since it may be a forward reference.
+		ft.Struct = p.lexer.Ident()
+
+	case tBool:
+		v := schema.PrimitiveTypeBool
+		ft.Primitive = &v
+
+	case tInt64:
+		v := schema.PrimitiveTypeInt64
+		ft.Primitive = &v
+
+	case tUint64:
+		v := schema.PrimitiveTypeUint64
+		ft.Primitive = &v
+
+	case tFloat64:
+		v := schema.PrimitiveTypeFloat64
+		ft.Primitive = &v
+
+	case tString:
+		v := schema.PrimitiveTypeString
+		ft.Primitive = &v
+
+	case tBytes:
+		v := schema.PrimitiveTypeBytes
+		ft.Primitive = &v
+
+	default:
+		if isArray {
+			return p.error("type specifier expected after []")
+		}
+		// A type is mandatory. Accepting its absence would leave the field
+		// with an empty FieldType in the resulting schema.
+		return p.error("type specifier expected")
+	}
+	p.lexer.Next()
+
+	if isArray {
+		// For arrays the parsed type is the element type.
+		field.Array = &ft
+	} else {
+		*field = ft
+	}
+
+	return nil
+}
+
+// parseStructFieldModifiers parses zero or more field modifiers that follow
+// the field type.
+func (p *Parser) parseStructFieldModifiers(field *schema.StructField) error {
+	for {
+		err, ok := p.parseStructFieldModifier(field)
+		if err != nil {
+			return err
+		}
+		if !ok {
+			break
+		}
+	}
+	return nil
+}
+
+// parseStructFieldModifier parses a single field modifier, either
+// "dict(name)" or "optional", and applies it to field.
+//
+// The returned bool is true if a modifier was consumed and false if the
+// current token does not start a modifier.
+func (p *Parser) parseStructFieldModifier(field *schema.StructField) (error, bool) {
+	switch p.lexer.Token() {
+	case tDict:
+		dictName, err := p.parseDictModifier()
+		if err != nil {
+			return err, false
+		}
+		field.DictName = dictName
+		return nil, true
+	case tOptional:
+		field.Optional = true
+		p.lexer.Next()
+		return nil, true
+	default:
+		return nil, false
+	}
+}
+
+// parseMultimapField parses the type of a multimap key or value, optionally
+// followed by a "dict(name)" modifier.
+func (p *Parser) parseMultimapField(field *schema.MultimapField) error {
+	if err := p.parseFieldType(&field.Type); err != nil {
+		return err
+	}
+
+	if p.lexer.Token() == tDict {
+		dictName, err := p.parseDictModifier()
+		if err != nil {
+			return err
+		}
+		field.Type.DictName = dictName
+	}
+
+	return nil
+}
+
+// resolveFieldTypes calls resolveFieldType for the type of every struct field
+// and of every multimap key and value in the schema.
+func (p *Parser) resolveFieldTypes() error {
+	for _, v := range p.schema.Structs {
+		for i := range v.Fields {
+			field := &v.Fields[i]
+			if err := p.resolveFieldType(&field.FieldType); err != nil {
+				return err
+			}
+		}
+	}
+	for _, v := range p.schema.Multimaps {
+		if err := p.resolveFieldType(&v.Key.Type); err != nil {
+			return err
+		}
+		if err := p.resolveFieldType(&v.Value.Type); err != nil {
+			return err
+		}
+	}
+	return nil
+} + +func (p *Parser) resolveFieldType(fieldType *schema.FieldType) error { + if fieldType.Struct != "" { + _, ok := p.schema.Multimaps[fieldType.Struct] + if ok { + fieldType.MultiMap = fieldType.Struct + fieldType.Struct = "" + } + } + return nil +} + +func (p *Parser) parsePackage() error { + if p.lexer.Token() == tPackage { + p.lexer.Next() // skip "package" + + if p.lexer.Token() != tIdent { + return p.error("package name expected") + } + p.schema.PackageName = p.lexer.Ident() + p.lexer.Next() + } + return nil +} diff --git a/go/pkg/idl/parser_test.go b/go/pkg/idl/parser_test.go new file mode 100644 index 0000000..ecdbb92 --- /dev/null +++ b/go/pkg/idl/parser_test.go @@ -0,0 +1,103 @@ +package idl + +import ( + "bytes" + "encoding/json" + "os" + "testing" + + "github.com/stretchr/testify/require" + + "github.com/splunk/stef/go/pkg/schema" +) + +func TestParserErrors(t *testing.T) { + tests := []struct { + input string + err string + }{ + { + input: "package ", + err: "test.stef:1:9: package name expected", + }, + { + input: "package abc\nhello", + err: "test.stef:2:1: expected struct, oneof or multimap", + }, + { + input: "package abc\nstruct string", + err: "test.stef:2:8: struct name expected", + }, + { + input: "package abc\nmultimap [", + err: "test.stef:2:10: multimap name expected", + }, + { + input: "package abc\nstruct MyStruct dict()", + err: "test.stef:2:23: dict name expected", + }, + { + input: "package abc\nstruct MyStruct dict[]", + err: "test.stef:2:22: expected ( but got [", + }, + { + input: "package abc\nstruct MyStruct {\nField []struct", + err: "test.stef:3:10: type specifier expected after []", + }, + } + + for _, test := range tests { + lexer := NewLexer(bytes.NewBufferString(test.input)) + parser := NewParser(lexer, "test.stef") + err := parser.Parse() + require.Error(t, err) + require.Equal(t, test.err, err.Error()) + } +} + +func TestParseExample(t *testing.T) { + inputFile := "testdata/example.stef" + idlBytes, err := 
os.ReadFile(inputFile) + require.NoError(t, err) + + lexer := NewLexer(bytes.NewBuffer(idlBytes)) + parser := NewParser(lexer, inputFile) + err = parser.Parse() + require.NoError(t, err) +} + +func TestParserOtelSTEF(t *testing.T) { + inputFile := "testdata/otel.stef" + idlBytes, err := os.ReadFile(inputFile) + require.NoError(t, err) + + lexer := NewLexer(bytes.NewBuffer(idlBytes)) + parser := NewParser(lexer, inputFile) + err = parser.Parse() + require.NoError(t, err) + + jsonBytes, err := os.ReadFile("testdata/oteltef.wire.json") + require.NoError(t, err) + + var schem schema.Schema + err = json.Unmarshal(jsonBytes, &schem) + require.NoError(t, err) + + require.EqualValues(t, &schem, parser.Schema()) +} + +func FuzzParser(f *testing.F) { + testFiles := []string{"testdata/example.stef", "testdata/otel.stef"} + for _, file := range testFiles { + content, err := os.ReadFile(file) + require.NoError(f, err) + f.Add(content) + } + + f.Fuzz( + func(t *testing.T, content []byte) { + p := NewParser(NewLexer(bytes.NewBuffer(content)), "temp.stef") + _ = p.Parse() + }, + ) +} diff --git a/go/pkg/idl/testdata/example.stef b/go/pkg/idl/testdata/example.stef new file mode 100644 index 0000000..632e8fd --- /dev/null +++ b/go/pkg/idl/testdata/example.stef @@ -0,0 +1,38 @@ +// Records of events that happened with books. This is the main record struct. +struct BookRecords root { + Book Book // Which book the event is about. + Event BookEvent // The event that happened. +} + +struct Book { + Title string // The title of the book. + PublishedOn Date // When was it published. + Publisher string dict(Publisher) // Publishers name, encoded with a dict. + Authors []Person // Zero or more authors of the book. +} + +// BookEvent describes either a checkout or a checkin event. +oneof BookEvent { + Checkout CheckoutEvent + Checkin CheckinEvent +} + +struct CheckoutEvent { + Date Date // when was it checked out + Person Person // who checked out the book. 
+} + +struct CheckinEvent { + Date Date + DamageValue float64 optional // Amount of damage assessed for the book. +} + +struct Person { + Name string +} + +struct Date { + Year uint64 + Month uint64 + Day uint64 +} diff --git a/go/pkg/idl/testdata/otel.stef b/go/pkg/idl/testdata/otel.stef new file mode 100755 index 0000000..5f15f92 --- /dev/null +++ b/go/pkg/idl/testdata/otel.stef @@ -0,0 +1,151 @@ +// This is Otel/STEF schema: a representation of OpenTelemetry data model +// as STEF records. Data model is virtually a direct mapping from OpenTelemetry +// Protobuf IDL for metrics and traces, see: https://github.com/open-telemetry/opentelemetry-proto/tree/main/opentelemetry/proto +// TODO: add Logs and Profiles. + +package oteltef + +multimap Attributes { + key string dict(AttributeKey) + value AnyValue +} + +multimap EnvelopeAttributes { + key string + value bytes +} + +multimap KeyValueList { + key string + value AnyValue +} + +struct Resource dict(Resource) { + // All SchemaURL fields use the same (shared) dict. + SchemaURL string dict(SchemaURL) + Attributes Attributes + DroppedAttributesCount uint64 +} + +// Point represents a metric data point. +struct Point { + StartTimestamp uint64 + Timestamp uint64 + Value PointValue + Exemplars []Exemplar +} + +struct Span { + TraceID bytes + SpanID bytes + TraceState string + ParentSpanID bytes + Flags uint64 + Name string dict(SpanName) + Kind uint64 + StartTimeUnixNano uint64 + EndTimeUnixNano uint64 + Attributes Attributes + DroppedAttributesCount uint64 + Events []Event + Links []Link + Status SpanStatus +} + +oneof PointValue { + Int64 int64 + Float64 float64 + Histogram HistogramValue + // TODO: Add Summary and Exponential Histogram value support. 
+} + +struct Metric dict(Metric) { + Name string dict(MetricName) + Description string dict(MetricDescription) + Unit string dict(MetricUnit) + Type uint64 + Metadata Attributes + HistogramBounds []float64 + AggregationTemporality uint64 + Monotonic bool +} + +struct Metrics root { + Envelope Envelope + Metric Metric + Resource Resource + Scope Scope + Attributes Attributes + Point Point +} + +struct Scope dict(Scope) { + Name string dict(ScopeName) + Version string dict(ScopeVersion) + SchemaURL string dict(SchemaURL) + Attributes Attributes + DroppedAttributesCount uint64 +} + +struct Link { + TraceID bytes + SpanID bytes + TraceState string + Flags uint64 + Attributes Attributes + DroppedAttributesCount uint64 +} + +struct HistogramValue { + Count int64 + Sum float64 optional + Min float64 optional + Max float64 optional + BucketCounts []int64 +} + +oneof AnyValue { + String string dict(AnyValueString) + Bool bool + Int64 int64 + Float64 float64 + Array []AnyValue + KVList KeyValueList + Bytes bytes +} + +struct Event { + Name string dict(SpanEventName) + TimeUnixNano uint64 + Attributes Attributes + DroppedAttributesCount uint64 +} + +struct SpanStatus { + Message string + Code uint64 +} + +struct Spans root { + Envelope Envelope + Resource Resource + Scope Scope + Span Span +} + +struct Envelope { + Attributes EnvelopeAttributes +} + +struct Exemplar { + Timestamp uint64 + Value ExemplarValue + SpanID bytes dict(Span) + TraceID bytes dict(Trace) + FilteredAttributes Attributes +} + +oneof ExemplarValue { + Int64 int64 + Float64 float64 +} diff --git a/go/pkg/idl/testdata/oteltef.wire.json b/go/pkg/idl/testdata/oteltef.wire.json new file mode 100755 index 0000000..ea938a5 --- /dev/null +++ b/go/pkg/idl/testdata/oteltef.wire.json @@ -0,0 +1,487 @@ +{ + "package": "oteltef", + "structs": { + "AnyValue": { + "name": "AnyValue", + "oneof": true, + "fields": [ + { + "primitive": 4, + "dict": "AnyValueString", + "name": "String" + }, + { + "primitive": 3, + "name": 
"Bool" + }, + { + "primitive": 0, + "name": "Int64" + }, + { + "primitive": 2, + "name": "Float64" + }, + { + "array": { + "struct": "AnyValue" + }, + "name": "Array", + "recursive": true + }, + { + "multimap": "KeyValueList", + "name": "KVList", + "recursive": true + }, + { + "primitive": 5, + "name": "Bytes" + } + ] + }, + "Envelope": { + "name": "Envelope", + "fields": [ + { + "multimap": "EnvelopeAttributes", + "name": "Attributes" + } + ] + }, + "Exemplar": { + "name": "Exemplar", + "fields": [ + { + "primitive": 1, + "name": "Timestamp" + }, + { + "struct": "ExemplarValue", + "name": "Value" + }, + { + "primitive": 5, + "dict": "Span", + "name": "SpanID" + }, + { + "primitive": 5, + "dict": "Trace", + "name": "TraceID" + }, + { + "multimap": "Attributes", + "name": "FilteredAttributes" + } + ] + }, + "ExemplarValue": { + "name": "ExemplarValue", + "oneof": true, + "fields": [ + { + "primitive": 0, + "name": "Int64" + }, + { + "primitive": 2, + "name": "Float64" + } + ] + }, + "HistogramValue": { + "name": "HistogramValue", + "fields": [ + { + "primitive": 0, + "name": "Count" + }, + { + "primitive": 2, + "name": "Sum", + "optional": true + }, + { + "primitive": 2, + "name": "Min", + "optional": true + }, + { + "primitive": 2, + "name": "Max", + "optional": true + }, + { + "array": { + "primitive": 0 + }, + "name": "BucketCounts" + } + ] + }, + "Metric": { + "name": "Metric", + "dict": "Metric", + "fields": [ + { + "primitive": 4, + "dict": "MetricName", + "name": "Name" + }, + { + "primitive": 4, + "dict": "MetricDescription", + "name": "Description" + }, + { + "primitive": 4, + "dict": "MetricUnit", + "name": "Unit" + }, + { + "primitive": 1, + "name": "Type" + }, + { + "multimap": "Attributes", + "name": "Metadata" + }, + { + "array": { + "primitive": 2 + }, + "name": "HistogramBounds" + }, + { + "primitive": 1, + "name": "AggregationTemporality" + }, + { + "primitive": 3, + "name": "Monotonic" + } + ] + }, + "Point": { + "name": "Point", + "fields": [ + { 
+ "primitive": 1, + "name": "StartTimestamp" + }, + { + "primitive": 1, + "name": "Timestamp" + }, + { + "struct": "PointValue", + "name": "Value" + }, + { + "array": { + "struct": "Exemplar" + }, + "name": "Exemplars" + } + ] + }, + "PointValue": { + "name": "PointValue", + "oneof": true, + "fields": [ + { + "primitive": 0, + "name": "Int64" + }, + { + "primitive": 2, + "name": "Float64" + }, + { + "struct": "HistogramValue", + "name": "Histogram" + } + ] + }, + "Metrics": { + "name": "Metrics", + "root": true, + "fields": [ + { + "struct": "Envelope", + "name": "Envelope" + }, + { + "struct": "Metric", + "name": "Metric" + }, + { + "struct": "Resource", + "name": "Resource" + }, + { + "struct": "Scope", + "name": "Scope" + }, + { + "multimap": "Attributes", + "name": "Attributes" + }, + { + "struct": "Point", + "name": "Point" + } + ] + }, + "Resource": { + "name": "Resource", + "dict": "Resource", + "fields": [ + { + "primitive": 4, + "dict": "SchemaURL", + "name": "SchemaURL" + }, + { + "multimap": "Attributes", + "name": "Attributes" + }, + { + "name": "DroppedAttributesCount", + "primitive": 1 + } + ] + }, + "Scope": { + "name": "Scope", + "dict": "Scope", + "fields": [ + { + "primitive": 4, + "dict": "ScopeName", + "name": "Name" + }, + { + "primitive": 4, + "dict": "ScopeVersion", + "name": "Version" + }, + { + "primitive": 4, + "dict": "SchemaURL", + "name": "SchemaURL" + }, + { + "multimap": "Attributes", + "name": "Attributes" + }, + { + "name": "DroppedAttributesCount", + "primitive": 1 + } + ] + }, + + "Span": { + "name": "Span", + "fields": [ + { + "primitive": 5, + "name": "TraceID" + }, + { + "primitive": 5, + "name": "SpanID" + }, + { + "primitive": 4, + "name": "TraceState" + }, + { + "primitive": 5, + "name": "ParentSpanID" + }, + { + "primitive": 1, + "name": "Flags" + }, + { + "primitive": 4, + "dict": "SpanName", + "name": "Name" + }, + { + "primitive": 1, + "name": "Kind" + }, + { + "primitive": 1, + "name": "StartTimeUnixNano" + }, + { + 
"primitive": 1, + "name": "EndTimeUnixNano" + }, + { + "multimap": "Attributes", + "name": "Attributes" + }, + { + "name": "DroppedAttributesCount", + "primitive": 1 + }, + { + "array": { + "struct": "Event" + }, + "name": "Events" + }, + { + "array": { + "struct": "Link" + }, + "name": "Links" + }, + { + "struct": "SpanStatus", + "name": "Status" + } + ] + }, + + "Link": { + "name": "Link", + "fields": [ + { + "primitive": 5, + "name": "TraceID" + }, + { + "primitive": 5, + "name": "SpanID" + }, + { + "primitive": 4, + "name": "TraceState" + }, + { + "primitive": 1, + "name": "Flags" + }, + { + "multimap": "Attributes", + "name": "Attributes" + }, + { + "name": "DroppedAttributesCount", + "primitive": 1 + } + ] + }, + + "Event": { + "name": "Event", + "fields": [ + { + "primitive": 4, + "dict": "SpanEventName", + "name": "Name" + }, + { + "primitive": 1, + "name": "TimeUnixNano" + }, + { + "multimap": "Attributes", + "name": "Attributes" + }, + { + "name": "DroppedAttributesCount", + "primitive": 1 + } + ] + }, + + "SpanStatus": { + "name": "SpanStatus", + "fields": [ + { + "primitive": 4, + "name": "Message" + }, + { + "primitive": 1, + "name": "Code" + } + ] + }, + + "Spans": { + "name": "Spans", + "root": true, + "fields": [ + { + "struct": "Envelope", + "name": "Envelope" + }, + { + "struct": "Resource", + "name": "Resource" + }, + { + "struct": "Scope", + "name": "Scope" + }, + { + "struct": "Span", + "name": "Span" + } + ] + } + }, + "multimaps": { + "Attributes": { + "name": "Attributes", + "key": { + "type": { + "primitive": 4, + "dict": "AttributeKey" + } + }, + "value": { + "type": { + "struct": "AnyValue" + } + } + }, + "EnvelopeAttributes": { + "name": "EnvelopeAttributes", + "key": { + "type": { + "primitive": 4 + } + }, + "value": { + "type": { + "primitive": 5 + } + } + }, + "KeyValueList": { + "name": "KeyValueList", + "key": { + "type": { + "primitive": 4 + } + }, + "value": { + "type": { + "struct": "AnyValue" + }, + "recursive": true + } + } + 
}, + "main": "Metrics" +} \ No newline at end of file diff --git a/go/pkg/schema/schema.go b/go/pkg/schema/schema.go index 2149730..1156182 100644 --- a/go/pkg/schema/schema.go +++ b/go/pkg/schema/schema.go @@ -9,7 +9,6 @@ type Schema struct { PackageName string `json:"package,omitempty"` Structs map[string]*Struct `json:"structs"` Multimaps map[string]*Multimap `json:"multimaps"` - MainStruct string `json:"main"` } type Compatibility int @@ -23,14 +22,6 @@ const ( // Compatible checks backward compatibility of this schema with oldSchema. // If the schemas are incompatible returns CompatibilityIncompatible and an error. func (d *Schema) Compatible(oldSchema *Schema) (Compatibility, error) { - if d.MainStruct != oldSchema.MainStruct { - return CompatibilityIncompatible, - fmt.Errorf( - "mismatched main structure names (old=%s, new=%s)", - oldSchema.MainStruct, d.MainStruct, - ) - } - // Exact compatibility is only possible if the number of structs is exactly the same. exact := len(d.Structs) == len(oldSchema.Structs) @@ -184,9 +175,8 @@ func isCompatibleFieldType( // are excluded. 
func (d *Schema) PrunedForRoot(rootStructName string) (*Schema, error) { out := Schema{ - Structs: map[string]*Struct{}, - Multimaps: map[string]*Multimap{}, - MainStruct: rootStructName, + Structs: map[string]*Struct{}, + Multimaps: map[string]*Multimap{}, } if err := d.copyPrunedStruct(rootStructName, &out); err != nil { return nil, err diff --git a/go/pkg/schema/schema_test.go b/go/pkg/schema/schema_test.go index 9d1bf13..bb9a0b2 100644 --- a/go/pkg/schema/schema_test.go +++ b/go/pkg/schema/schema_test.go @@ -97,7 +97,6 @@ func TestSchemaSelfCompatible(t *testing.T) { Structs: map[string]*Struct{ "Root": {Name: "Root"}, }, - MainStruct: "Root", }, { PackageName: "pkg", @@ -119,7 +118,6 @@ func TestSchemaSelfCompatible(t *testing.T) { Value: MultimapField{Type: FieldType{Primitive: &p}}, }, }, - MainStruct: "Root", }, } @@ -155,8 +153,7 @@ func TestSchemaSuperset(t *testing.T) { }, }, }, - Multimaps: nil, - MainStruct: "Root", + Multimaps: nil, }, new: &Schema{ PackageName: "def", @@ -179,8 +176,7 @@ func TestSchemaSuperset(t *testing.T) { }, }, }, - Multimaps: nil, - MainStruct: "Root", + Multimaps: nil, }, }, { @@ -253,7 +249,6 @@ func TestSchemaSuperset(t *testing.T) { Value: MultimapField{Type: FieldType{Primitive: &primitiveTypeString}}, }, }, - MainStruct: "Root", }, new: &Schema{ PackageName: "def", @@ -347,7 +342,6 @@ func TestSchemaSuperset(t *testing.T) { Value: MultimapField{Type: FieldType{Primitive: &primitiveTypeString}}, }, }, - MainStruct: "Root2", }, }, } @@ -392,8 +386,7 @@ func TestSchemaIncompatible(t *testing.T) { }, }, }, - Multimaps: nil, - MainStruct: "Root", + Multimaps: nil, }, new: &Schema{ PackageName: "def", @@ -410,7 +403,6 @@ func TestSchemaIncompatible(t *testing.T) { }, }, }, - MainStruct: "Root", }, err: "struct Root has fewer fields in new schema (1 vs 2)", }, @@ -428,7 +420,7 @@ func TestSchemaIncompatible(t *testing.T) { } func expandSchema(t *testing.T, r *rand.Rand, orig *Schema) (cpy *Schema) { - cpy, err := 
orig.PrunedForRoot(orig.MainStruct) + cpy, err := orig.PrunedForRoot("Metrics") require.NoError(t, err) for { for _, str := range cpy.Structs { @@ -509,7 +501,7 @@ func expandStruct(t *testing.T, r *rand.Rand, schema *Schema, str *Struct) bool } func shrinkSchema(t *testing.T, r *rand.Rand, orig *Schema) (cpy *Schema) { - cpy, err := orig.PrunedForRoot(orig.MainStruct) + cpy, err := orig.PrunedForRoot("Metrics") require.NoError(t, err) for { for _, str := range cpy.Structs { @@ -548,7 +540,7 @@ func TestSchemaExpand(t *testing.T) { orig := &Schema{} err = json.Unmarshal(schemaJson, &orig) require.NoError(t, err) - orig, err = orig.PrunedForRoot(orig.MainStruct) + orig, err = orig.PrunedForRoot("Metrics") require.NoError(t, err) r := rand.New(rand.NewSource(42)) diff --git a/go/pkg/schema/stef.abnf b/go/pkg/schema/stef.abnf deleted file mode 100644 index ee16bdb..0000000 --- a/go/pkg/schema/stef.abnf +++ /dev/null @@ -1,65 +0,0 @@ -; Validated by https://author-tools.ietf.org/abnf - -schema = struct oneof multimap - -; struct rules - -struct = "struct" struct-name *struct-modifier "{" struct-body "}" - -struct-name = identifier - -identifier = ALPHA *(ALPHA / DIGIT) - -struct-modifier = dict-modifier / "main" - -dict-modifier = "(" identifier ")" - -struct-body = 1*struct-field - -struct-field = identifier field-type [struct-field-modifier] - -struct-field-modifier = dict-modifier / optional-modifier - -optional-modifier = "optional" - -; oneof rules - -oneof = "oneof" oneof-name "{" oneof-body "}" - -oneof-name = identifier - -oneof-body = 1*oneof-field - -oneof-field = identifier field-type [oneof-field-modifier] - -oneof-field-modifier = dict-modifier - -; multimap rules - -multimap = "multimap" multimap-name "{" multimap-body "}" - -multimap-name = identifier - -multimap-body = multimap-key multimap-value - -multimap-key = "Key" field-type [multimap-field-modifier] - -multimap-value = "Value" field-type [multimap-field-modifier] - -multimap-field-modifier = 
dict-modifier - -; type definition - -field-type = [array-specifier] builtin-type / userdefined-type - -array-specifier = "[]" - -builtin-type = "bool" / "int64" / "float64" / "string" / "bytes" - -userdefined-type = identifier - -; core rules - -ALPHA = %x41-5A / %x61-7A ; A-Z / a-z - -DIGIT = %x30-39 ; 0-9 diff --git a/go/pkg/schema/testdata/oteltef.wire.json b/go/pkg/schema/testdata/oteltef.wire.json index d9cd4ad..ea938a5 100755 --- a/go/pkg/schema/testdata/oteltef.wire.json +++ b/go/pkg/schema/testdata/oteltef.wire.json @@ -246,6 +246,10 @@ { "multimap": "Attributes", "name": "Attributes" + }, + { + "name": "DroppedAttributesCount", + "primitive": 1 } ] }, @@ -271,6 +275,10 @@ { "multimap": "Attributes", "name": "Attributes" + }, + { + "name": "DroppedAttributesCount", + "primitive": 1 } ] }, @@ -319,6 +327,10 @@ "multimap": "Attributes", "name": "Attributes" }, + { + "name": "DroppedAttributesCount", + "primitive": 1 + }, { "array": { "struct": "Event" @@ -360,6 +372,10 @@ { "multimap": "Attributes", "name": "Attributes" + }, + { + "name": "DroppedAttributesCount", + "primitive": 1 } ] }, @@ -379,6 +395,10 @@ { "multimap": "Attributes", "name": "Attributes" + }, + { + "name": "DroppedAttributesCount", + "primitive": 1 } ] }, diff --git a/stef-spec/specification.md b/stef-spec/specification.md index a82f84b..3bc8ec5 100644 --- a/stef-spec/specification.md +++ b/stef-spec/specification.md @@ -169,8 +169,8 @@ oneof PointValue { } multimap Attributes { - Key string - Value string + key string + value string } ``` @@ -271,8 +271,8 @@ above `Measurement` example by the following tree: root Measurement |- MetricName string |- Attributes Attributes - | |- Key string - | |- Value string + | |- key string + | |- value string |- Timestamp uint64 |- Value PointValue |- Int64 int64 @@ -313,8 +313,8 @@ oneof PointValue { } multimap Attributes { - Key string - Value AnyValue + key string + value AnyValue } oneof AnyValue { @@ -324,8 +324,8 @@ oneof AnyValue { } multimap 
KVList { - Key string - Value AnyValue + key string + value AnyValue } ``` @@ -336,13 +336,13 @@ and KVList mutually refer to each other. The corresponding schema tree looks lik root Measurement |- MetricName string |- Attributes Attributes - | |- Key string - | |- Value AnyValue + | |- key string + | |- value AnyValue | |- String string | |- Array []AnyValue <--- loop detected here, backtrack. Non-primitive leaf. | |- KVList KVList - | |- Key string - | |- Value AnyValue <--- loop detected here, backtrack. Non-primitive leaf. + | |- key string + | |- value AnyValue <--- loop detected here, backtrack. Non-primitive leaf. |- Timestamp uint64 |- Value PointValue |- Int64 int64 @@ -365,7 +365,7 @@ Secondly, because the schema allows recursive types a record may contain more th value associated with the same node in the schema tree. Consider the following `AnyValue`: ``` -AnyValue = { KVList = { Key = "abc", Value = { AnyValue = { String = "xyx" } } } } +AnyValue = { KVList = { key = "abc", value = { AnyValue = { String = "xyz" } } } } ``` Represented as a tree this AnyValue can be laid out as: @@ -373,7 +373,7 @@ Represented as a tree this AnyValue can be laid out as: ``` AnyValue |- KVList - |- Key = "abc" + |- key = "abc" |- Value |- AnyValue |- String = "xyz" @@ -523,13 +523,13 @@ Tree Column Codec Type root Measurement 1 struct |- MetricName string 2 string |- Attributes Attributes 3 multimap - | |- Key string 4 string - | |- Value AnyValue 5 oneof + | |- key string 4 string + | |- value AnyValue 5 oneof | |- String string 6 string | |- Array []AnyValue 7 array | |- KVList KVList 8 multimap - | |- Key string 9 string - | |- Value AnyValue 10 oneof + | |- key string 9 string + | |- value AnyValue 10 oneof |- Timestamp uint64 11 uint64 |- Value PointValue 12 oneof |- Int64 int64 13 int64 diff --git a/stefgen/generator/compileschema.go b/stefgen/generator/compileschema.go index 70c8d1a..7df09ed 100644 --- a/stefgen/generator/compileschema.go +++ 
b/stefgen/generator/compileschema.go @@ -11,7 +11,6 @@ func compileSchema(src *schema.Schema) (*genSchema, error) { PackageName: src.PackageName, Structs: map[string]*genStructDef{}, Multimaps: map[string]*genMapDef{}, - MainStruct: src.MainStruct, } for name, struc := range src.Structs { @@ -26,8 +25,12 @@ func compileSchema(src *schema.Schema) (*genSchema, error) { return nil, err } - stack := recurseStack{asMap: map[string]bool{}} - computeRecursiveStruct(dst.Structs[dst.MainStruct], &stack) + for _, struc := range dst.Structs { + if struc.IsRoot { + stack := recurseStack{asMap: map[string]bool{}} + computeRecursiveStruct(struc, &stack) + } + } return dst, nil } diff --git a/stefgen/generator/generator.go b/stefgen/generator/generator.go index 58c87a9..590c9ea 100644 --- a/stefgen/generator/generator.go +++ b/stefgen/generator/generator.go @@ -108,7 +108,6 @@ func (g *Generator) oStartFile(fileName string) error { func (g *Generator) oTemplate(templateName, outputFileName string, data map[string]any) error { data["PackageName"] = g.compiledSchema.PackageName - data["MainStructName"] = g.compiledSchema.MainStruct if err := g.oStartFile(outputFileName); err != nil { return err diff --git a/stefgen/generator/genschema.go b/stefgen/generator/genschema.go index 183a2bd..2d07a97 100644 --- a/stefgen/generator/genschema.go +++ b/stefgen/generator/genschema.go @@ -12,7 +12,6 @@ type genSchema struct { PackageName string Structs map[string]*genStructDef Multimaps map[string]*genMapDef - MainStruct string } func (s *genSchema) SchemaStr() string { @@ -30,8 +29,8 @@ func (s *genSchema) SchemaStr() string { str += "struct" } str += " " + struc.Name - if struc.Name == s.MainStruct { - str += " main" + if struc.IsRoot { + str += " root" } if struc.Dict != "" { str += " dict(" + struc.Dict + ")" diff --git a/stefgen/generator/structs.go b/stefgen/generator/structs.go index cacc02b..b852336 100644 --- a/stefgen/generator/structs.go +++ b/stefgen/generator/structs.go @@ -74,7 
+74,7 @@ func (g *Generator) oStruct(str *genStructDef) error { "Fields": fields, "DictName": str.Dict, "Type": str, - "IsMainStruct": str.Name == g.compiledSchema.MainStruct, + "IsMainStruct": str.IsRoot, "OptionalFieldCount": optionalFieldIndex, }