Implement a new lexer (and split Token and related code out of experimental/ast) #358

Open · wants to merge 6 commits into base: main
155 changes: 22 additions & 133 deletions experimental/ast/context.go
@@ -15,159 +15,48 @@
package ast

import (
"fmt"

"github.com/bufbuild/protocompile/experimental/internal"
"github.com/bufbuild/protocompile/experimental/report"
"github.com/bufbuild/protocompile/internal/arena"
"github.com/bufbuild/protocompile/experimental/token"
)

// Context is where all of the book-keeping for the AST of a particular file is kept.
//
// Virtually all operations inside of package ast involve a Context. However, most of
// the exported types carry their Context with them, so you don't need to worry about
// passing it around.
type Context struct {
file *report.IndexedFile

// Storage for tokens.
stream []tokenImpl
syntheticTokens []tokenSynthetic

// This contains materialized literals for some tokens. For example, given
// a token with text 1.5, this map will map that token's ID to the float
// value 1.5.
//
// Not all literal tokens will have an entry here; only those that have
// uncommon representations, such as hex literals, floats, and strings with
// escapes/implicit concatenation.
//
// This means the lexer can deal with the complex literal parsing logic on
// our behalf in general, but common cases are re-parsed on-demand.
// Specifically, the most common literals (decimal integers and simple
// quoted strings) do not generate entries in this map and thus do not
// contribute at-rest memory usage.
//
// All values in this map are string, uint64, or float64.
literals map[rawToken]any
type Context interface {
token.Context

// Storage for the various node types.
decls decls
types types
exprs exprs

options arena.Arena[rawCompactOptions]
Nodes() *Nodes
}
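// An illustrative sketch (hypothetical values, not from this diff): per the
// comment on the literals field above, only literals with uncommon
// representations are materialized up front. Given source text containing the
// literals 42, 0x2A, 1.5, and "a\nb", the map would hold roughly:
//
//	literals = map[rawToken]any{
//		tokenFor(`0x2A`):   uint64(42),   // hex literal
//		tokenFor(`1.5`):    float64(1.5), // float literal
//		tokenFor(`"a\nb"`): "a\nb",       // string with an escape
//	}
//
// Plain 42 and a simple "abc" get no entry and are re-parsed on demand.
// tokenFor is a hypothetical helper used only for this sketch.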

// Contextual is any AST type that carries a context (virtually all of them).
type Contextual interface {
// Context returns this type's [Context].
//
// Zero values of this type should return nil.
Context() *Context
}
type withContext = internal.With[Context]

// NewContext creates a fresh context for a particular file.
func NewContext(file report.File) *Context {
c := &Context{file: report.NewIndexedFile(file), literals: map[rawToken]any{}}
c.NewDeclBody(Token{}) // This is the rawBody for the whole file.
return c
}

// Context implements [Contextual].
func (c *Context) Context() *Context {
return c
}

// Stream returns a cursor over the whole lexed token stream.
func (c *Context) Stream() *Cursor {
return &Cursor{
withContext: withContext{c},
start: 1,
end: rawToken(len(c.stream) + 1),
}
}

// Path returns the (alleged) file system path for this file.
//
// This path is not used for anything except for diagnostics.
func (c *Context) Path() string {
return c.file.File().Path
}

// Text returns the full text of the file.
func (c *Context) Text() string {
return c.file.File().Text
}

// Root returns the root AST node for this context.
func (c *Context) Root() File {
// NewContext() sticks the root at the beginning of decls.body for us, so
// there is always a DeclBody at index 0, which corresponds to the whole
// file. We use a 1 here, not a 0, because arena.Arena's indices are
// off-by-one to accommodate the nil representation.
return File{wrapDeclBody(c, 1)}
}

// Tokens returns a flat slice over all of the natural tokens in this context,
// with no respect to nesting.
//
// You should probably use [Context.Stream] instead of this.
func (c *Context) Tokens() Slice[Token] {
return funcSlice[tokenImpl, Token]{
s: c.stream,
f: func(i int, _ *tokenImpl) Token { return rawToken(i + 1).With(c) },
func NewContext(file report.File) Context {
c := new(context)
c.stream = &token.Stream{
Context: c,
IndexedFile: report.NewIndexedFile(file),
}
}

// NOTE: Some methods of Context live in the context_*.go files. This is to
// reduce clutter in this file.

// panicIfNil panics if this context is nil.
//
// This is helpful for immediately panicking on function entry.
func (c *Context) panicIfNil() {
_ = c.file
}

// panicIfNotOurs checks that a contextual value is owned by this context, and panics if not.
//
// Does not panic if that is nil or has a nil context. Panics if c is nil.
func (c *Context) panicIfNotOurs(that ...Contextual) {
c.panicIfNil()
for _, that := range that {
if that == nil {
continue
}

c2 := that.Context()
if c2 == nil || c2 == c {
continue
}
panic(fmt.Sprintf("protocompile/ast: attempt to mix different contexts: %p(%q) and %p(%q)", c, c.Path(), c2, c2.Path()))
c.nodes = &Nodes{
Context: c,
}
}

// withContext is an embeddable type that provides common operations involving
// a context, causing it to implement Contextual.
type withContext struct {
ctx *Context
c.Nodes().NewDeclBody(token.Nil) // This is the rawBody for the whole file.
return c
}

// Context returns this type's associated [ast.Context].
//
// Returns `nil` if this is this type's zero value.
func (c withContext) Context() *Context {
return c.ctx
type context struct {
stream *token.Stream
nodes *Nodes
}

// Nil checks whether this is this type's zero value.
func (c withContext) Nil() bool {
return c.ctx == nil
func (c *context) Stream() *token.Stream {
return c.stream
}

// panicIfNil panics if this context is nil.
//
// This is helpful for immediately panicking on function entry.
func (c withContext) panicIfNil() {
c.Context().panicIfNil()
func (c *context) Nodes() *Nodes {
return c.nodes
}
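
A minimal usage sketch of the reworked entry point, assuming the `ast`, `report`, and `token` packages as they appear in this diff; the `buildAST` helper is hypothetical, and `Stream()` is presumed to be declared by the embedded `token.Context`, as the `*context` implementation above suggests:

```go
package example

import (
	"github.com/bufbuild/protocompile/experimental/ast"
	"github.com/bufbuild/protocompile/experimental/report"
)

// buildAST is a hypothetical caller showing the reshaped API surface.
func buildAST(file report.File) ast.Context {
	// NewContext now returns the ast.Context interface rather than a *Context.
	ctx := ast.NewContext(file)

	// Token bookkeeping lives behind Stream() (presumably declared by the
	// embedded token.Context), and AST node storage behind Nodes(), instead
	// of sitting as fields on a concrete Context struct.
	_ = ctx.Stream()
	_ = ctx.Nodes()
	return ctx
}
```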