Skip to content

Commit

Permalink
WIP on DSL method idx as a parameter\n part of #1004 (#1046)
Browse files Browse the repository at this point in the history
  • Loading branch information
bd82 authored Sep 15, 2019
1 parent 1dc658e commit 3785ea9
Show file tree
Hide file tree
Showing 10 changed files with 476 additions and 58 deletions.
153 changes: 127 additions & 26 deletions packages/chevrotain/api.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,69 @@ declare abstract class BaseParser {
*/
/* protected */ ACTION<T>(impl: () => T): T

/**
* Like `CONSUME` with the numerical suffix as a parameter, e.g:
* consume(0, X) === CONSUME(X)
* consume(1, X) === CONSUME1(X)
* consume(2, X) === CONSUME2(X)
* ...
* @see CONSUME
*
* @param idx - The occurrence index, i.e. the numerical suffix of the equivalent `CONSUME` variant.
* @param tokType - The type of token to consume.
* @param options - Optional properties to modify the behavior of the consume call.
* @returns The consumed token.
*/
/* protected */ consume(
idx: number,
tokType: TokenType,
options?: ConsumeMethodOpts
): IToken

/**
* Like `OPTION` with the numerical suffix as a parameter, e.g:
* option(0, X) === OPTION(X)
* option(1, X) === OPTION1(X)
* option(2, X) === OPTION2(X)
* ...
* @see OPTION
*
* @param idx - The occurrence index, i.e. the numerical suffix of the equivalent `OPTION` variant.
* @param actionORMethodDef - Either the grammar action itself or an options object, as accepted by `OPTION`.
*/
/* protected */ option<OUT>(
idx: number,
actionORMethodDef: GrammarAction<OUT> | DSLMethodOpts<OUT>
): OUT

/**
* Like `OR` with the numerical suffix as a parameter, e.g:
* or(0, X) === OR(X)
* or(1, X) === OR1(X)
* or(2, X) === OR2(X)
* ...
* @see OR
*
* @param idx - The occurrence index, i.e. the numerical suffix of the equivalent `OR` variant.
* @param altsOrOpts - Either an array of alternatives or an options object, as accepted by `OR`.
*/
/* protected */ or(idx: number, altsOrOpts: IOrAlt[] | OrMethodOpts): any

/**
* Like `MANY` with the numerical suffix as a parameter, e.g:
* many(0, X) === MANY(X)
* many(1, X) === MANY1(X)
* many(2, X) === MANY2(X)
* ...
* @see MANY
*
* @param idx - The occurrence index, i.e. the numerical suffix of the equivalent `MANY` variant.
* @param actionORMethodDef - Either the grammar action itself or an options object, as accepted by `MANY`.
*/
/* protected */ many(
idx: number,
actionORMethodDef: GrammarAction<any> | DSLMethodOpts<any>
): void

/**
* Like `AT_LEAST_ONE` with the numerical suffix as a parameter, e.g:
* atLeastOne(0, X) === AT_LEAST_ONE(X)
* atLeastOne(1, X) === AT_LEAST_ONE1(X)
* atLeastOne(2, X) === AT_LEAST_ONE2(X)
* ...
* @see AT_LEAST_ONE
*
* @param idx - The occurrence index, i.e. the numerical suffix of the equivalent `AT_LEAST_ONE` variant.
* @param actionORMethodDef - Either the grammar action itself or an options object, as accepted by `AT_LEAST_ONE`.
*/
/* protected */ atLeastOne(
idx: number,
actionORMethodDef: GrammarAction<any> | DSLMethodOptsWithErr<any>
): void

/**
*
* A Parsing DSL method used to consume a single Token.
Expand Down Expand Up @@ -879,26 +942,6 @@ export declare class Parser extends BaseParser {
config?: IRuleConfig<T>
): (idxInCallingRule?: number, ...args: any[]) => T | any

/**
* The Parsing DSL Method is used by one rule to call another.
* It is equivalent to a non-Terminal in EBNF notation.
*
* This may seem redundant as it does not actually do much.
* However using it is **mandatory** for all sub rule invocations.
*
* Calling another rule without wrapping in SUBRULE(...)
* will cause errors/mistakes in the Parser's self analysis phase,
* which will lead to errors in error recovery/automatic lookahead calculation
* and any other functionality relying on the Parser's self analysis
* output.
*
* As in CONSUME the index in the method name indicates the occurrence
* of the sub rule invocation in its rule.
*
* @param ruleToCall - The rule to invoke.
* @param options - optional properties to modify the behavior of SUBRULE.
* @returns The result of invoking ruleToCall.
*/
/* protected */ SUBRULE<T>(
ruleToCall: (idx: number) => T,
options?: SubruleMethodOpts
Expand Down Expand Up @@ -1001,7 +1044,7 @@ export declare class CstParser extends BaseParser {
/* protected */ static performSelfAnalysis(parserInstance: Parser): void

/**
* @see Parser.RULE
* Creates a Grammar Rule
*/
/* protected */ RULE(
name: string,
Expand All @@ -1010,7 +1053,8 @@ export declare class CstParser extends BaseParser {
): (idxInCallingRule?: number, ...args: any[]) => CstNode

/**
* @see Parser.RULE
* Overrides a Grammar Rule
* See usage example in: https://github.com/SAP/chevrotain/blob/master/examples/parser/versioning/versioning.js
*/
/* protected */ OVERRIDE_RULE<T>(
name: string,
Expand All @@ -1019,7 +1063,35 @@ export declare class CstParser extends BaseParser {
): (idxInCallingRule?: number, ...args: any[]) => CstNode

/**
* @see Parser.SUBRULE
* Like `SUBRULE` with the numerical suffix as a parameter, e.g:
* subrule(0, X) === SUBRULE(X)
* subrule(1, X) === SUBRULE1(X)
* subrule(2, X) === SUBRULE2(X)
* ...
* @see SUBRULE
*/
/* protected */ subrule(
idx: number,
ruleToCall: (idx: number) => CstNode,
options?: SubruleMethodOpts
): CstNode

/**
* The Parsing DSL Method is used by one rule to call another.
* It is equivalent to a non-Terminal in EBNF notation.
*
* This may seem redundant as it does not actually do much.
* However using it is **mandatory** for all sub rule invocations.
*
* Calling another rule without wrapping in SUBRULE(...)
* will cause errors/mistakes in the Parser's self analysis phase,
* which will lead to errors in error recovery/automatic lookahead calculation
* and any other functionality relying on the Parser's self analysis
* output.
*
* As in CONSUME the index in the method name indicates the occurrence
* of the sub rule invocation in its rule.
*
*/
/* protected */ SUBRULE(
ruleToCall: (idx: number) => CstNode,
Expand Down Expand Up @@ -1123,7 +1195,7 @@ export declare class EmbeddedActionsParser extends BaseParser {
// TODO: remove `outputCST` from the config options in the constructor

/**
* @see Parser.RULE
* Creates a Grammar Rule
*/
/* protected */ RULE<T>(
name: string,
Expand All @@ -1132,7 +1204,8 @@ export declare class EmbeddedActionsParser extends BaseParser {
): (idxInCallingRule?: number, ...args: any[]) => T

/**
* @see Parser.OVERRIDE_RULE
* Overrides a Grammar Rule
* See usage example in: https://github.com/SAP/chevrotain/blob/master/examples/parser/versioning/versioning.js
*/
/* protected */ OVERRIDE_RULE<T>(
name: string,
Expand All @@ -1141,7 +1214,35 @@ export declare class EmbeddedActionsParser extends BaseParser {
): (idxInCallingRule?: number, ...args: any[]) => T

/**
* @see BaseParser.SUBRULE
* Like `SUBRULE` with the numerical suffix as a parameter, e.g:
* subrule(0, X) === SUBRULE(X)
* subrule(1, X) === SUBRULE1(X)
* subrule(2, X) === SUBRULE2(X)
* ...
* @see SUBRULE
*/
/* protected */ subrule<T>(
idx: number,
ruleToCall: (idx: number) => T,
options?: SubruleMethodOpts
): T

/**
* The Parsing DSL Method is used by one rule to call another.
* It is equivalent to a non-Terminal in EBNF notation.
*
* This may seem redundant as it does not actually do much.
* However using it is **mandatory** for all sub rule invocations.
*
* Calling another rule without wrapping in SUBRULE(...)
* will cause errors/mistakes in the Parser's self analysis phase,
* which will lead to errors in error recovery/automatic lookahead calculation
* and any other functionality relying on the Parser's self analysis
* output.
*
* As in CONSUME the index in the method name indicates the occurrence
* of the sub rule invocation in its rule.
*
*/
/* protected */ SUBRULE<T>(
ruleToCall: (idx: number) => T,
Expand Down
4 changes: 2 additions & 2 deletions packages/chevrotain/benchmark_web/parsers/options.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
window.globalOptions = {
dev: { outputCst: true, maxLookahead: 2 },
latest: { outputCst: true, maxLookahead: 2 }
dev: { outputCst: false, maxLookahead: 2 },
latest: { outputCst: false, maxLookahead: 2 }
}
1 change: 1 addition & 0 deletions packages/chevrotain/docs/changes/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

#### Minor Changes

- [Larger Max numerical suffix/idx for the DSL Methods](https://github.com/SAP/chevrotain/issues/802)
- [Improve duplicate DSL methods suffix error message](https://github.com/SAP/chevrotain/issues/1020)

#### Bug Fixes
Expand Down
22 changes: 11 additions & 11 deletions packages/chevrotain/src/parse/grammar/keys.ts
Original file line number Diff line number Diff line change
@@ -1,26 +1,26 @@
// Lookahead keys are 32Bit integers in the form
// TTTTTTTT-ZZZZZZZZZZZZZZZZ-YYYY-XXXX
// TTTTTTTT-ZZZZZZZZZZZZ-YYYY-XXXXXXXX
// XXXXXXXX -> Occurrence Index bitmap.
// YYYY -> DSL Method Name bitmap.
// YYYY -> DSL Method Type bitmap.
// ZZZZZZZZZZZZ -> Rule short Index bitmap.
// TTTTTTTT -> alternation alternative index bitmap

export const BITS_FOR_METHOD_IDX = 4
export const BITS_FOR_OCCURRENCE_IDX = 4
export const BITS_FOR_RULE_IDX = 24
export const BITS_FOR_METHOD_TYPE = 4
export const BITS_FOR_OCCURRENCE_IDX = 8
export const BITS_FOR_RULE_IDX = 12
// TODO: validation, this means that there may be at most 2^8 --> 256 alternatives for an alternation.
export const BITS_FOR_ALT_IDX = 8

// short string used as part of mapping keys.
// being short improves the performance when composing KEYS for maps out of these
// Bits 9 - 12 (16 possible values) are reserved for the DSL method type.
/* tslint:disable */
export const OR_IDX = 1 << BITS_FOR_METHOD_IDX
export const OPTION_IDX = 2 << BITS_FOR_METHOD_IDX
export const MANY_IDX = 3 << BITS_FOR_METHOD_IDX
export const AT_LEAST_ONE_IDX = 4 << BITS_FOR_METHOD_IDX
export const MANY_SEP_IDX = 5 << BITS_FOR_METHOD_IDX
export const AT_LEAST_ONE_SEP_IDX = 6 << BITS_FOR_METHOD_IDX
export const OR_IDX = 1 << BITS_FOR_OCCURRENCE_IDX
export const OPTION_IDX = 2 << BITS_FOR_OCCURRENCE_IDX
export const MANY_IDX = 3 << BITS_FOR_OCCURRENCE_IDX
export const AT_LEAST_ONE_IDX = 4 << BITS_FOR_OCCURRENCE_IDX
export const MANY_SEP_IDX = 5 << BITS_FOR_OCCURRENCE_IDX
export const AT_LEAST_ONE_SEP_IDX = 6 << BITS_FOR_OCCURRENCE_IDX
/* tslint:enable */

// this actually returns a number, but it is always used as a string (object prop key)
Expand Down
51 changes: 50 additions & 1 deletion packages/chevrotain/src/parse/parser/traits/gast_recorder.ts
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ import { Lexer } from "../../../scan/lexer_public"
import { augmentTokenTypes, hasShortKeyProperty } from "../../../scan/tokens"
import { createToken, createTokenInstance } from "../../../scan/tokens_public"
import { END_OF_FILE } from "../parser"
import { BITS_FOR_OCCURRENCE_IDX } from "../../grammar/keys"

type ProdWithDef = IProduction & { definition?: IProduction[] }
const RECORDING_NULL_OBJECT = {
Expand All @@ -48,6 +49,8 @@ const RECORDING_NULL_OBJECT = {
Object.freeze(RECORDING_NULL_OBJECT)

const HANDLE_SEPARATOR = true
const MAX_METHOD_IDX = Math.pow(2, BITS_FOR_OCCURRENCE_IDX) - 1

const RFT = createToken({ name: "RECORDING_PHASE_TOKEN", pattern: Lexer.NA })
augmentTokenTypes([RFT])
const RECORDING_PHASE_TOKEN = createTokenInstance(
Expand Down Expand Up @@ -107,7 +110,7 @@ export class GastRecorder {
return this.subruleInternalRecord(arg1, i, arg2)
}
this[`OPTION${idx}`] = function(arg1) {
this.optionInternalRecord(arg1, i)
return this.optionInternalRecord(arg1, i)
}
this[`OR${idx}`] = function(arg1) {
return this.orInternalRecord(arg1, i)
Expand All @@ -125,6 +128,27 @@ export class GastRecorder {
this.atLeastOneSepFirstInternalRecord(i, arg1)
}
}

// DSL methods with the idx(suffix) as an argument
this[`consume`] = function(idx, arg1, arg2) {
return this.consumeInternalRecord(arg1, idx, arg2)
}
this[`subrule`] = <any>function(idx, arg1, arg2) {
return this.subruleInternalRecord(arg1, idx, arg2)
}
this[`option`] = function(idx, arg1) {
return this.optionInternalRecord(arg1, idx)
}
this[`or`] = function(idx, arg1) {
return this.orInternalRecord(arg1, idx)
}
this[`many`] = function(idx, arg1) {
this.manyInternalRecord(idx, arg1)
}
this[`atLeastOne`] = function(idx, arg1) {
this.atLeastOneInternalRecord(idx, arg1)
}

this.ACTION = this.ACTION_RECORD
this.BACKTRACK = this.BACKTRACK_RECORD
this.LA = this.LA_RECORD
Expand All @@ -149,6 +173,14 @@ export class GastRecorder {
delete this[`AT_LEAST_ONE${idx}`]
delete this[`AT_LEAST_ONE_SEP${idx}`]
}

delete this[`consume`]
delete this[`subrule`]
delete this[`option`]
delete this[`or`]
delete this[`many`]
delete this[`atLeastOne`]

delete this.ACTION
delete this.BACKTRACK
delete this.LA
Expand Down Expand Up @@ -275,6 +307,7 @@ export class GastRecorder {
occurrence: number,
options?: SubruleMethodOpts
): T | CstNode {
assertMethodIdxIsValid(occurrence)
if (!ruleToCall || has(ruleToCall, "ruleName") === false) {
const error: any = new Error(
`<SUBRULE${getIdxSuffix(occurrence)}> argument is invalid` +
Expand Down Expand Up @@ -310,6 +343,7 @@ export class GastRecorder {
occurrence: number,
options: ConsumeMethodOpts
): IToken {
assertMethodIdxIsValid(occurrence)
if (!hasShortKeyProperty(tokType)) {
const error: any = new Error(
`<CONSUME${getIdxSuffix(occurrence)}> argument is invalid` +
Expand Down Expand Up @@ -340,6 +374,7 @@ function recordProd(
occurrence: number,
handleSep: boolean = false
): any {
assertMethodIdxIsValid(occurrence)
const prevProd: any = peek(this.recordingProdStack)
const grammarAction = isFunction(mainProdArg)
? mainProdArg
Expand All @@ -365,6 +400,7 @@ function recordProd(
}

function recordOrProd(mainProdArg: any, occurrence: number): any {
assertMethodIdxIsValid(occurrence)
const prevProd: any = peek(this.recordingProdStack)
// Only an array of alternatives
const hasOptions = isArray(mainProdArg) === false
Expand Down Expand Up @@ -410,3 +446,16 @@ function recordOrProd(mainProdArg: any, occurrence: number): any {
/**
 * Converts a DSL method occurrence index into the suffix used in the
 * method's name: index 0 maps to the un-suffixed form (empty string),
 * any other index maps to its decimal text.
 *
 * @param idx - The occurrence index of the DSL method.
 * @returns The suffix to append to the DSL method name.
 */
function getIdxSuffix(idx: number): string {
  if (idx === 0) {
    return ""
  }
  return String(idx)
}

/**
 * Guards the recording phase against DSL method indices that are out of range.
 *
 * An idx is accepted only when it lies in the inclusive range
 * [0, MAX_METHOD_IDX]. Anything else, including `NaN` and `undefined`,
 * is rejected.
 *
 * @param idx - The occurrence index passed to a DSL method.
 * @throws Error flagged with `KNOWN_RECORDER_ERROR = true` when idx is not
 *         within the supported range.
 */
function assertMethodIdxIsValid(idx: number): void {
  // Deliberately phrased as a negated "in range" test: every relational
  // comparison against NaN (and therefore against undefined) is false, so the
  // `idx < 0 || idx > MAX_METHOD_IDX` form would silently accept such values.
  const isInRange = idx >= 0 && idx <= MAX_METHOD_IDX
  if (!isInRange) {
    const error: any = new Error(
      // The stack trace will contain all the needed details
      `Invalid DSL Method idx value: <${idx}>\n\t` +
        `Idx value must be a none negative value smaller than ${MAX_METHOD_IDX +
          1}`
    )
    // Tag the error so it can be told apart from unexpected failures.
    error.KNOWN_RECORDER_ERROR = true
    throw error
  }
}
Loading

0 comments on commit 3785ea9

Please sign in to comment.