From f03e191bdad1b9615e62f067e948c4764f419efe Mon Sep 17 00:00:00 2001 From: ota-meshi Date: Sun, 14 Apr 2024 15:19:45 +0900 Subject: [PATCH] Add support for ES2025 RegExp Duplicate named capturing groups --- acorn/src/acorn.d.ts | 2 +- acorn/src/regexp.js | 37 +++++++++++++++++++++++++++++++- bin/test262.unsupported-features | 1 - test/run.js | 1 + test/tests-regexp-2025.js | 17 +++++++++++++++ 5 files changed, 55 insertions(+), 3 deletions(-) create mode 100644 test/tests-regexp-2025.js diff --git a/acorn/src/acorn.d.ts b/acorn/src/acorn.d.ts index cf72b3704..4f7b383a3 100644 --- a/acorn/src/acorn.d.ts +++ b/acorn/src/acorn.d.ts @@ -573,7 +573,7 @@ export function tokenizer(input: string, options: Options): { [Symbol.iterator](): Iterator } -export type ecmaVersion = 3 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 2015 | 2016 | 2017 | 2018 | 2019 | 2020 | 2021 | 2022 | 2023 | 2024 | "latest" +export type ecmaVersion = 3 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 2015 | 2016 | 2017 | 2018 | 2019 | 2020 | 2021 | 2022 | 2023 | 2024 | 2025 | "latest" export interface Options { /** diff --git a/acorn/src/regexp.js b/acorn/src/regexp.js index 71babf7ae..be11e2538 100644 --- a/acorn/src/regexp.js +++ b/acorn/src/regexp.js @@ -23,6 +23,8 @@ export class RegExpValidationState { this.numCapturingParens = 0 this.maxBackReference = 0 this.groupNames = [] + this.groupNamesInDisjunction = [] + this.groupNamesInAlternative = [] this.backReferenceNames = [] } @@ -169,6 +171,8 @@ pp.regexp_pattern = function(state) { state.numCapturingParens = 0 state.maxBackReference = 0 state.groupNames.length = 0 + state.groupNamesInDisjunction.length = 0 + state.groupNamesInAlternative.length = 0 state.backReferenceNames.length = 0 this.regexp_disjunction(state) @@ -194,11 +198,27 @@ pp.regexp_pattern = function(state) { // https://www.ecma-international.org/ecma-262/8.0/#prod-Disjunction pp.regexp_disjunction = function(state) { + let upperGroupNamesInDisjunction + if 
(this.options.ecmaVersion >= 16) { + upperGroupNamesInDisjunction = state.groupNamesInDisjunction + state.groupNamesInDisjunction = [] + } + this.regexp_alternative(state) while (state.eat(0x7C /* | */)) { this.regexp_alternative(state) } + if (this.options.ecmaVersion >= 16) { + // Adds the group name that appears in current Disjunction + // as the group name of the current Alternative and upper Disjunction. + for (const groupName of state.groupNamesInDisjunction) { + upperGroupNamesInDisjunction.push(groupName) + state.groupNamesInAlternative.push(groupName) + } + state.groupNamesInDisjunction = upperGroupNamesInDisjunction + } + // Make the same message as V8. if (this.regexp_eatQuantifier(state, true)) { state.raise("Nothing to repeat") @@ -210,8 +230,18 @@ pp.regexp_disjunction = function(state) { // https://www.ecma-international.org/ecma-262/8.0/#prod-Alternative pp.regexp_alternative = function(state) { + let upperGroupNamesInAlternative + if (this.options.ecmaVersion >= 16) { + upperGroupNamesInAlternative = state.groupNamesInAlternative + state.groupNamesInAlternative = [...state.groupNamesInAlternative] + } + while (state.pos < state.source.length && this.regexp_eatTerm(state)) ; + + if (this.options.ecmaVersion >= 16) { + state.groupNamesInAlternative = upperGroupNamesInAlternative + } } // https://www.ecma-international.org/ecma-262/8.0/#prod-annexB-Term @@ -448,10 +478,15 @@ pp.regexp_eatExtendedPatternCharacter = function(state) { pp.regexp_groupSpecifier = function(state) { if (state.eat(0x3F /* ? */)) { if (this.regexp_eatGroupName(state)) { - if (state.groupNames.indexOf(state.lastStringValue) !== -1) { + const groupNames = this.options.ecmaVersion >= 16 ? 
state.groupNamesInAlternative : state.groupNames + if (groupNames.indexOf(state.lastStringValue) !== -1) { state.raise("Duplicate capture group name") } state.groupNames.push(state.lastStringValue) + if (this.options.ecmaVersion >= 16) { + state.groupNamesInAlternative.push(state.lastStringValue) + state.groupNamesInDisjunction.push(state.lastStringValue) + } return } state.raise("Invalid group") diff --git a/bin/test262.unsupported-features b/bin/test262.unsupported-features index 5ab02064c..383077264 100644 --- a/bin/test262.unsupported-features +++ b/bin/test262.unsupported-features @@ -1,3 +1,2 @@ decorators import-assertions -regexp-duplicate-named-groups diff --git a/test/run.js b/test/run.js index 05087c483..0587bc571 100644 --- a/test/run.js +++ b/test/run.js @@ -15,6 +15,7 @@ require("./tests-regexp-2020.js"); require("./tests-regexp-2022.js"); require("./tests-regexp-2024.js"); + require("./tests-regexp-2025.js"); require("./tests-json-superset.js"); require("./tests-optional-catch-binding.js"); require("./tests-bigint.js"); diff --git a/test/tests-regexp-2025.js b/test/tests-regexp-2025.js new file mode 100644 index 000000000..dd010ef42 --- /dev/null +++ b/test/tests-regexp-2025.js @@ -0,0 +1,17 @@ +if (typeof exports !== "undefined") { + var test = require("./driver.js").test + var testFail = require("./driver.js").testFail +} + +test("/(?<x>a)|(?<x>b)/", {}, { ecmaVersion: 2025 }) +testFail("/(?<x>a)|(?<x>b)/", "Invalid regular expression: /(?<x>a)|(?<x>b)/: Duplicate capture group name (1:1)", { ecmaVersion: 2024 }) +testFail("/(?<x>a)(?<x>b)/", "Invalid regular expression: /(?<x>a)(?<x>b)/: Duplicate capture group name (1:1)", { ecmaVersion: 2025 }) +test("/(?:(?<x>a)|(?<x>b))\\k<x>/", {}, { ecmaVersion: 2025 }) +testFail("/(?:(?<x>a)|(?<x>b))\\k<x>/", "Invalid regular expression: /(?:(?<x>a)|(?<x>b))\\k<x>/: Duplicate capture group name (1:1)", { ecmaVersion: 2024 }) +testFail("/(?:(?<x>a)(?<x>b))\\k<x>/", "Invalid regular expression: /(?:(?<x>a)(?<x>b))\\k<x>/: Duplicate capture group name (1:1)", { ecmaVersion: 2025 })
+test("/(?<x>a)(?<y>a)|(?<x>b)(?<y>b)/", {}, { ecmaVersion: 2025 }) +test("/(?<x>a)|(?<x>b)|(?<x>c)/", {}, { ecmaVersion: 2025 }) +test("/(?<x>a)|\\k<x>/", {}, { ecmaVersion: 2025 }) +testFail("/(?<x>a)|(?<x>b)(?<x>c)/", "Invalid regular expression: /(?<x>a)|(?<x>b)(?<x>c)/: Duplicate capture group name (1:1)", { ecmaVersion: 2025 }) +testFail("/(?:(?<x>a)|(?<x>b))(?<x>c)/", "Invalid regular expression: /(?:(?<x>a)|(?<x>b))(?<x>c)/: Duplicate capture group name (1:1)", { ecmaVersion: 2025 }) +testFail("/(?:(?:(?<x>a)|(?<x>b))|(?:))(?<x>c)/", "Invalid regular expression: /(?:(?:(?<x>a)|(?<x>b))|(?:))(?<x>c)/: Duplicate capture group name (1:1)", { ecmaVersion: 2025 })