From 67f9dfa9015d8c484a8c43869919220d02b189a0 Mon Sep 17 00:00:00 2001 From: Roger Peppe Date: Thu, 12 Sep 2024 17:07:58 +0100 Subject: [PATCH] encoding/jsonschema: detect Perl regexps MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We want to treat Perl syntax as a missing feature and make genuinely invalid regular expressions an error. Signed-off-by: Roger Peppe Change-Id: I187be5f8846e02c9af514ec808fa19a8598e41ce Reviewed-on: https://review.gerrithub.io/c/cue-lang/cue/+/1201127 TryBot-Result: CUEcueckoo Reviewed-by: Daniel Martí --- encoding/jsonschema/constraints_string.go | 13 +++------- encoding/jsonschema/decode.go | 26 +++++++++++++++++++ .../optional/ecmascript-regex.json | 16 ++++++------ .../optional/ecmascript-regex.json | 16 ++++++------ .../draft4/optional/ecmascript-regex.json | 16 ++++++------ .../draft6/optional/ecmascript-regex.json | 16 ++++++------ .../draft7/optional/ecmascript-regex.json | 16 ++++++------ .../testdata/txtar/perl_pattern.txtar | 11 ++++++++ .../testdata/txtar/perl_pattern_strict.txtar | 13 ++++++++++ 9 files changed, 93 insertions(+), 50 deletions(-) create mode 100644 encoding/jsonschema/testdata/txtar/perl_pattern.txtar create mode 100644 encoding/jsonschema/testdata/txtar/perl_pattern_strict.txtar diff --git a/encoding/jsonschema/constraints_string.go b/encoding/jsonschema/constraints_string.go index 29a18518c41..1c9f1788be8 100644 --- a/encoding/jsonschema/constraints_string.go +++ b/encoding/jsonschema/constraints_string.go @@ -15,7 +15,6 @@ package jsonschema import ( - "regexp" "sync" "cuelang.org/go/cue" @@ -50,17 +49,11 @@ func constraintMinLength(key string, n cue.Value, s *state) { } func constraintPattern(key string, n cue.Value, s *state) { - str, _ := s.strValue(n) - if _, err := regexp.Compile(str); err != nil { - if s.cfg.StrictFeatures { - // TODO check if the error is only because of an unsupported - // regexp feature (e.g. 
perl regexp) or because the regexp is just - // bad. If the latter, this should be an error even if Strict is false. - s.errf(n, "unsupported regexp: %v", err) - } + str, ok := s.regexpValue(n) + if !ok { return } - s.add(n, stringType, &ast.UnaryExpr{Op: token.MAT, X: s.string(n)}) + s.add(n, stringType, &ast.UnaryExpr{Op: token.MAT, X: str}) } type formatFuncInfo struct { diff --git a/encoding/jsonschema/decode.go b/encoding/jsonschema/decode.go index d292fe16ac7..2eb38c8a440 100644 --- a/encoding/jsonschema/decode.go +++ b/encoding/jsonschema/decode.go @@ -22,6 +22,7 @@ import ( "fmt" "math" "net/url" + "regexp/syntax" "sort" "strconv" "strings" @@ -256,6 +257,31 @@ func (d *decoder) strValue(n cue.Value) (s string, ok bool) { return s, true } +func (d *decoder) regexpValue(n cue.Value) (ast.Expr, bool) { + s, ok := d.strValue(n) + if !ok { + return nil, false + } + _, err := syntax.Parse(s, syntax.Perl) + if err == nil { + return d.string(n), true + } + var regErr *syntax.Error + if errors.As(err, &regErr) && regErr.Code == syntax.ErrInvalidPerlOp { + // It's Perl syntax that we'll never support because the CUE evaluation + // engine uses Go's regexp implementation and because the missing + // features are usually not there for good reason (e.g. exponential + // runtime). In other words, this is a missing feature but not an invalid + // regular expression as such. 
+ if d.cfg.StrictFeatures { + d.errf(n, "unsupported Perl regexp syntax in %q: %v", s, err) + } + return nil, false + } + d.errf(n, "invalid regexp %q: %v", s, err) + return nil, false +} + // const draftCutoff = 5 type coreType int diff --git a/encoding/jsonschema/testdata/external/tests/draft2019-09/optional/ecmascript-regex.json b/encoding/jsonschema/testdata/external/tests/draft2019-09/optional/ecmascript-regex.json index f71638f8a3c..64d510cb48b 100644 --- a/encoding/jsonschema/testdata/external/tests/draft2019-09/optional/ecmascript-regex.json +++ b/encoding/jsonschema/testdata/external/tests/draft2019-09/optional/ecmascript-regex.json @@ -47,8 +47,8 @@ "pattern": "^\\cC$" }, "skip": { - "v2": "extract error: unsupported regexp: error parsing regexp: invalid escape sequence: `\\c`", - "v3": "extract error: unsupported regexp: error parsing regexp: invalid escape sequence: `\\c`" + "v2": "extract error: invalid regexp \"^\\\\cC$\": error parsing regexp: invalid escape sequence: `\\c`", + "v3": "extract error: invalid regexp \"^\\\\cC$\": error parsing regexp: invalid escape sequence: `\\c`" }, "tests": [ { @@ -79,8 +79,8 @@ "pattern": "^\\cc$" }, "skip": { - "v2": "extract error: unsupported regexp: error parsing regexp: invalid escape sequence: `\\c`", - "v3": "extract error: unsupported regexp: error parsing regexp: invalid escape sequence: `\\c`" + "v2": "extract error: invalid regexp \"^\\\\cc$\": error parsing regexp: invalid escape sequence: `\\c`", + "v3": "extract error: invalid regexp \"^\\\\cc$\": error parsing regexp: invalid escape sequence: `\\c`" }, "tests": [ { @@ -370,8 +370,8 @@ "pattern": "\\p{Letter}cole" }, "skip": { - "v2": "extract error: unsupported regexp: error parsing regexp: invalid character class range: `\\p{Letter}`", - "v3": "extract error: unsupported regexp: error parsing regexp: invalid character class range: `\\p{Letter}`" + "v2": "extract error: invalid regexp \"\\\\p{Letter}cole\": error parsing regexp: invalid character 
class range: `\\p{Letter}`", + "v3": "extract error: invalid regexp \"\\\\p{Letter}cole\": error parsing regexp: invalid character class range: `\\p{Letter}`" }, "tests": [ { @@ -496,8 +496,8 @@ "pattern": "^\\p{digit}+$" }, "skip": { - "v2": "extract error: unsupported regexp: error parsing regexp: invalid character class range: `\\p{digit}`", - "v3": "extract error: unsupported regexp: error parsing regexp: invalid character class range: `\\p{digit}`" + "v2": "extract error: invalid regexp \"^\\\\p{digit}+$\": error parsing regexp: invalid character class range: `\\p{digit}`", + "v3": "extract error: invalid regexp \"^\\\\p{digit}+$\": error parsing regexp: invalid character class range: `\\p{digit}`" }, "tests": [ { diff --git a/encoding/jsonschema/testdata/external/tests/draft2020-12/optional/ecmascript-regex.json b/encoding/jsonschema/testdata/external/tests/draft2020-12/optional/ecmascript-regex.json index 3217bd9ee8c..0928e8a2e1e 100644 --- a/encoding/jsonschema/testdata/external/tests/draft2020-12/optional/ecmascript-regex.json +++ b/encoding/jsonschema/testdata/external/tests/draft2020-12/optional/ecmascript-regex.json @@ -47,8 +47,8 @@ "pattern": "^\\cC$" }, "skip": { - "v2": "extract error: unsupported regexp: error parsing regexp: invalid escape sequence: `\\c`", - "v3": "extract error: unsupported regexp: error parsing regexp: invalid escape sequence: `\\c`" + "v2": "extract error: invalid regexp \"^\\\\cC$\": error parsing regexp: invalid escape sequence: `\\c`", + "v3": "extract error: invalid regexp \"^\\\\cC$\": error parsing regexp: invalid escape sequence: `\\c`" }, "tests": [ { @@ -79,8 +79,8 @@ "pattern": "^\\cc$" }, "skip": { - "v2": "extract error: unsupported regexp: error parsing regexp: invalid escape sequence: `\\c`", - "v3": "extract error: unsupported regexp: error parsing regexp: invalid escape sequence: `\\c`" + "v2": "extract error: invalid regexp \"^\\\\cc$\": error parsing regexp: invalid escape sequence: `\\c`", + "v3": "extract 
error: invalid regexp \"^\\\\cc$\": error parsing regexp: invalid escape sequence: `\\c`" }, "tests": [ { @@ -370,8 +370,8 @@ "pattern": "\\p{Letter}cole" }, "skip": { - "v2": "extract error: unsupported regexp: error parsing regexp: invalid character class range: `\\p{Letter}`", - "v3": "extract error: unsupported regexp: error parsing regexp: invalid character class range: `\\p{Letter}`" + "v2": "extract error: invalid regexp \"\\\\p{Letter}cole\": error parsing regexp: invalid character class range: `\\p{Letter}`", + "v3": "extract error: invalid regexp \"\\\\p{Letter}cole\": error parsing regexp: invalid character class range: `\\p{Letter}`" }, "tests": [ { @@ -514,8 +514,8 @@ "pattern": "^\\p{digit}+$" }, "skip": { - "v2": "extract error: unsupported regexp: error parsing regexp: invalid character class range: `\\p{digit}`", - "v3": "extract error: unsupported regexp: error parsing regexp: invalid character class range: `\\p{digit}`" + "v2": "extract error: invalid regexp \"^\\\\p{digit}+$\": error parsing regexp: invalid character class range: `\\p{digit}`", + "v3": "extract error: invalid regexp \"^\\\\p{digit}+$\": error parsing regexp: invalid character class range: `\\p{digit}`" }, "tests": [ { diff --git a/encoding/jsonschema/testdata/external/tests/draft4/optional/ecmascript-regex.json b/encoding/jsonschema/testdata/external/tests/draft4/optional/ecmascript-regex.json index b563aff49df..eea301772d5 100644 --- a/encoding/jsonschema/testdata/external/tests/draft4/optional/ecmascript-regex.json +++ b/encoding/jsonschema/testdata/external/tests/draft4/optional/ecmascript-regex.json @@ -44,8 +44,8 @@ "pattern": "^\\cC$" }, "skip": { - "v2": "extract error: unsupported regexp: error parsing regexp: invalid escape sequence: `\\c`", - "v3": "extract error: unsupported regexp: error parsing regexp: invalid escape sequence: `\\c`" + "v2": "extract error: invalid regexp \"^\\\\cC$\": error parsing regexp: invalid escape sequence: `\\c`", + "v3": "extract error: 
invalid regexp \"^\\\\cC$\": error parsing regexp: invalid escape sequence: `\\c`" }, "tests": [ { @@ -75,8 +75,8 @@ "pattern": "^\\cc$" }, "skip": { - "v2": "extract error: unsupported regexp: error parsing regexp: invalid escape sequence: `\\c`", - "v3": "extract error: unsupported regexp: error parsing regexp: invalid escape sequence: `\\c`" + "v2": "extract error: invalid regexp \"^\\\\cc$\": error parsing regexp: invalid escape sequence: `\\c`", + "v3": "extract error: invalid regexp \"^\\\\cc$\": error parsing regexp: invalid escape sequence: `\\c`" }, "tests": [ { @@ -359,8 +359,8 @@ "pattern": "\\p{Letter}cole" }, "skip": { - "v2": "extract error: unsupported regexp: error parsing regexp: invalid character class range: `\\p{Letter}`", - "v3": "extract error: unsupported regexp: error parsing regexp: invalid character class range: `\\p{Letter}`" + "v2": "extract error: invalid regexp \"\\\\p{Letter}cole\": error parsing regexp: invalid character class range: `\\p{Letter}`", + "v3": "extract error: invalid regexp \"\\\\p{Letter}cole\": error parsing regexp: invalid character class range: `\\p{Letter}`" }, "tests": [ { @@ -481,8 +481,8 @@ "pattern": "^\\p{digit}+$" }, "skip": { - "v2": "extract error: unsupported regexp: error parsing regexp: invalid character class range: `\\p{digit}`", - "v3": "extract error: unsupported regexp: error parsing regexp: invalid character class range: `\\p{digit}`" + "v2": "extract error: invalid regexp \"^\\\\p{digit}+$\": error parsing regexp: invalid character class range: `\\p{digit}`", + "v3": "extract error: invalid regexp \"^\\\\p{digit}+$\": error parsing regexp: invalid character class range: `\\p{digit}`" }, "tests": [ { diff --git a/encoding/jsonschema/testdata/external/tests/draft6/optional/ecmascript-regex.json b/encoding/jsonschema/testdata/external/tests/draft6/optional/ecmascript-regex.json index 58a19c8b6b0..b23aa9ff031 100644 --- 
a/encoding/jsonschema/testdata/external/tests/draft6/optional/ecmascript-regex.json +++ b/encoding/jsonschema/testdata/external/tests/draft6/optional/ecmascript-regex.json @@ -44,8 +44,8 @@ "pattern": "^\\cC$" }, "skip": { - "v2": "extract error: unsupported regexp: error parsing regexp: invalid escape sequence: `\\c`", - "v3": "extract error: unsupported regexp: error parsing regexp: invalid escape sequence: `\\c`" + "v2": "extract error: invalid regexp \"^\\\\cC$\": error parsing regexp: invalid escape sequence: `\\c`", + "v3": "extract error: invalid regexp \"^\\\\cC$\": error parsing regexp: invalid escape sequence: `\\c`" }, "tests": [ { @@ -75,8 +75,8 @@ "pattern": "^\\cc$" }, "skip": { - "v2": "extract error: unsupported regexp: error parsing regexp: invalid escape sequence: `\\c`", - "v3": "extract error: unsupported regexp: error parsing regexp: invalid escape sequence: `\\c`" + "v2": "extract error: invalid regexp \"^\\\\cc$\": error parsing regexp: invalid escape sequence: `\\c`", + "v3": "extract error: invalid regexp \"^\\\\cc$\": error parsing regexp: invalid escape sequence: `\\c`" }, "tests": [ { @@ -359,8 +359,8 @@ "pattern": "\\p{Letter}cole" }, "skip": { - "v2": "extract error: unsupported regexp: error parsing regexp: invalid character class range: `\\p{Letter}`", - "v3": "extract error: unsupported regexp: error parsing regexp: invalid character class range: `\\p{Letter}`" + "v2": "extract error: invalid regexp \"\\\\p{Letter}cole\": error parsing regexp: invalid character class range: `\\p{Letter}`", + "v3": "extract error: invalid regexp \"\\\\p{Letter}cole\": error parsing regexp: invalid character class range: `\\p{Letter}`" }, "tests": [ { @@ -481,8 +481,8 @@ "pattern": "^\\p{digit}+$" }, "skip": { - "v2": "extract error: unsupported regexp: error parsing regexp: invalid character class range: `\\p{digit}`", - "v3": "extract error: unsupported regexp: error parsing regexp: invalid character class range: `\\p{digit}`" + "v2": "extract 
error: invalid regexp \"^\\\\p{digit}+$\": error parsing regexp: invalid character class range: `\\p{digit}`", + "v3": "extract error: invalid regexp \"^\\\\p{digit}+$\": error parsing regexp: invalid character class range: `\\p{digit}`" }, "tests": [ { diff --git a/encoding/jsonschema/testdata/external/tests/draft7/optional/ecmascript-regex.json b/encoding/jsonschema/testdata/external/tests/draft7/optional/ecmascript-regex.json index 58a19c8b6b0..b23aa9ff031 100644 --- a/encoding/jsonschema/testdata/external/tests/draft7/optional/ecmascript-regex.json +++ b/encoding/jsonschema/testdata/external/tests/draft7/optional/ecmascript-regex.json @@ -44,8 +44,8 @@ "pattern": "^\\cC$" }, "skip": { - "v2": "extract error: unsupported regexp: error parsing regexp: invalid escape sequence: `\\c`", - "v3": "extract error: unsupported regexp: error parsing regexp: invalid escape sequence: `\\c`" + "v2": "extract error: invalid regexp \"^\\\\cC$\": error parsing regexp: invalid escape sequence: `\\c`", + "v3": "extract error: invalid regexp \"^\\\\cC$\": error parsing regexp: invalid escape sequence: `\\c`" }, "tests": [ { @@ -75,8 +75,8 @@ "pattern": "^\\cc$" }, "skip": { - "v2": "extract error: unsupported regexp: error parsing regexp: invalid escape sequence: `\\c`", - "v3": "extract error: unsupported regexp: error parsing regexp: invalid escape sequence: `\\c`" + "v2": "extract error: invalid regexp \"^\\\\cc$\": error parsing regexp: invalid escape sequence: `\\c`", + "v3": "extract error: invalid regexp \"^\\\\cc$\": error parsing regexp: invalid escape sequence: `\\c`" }, "tests": [ { @@ -359,8 +359,8 @@ "pattern": "\\p{Letter}cole" }, "skip": { - "v2": "extract error: unsupported regexp: error parsing regexp: invalid character class range: `\\p{Letter}`", - "v3": "extract error: unsupported regexp: error parsing regexp: invalid character class range: `\\p{Letter}`" + "v2": "extract error: invalid regexp \"\\\\p{Letter}cole\": error parsing regexp: invalid character class 
range: `\\p{Letter}`", + "v3": "extract error: invalid regexp \"\\\\p{Letter}cole\": error parsing regexp: invalid character class range: `\\p{Letter}`" }, "tests": [ { @@ -481,8 +481,8 @@ "pattern": "^\\p{digit}+$" }, "skip": { - "v2": "extract error: unsupported regexp: error parsing regexp: invalid character class range: `\\p{digit}`", - "v3": "extract error: unsupported regexp: error parsing regexp: invalid character class range: `\\p{digit}`" + "v2": "extract error: invalid regexp \"^\\\\p{digit}+$\": error parsing regexp: invalid character class range: `\\p{digit}`", + "v3": "extract error: invalid regexp \"^\\\\p{digit}+$\": error parsing regexp: invalid character class range: `\\p{digit}`" }, "tests": [ { diff --git a/encoding/jsonschema/testdata/txtar/perl_pattern.txtar b/encoding/jsonschema/testdata/txtar/perl_pattern.txtar new file mode 100644 index 00000000000..61cc2f2c9ea --- /dev/null +++ b/encoding/jsonschema/testdata/txtar/perl_pattern.txtar @@ -0,0 +1,11 @@ +Invalid perl syntax should not result in failure when #strictFeatures +isn't enabled. + +-- schema.json -- +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "pattern": "^(?![ \\t\\n]*\\(default(.*)\\))[\\s\\S]*" +} +-- out/decode/extract -- +@jsonschema(schema="https://json-schema.org/draft/2020-12/schema") +_ diff --git a/encoding/jsonschema/testdata/txtar/perl_pattern_strict.txtar b/encoding/jsonschema/testdata/txtar/perl_pattern_strict.txtar new file mode 100644 index 00000000000..84382be8fbe --- /dev/null +++ b/encoding/jsonschema/testdata/txtar/perl_pattern_strict.txtar @@ -0,0 +1,13 @@ +Invalid perl syntax should result in failure when #strictFeatures +is enabled. 
+#strictFeatures + +-- schema.json -- +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "pattern": "^(?![ \\t\\n]*\\(default(.*)\\))[\\s\\S]*" +} +-- out/decode/extract -- +ERROR: +unsupported Perl regexp syntax in "^(?![ \\t\\n]*\\(default(.*)\\))[\\s\\S]*": error parsing regexp: invalid or unsupported Perl syntax: `(?!`: + schema.json:3:5