Skip to content

Commit

Permalink
[eggex] More flags support: spliced eggex must have same flags
Browse files Browse the repository at this point in the history
  • Loading branch information
Andy Chu committed Dec 14, 2023
1 parent a4845be commit 167dfcc
Show file tree
Hide file tree
Showing 4 changed files with 58 additions and 30 deletions.
3 changes: 1 addition & 2 deletions core/value.asdl
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,7 @@ module value

# expr is spliced
# / d+; ignorecase / -> '[[:digit:]]+' REG_ICASE
| Eggex(re spliced, List[EggexFlag] normalized_flags, str? as_ere,
int ere_flags,
| Eggex(re spliced, str canonical_flags, str? as_ere,
# inner ? is because some groups are not named
# outer ? because it's not set until ERE translation is done
List[NameType?]? name_types)
Expand Down
7 changes: 3 additions & 4 deletions frontend/syntax.asdl
Original file line number Diff line number Diff line change
Expand Up @@ -290,12 +290,11 @@ module syntax

EggexFlag = (bool negated, Token flag)

# ere_flags is a canonical version of flags that can be compared for
# equality. This is so we can splice eggexes correctly, e.g.
# / 'abc' @pat ; i /
# canonical_flags is a normalized form of the flags that can be compared for
# equality. This is needed to splice eggexes correctly, e.g.
# / 'abc' @pat ; i /
Eggex = (
Token left, re regex, List[EggexFlag] flags, Token? trans_pref,
str? ere_flags)
str? canonical_flags)

pat =
Else
Expand Down
33 changes: 30 additions & 3 deletions spec/ysh-regex.test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -48,13 +48,40 @@ yes
no
## END

#### Eggex flags ignorecase
#### Eggex flags to ignore case are respected
shopt -s ysh:upgrade

# based on Python's spelling
var pat = / 'abc' ; ignorecase /
var pat = / 'abc' ; i /
var pat2 = / @pat 'def' ; reg_icase / # this is allowed

if ('-abcdef-' ~ pat2) {
echo 'yes'
}

if ('-ABCDEF-' ~ pat2) {
echo 'yes'
}

if ('ABCDE' ~ pat2) {
echo 'BUG'
}

## STDOUT:
yes
yes
## END

= pat
#### Can't splice eggex with different flags
shopt -s ysh:upgrade

var pat = / 'abc' ; i /
var pat2 = / @pat 'def' ; reg_icase / # this is allowed

var pat3 = / @pat 'def' /
= pat3

## status: 1
## STDOUT:
## END

Expand Down
45 changes: 24 additions & 21 deletions ysh/expr_eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -1222,10 +1222,13 @@ def _EvalClassLiteralTerm(self, term, out):
char_int, char_code_tok)
out.append(CharCode(char_int, False, char_code_tok))

def _EvalRegex(self, node):
# type: (re_t) -> re_t
def _EvalRegex(self, node, parent_flags):
# type: (re_t, str) -> re_t
"""Resolve references and eval constants in an Eggex
Args:
parent_flags: canonical flags of the enclosing eggex; any eggex spliced into it must have the same canonical flags
Rules:
Splice => re_t # like Hex and @const in / Hex '.' @const /
Speck/Token (syntax) => Primitive (logical)
Expand All @@ -1237,28 +1240,32 @@ def _EvalRegex(self, node):
if case(re_e.Seq):
node = cast(re.Seq, UP_node)
new_children = [
self._EvalRegex(child) for child in node.children
self._EvalRegex(child, parent_flags)
for child in node.children
]
return re.Seq(new_children)

elif case(re_e.Alt):
node = cast(re.Alt, UP_node)
new_children = [
self._EvalRegex(child) for child in node.children
self._EvalRegex(child, parent_flags)
for child in node.children
]
return re.Alt(new_children)

elif case(re_e.Repeat):
node = cast(re.Repeat, UP_node)
return re.Repeat(self._EvalRegex(node.child), node.op)
return re.Repeat(self._EvalRegex(node.child, parent_flags),
node.op)

elif case(re_e.Group):
node = cast(re.Group, UP_node)
return re.Group(self._EvalRegex(node.child))
return re.Group(self._EvalRegex(node.child, parent_flags))

elif case(re_e.Capture): # Identical to Group
node = cast(re.Capture, UP_node)
return re.Capture(self._EvalRegex(node.child), node.name_type)
return re.Capture(self._EvalRegex(node.child, parent_flags),
node.name_type)

elif case(re_e.CharClassLiteral):
node = cast(re.CharClassLiteral, UP_node)
Expand Down Expand Up @@ -1322,9 +1329,13 @@ def _EvalRegex(self, node):

elif case(value_e.Eggex):
val = cast(value.Eggex, UP_val)
# TODO: warn about flags that don't match
# This check will be transitive
# Splicing requires flags to match. This check is
# transitive.
to_splice = val.spliced
if val.canonical_flags != parent_flags:
e_die(
"Expected eggex flags %r, but got %r" %
(parent_flags, val.canonical_flags), node.name)

else:
raise error.TypeErr(
Expand All @@ -1341,19 +1352,11 @@ def _EvalRegex(self, node):

def EvalEggex(self, node):
# type: (Eggex) -> value.Eggex
"""Trivial wrapper.

It's a bit weird that this is re_t -> re_t, instead of different types.
It reflects the "macro expansion" of eggex.
"""
# _EvalRegex does splicing
# TODO:
# - check for incompatible flags, like i
# - or can the root override flags? Probably not
spliced = self._EvalRegex(node.regex)
#flags = [lexer.TokenVal(tok) for tok in node.flags]
flags = [] # type: List[EggexFlag]
return value.Eggex(spliced, flags, None, 0, None)
spliced = self._EvalRegex(node.regex, node.canonical_flags)

# as_ere and name_types filled in during translation
return value.Eggex(spliced, node.canonical_flags, None, None)


# vim: sw=4

0 comments on commit 167dfcc

Please sign in to comment.