[frontend/refactor] Remove Id.Lit_TildeLike
Replace with Id.Lit_Tilde and Id.Lit_Slash

Preparing to fix #1854 - tilde and brace expansion interaction.

benchmarks/tokens.sh shows that there isn't too big a blowup.
Andy C committed Mar 5, 2024
1 parent 2f16ae9 commit f59ac79
Showing 11 changed files with 206 additions and 111 deletions.
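
As a quick illustration of the refactor, a sketch in the style of the frontend/lexer_def_test.py changes below; the input '~alice/src' is hypothetical and its token stream is inferred from the new lexer rules, not copied from this commit:

    # Hypothetical input, using the test file's own _InitLexer helper
    lx = _InitLexer('~alice/src')
    t = lx.Read(lex_mode_e.ShCommand)  # Id.Lit_Tilde '~' (old: one Id.Lit_TildeLike '~alice')
    t = lx.Read(lex_mode_e.ShCommand)  # Id.Lit_Chars 'alice'
    t = lx.Read(lex_mode_e.ShCommand)  # Id.Lit_Slash '/' (new token)
    t = lx.Read(lex_mode_e.ShCommand)  # Id.Lit_Chars 'src'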
60 changes: 39 additions & 21 deletions core/completion.py
@@ -47,6 +47,7 @@
 from core import ui
 from core import util
 from frontend import consts
+from frontend import lexer
 from frontend import location
 from frontend import reader
 from mycpp import mylib
@@ -1039,7 +1040,8 @@ def Matches(self, comp):
         if t2.id == Id.Left_DollarBrace and IsDummy(t1):
             self.comp_ui_state.display_pos = t2.col + 2  # 2 for ${
             for name in self.mem.VarNames():
-                yield line_until_tab + name  # no need to quote var names
+                # no need to quote var names
+                yield line_until_tab + name
             return
 
         # echo $P
@@ -1052,8 +1054,8 @@ def Matches(self, comp):
             n = len(to_complete)
             for name in self.mem.VarNames():
                 if name.startswith(to_complete):
-                    yield line_until_tab + name[
-                        n:]  # no need to quote var names
+                    # no need to quote var names
+                    yield line_until_tab + name[n:]
             return
 
         # echo ${P
@@ -1063,8 +1065,8 @@ def Matches(self, comp):
             n = len(to_complete)
             for name in self.mem.VarNames():
                 if name.startswith(to_complete):
-                    yield line_until_tab + name[
-                        n:]  # no need to quote var names
+                    # no need to quote var names
+                    yield line_until_tab + name[n:]
             return
 
         # echo $(( VAR
@@ -1074,8 +1076,8 @@ def Matches(self, comp):
             n = len(to_complete)
             for name in self.mem.VarNames():
                 if name.startswith(to_complete):
-                    yield line_until_tab + name[
-                        n:]  # no need to quote var names
+                    # no need to quote var names
+                    yield line_until_tab + name[n:]
             return
 
         if len(trail.words) > 0:
@@ -1086,23 +1088,39 @@ def Matches(self, comp):
         # tokens, because otherwise f~a will complete.  Looking at word_part is
         # EXACTLY what we want.
         parts = trail.words[-1].parts
-        if (len(parts) == 2 and parts[0].tag() == word_part_e.Literal and
-                parts[1].tag() == word_part_e.Literal and
-                cast(Token, parts[0]).id == Id.Lit_TildeLike and
-                cast(Token, parts[1]).id == Id.Lit_CompDummy):
-            t2 = cast(Token, parts[0])
+        if len(parts) > 0 and word_.LiteralId(parts[0]) == Id.Lit_Tilde:
+            #log('TILDE parts %s', parts)
 
-            # +1 for ~
-            self.comp_ui_state.display_pos = t2.col + 1
+            if (len(parts) == 2 and
+                    word_.LiteralId(parts[1]) == Id.Lit_CompDummy):
+                tilde_tok = cast(Token, parts[0])
 
-            to_complete = t2.tval[1:]
-            n = len(to_complete)
-            for u in pyos.GetAllUsers():  # catch errors?
-                name = u.pw_name
-                if name.startswith(to_complete):
-                    s = line_until_tab + ShellQuoteB(name[n:]) + '/'
+                # end of tilde
+                self.comp_ui_state.display_pos = tilde_tok.col + 1
+
+                to_complete = ''
+                for u in pyos.GetAllUsers():
+                    name = u.pw_name
+                    s = line_until_tab + ShellQuoteB(name) + '/'
                     yield s
-            return
+                return
+
+            if (len(parts) == 3 and
+                    word_.LiteralId(parts[1]) == Id.Lit_Chars and
+                    word_.LiteralId(parts[2]) == Id.Lit_CompDummy):
+
+                chars_tok = cast(Token, parts[1])
+
+                self.comp_ui_state.display_pos = chars_tok.col
+
+                to_complete = lexer.TokenVal(chars_tok)
+                n = len(to_complete)
+                for u in pyos.GetAllUsers():  # catch errors?
+                    name = u.pw_name
+                    if name.startswith(to_complete):
+                        s = line_until_tab + ShellQuoteB(name[n:]) + '/'
+                        yield s
+                return
 
         # echo hi > f<TAB> (complete redirect arg)
         if len(trail.redirects) > 0:
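To summarize the hunk above: the new code matches two word shapes at the completion point. The part lists here are inferred from the conditions in the diff, not printed from a real run:

    # echo ~<TAB>    parts = [Lit_Tilde '~', Lit_CompDummy]
    #   -> len(parts) == 2 branch: offer every user name, e.g. ~root/
    # echo ~ro<TAB>  parts = [Lit_Tilde '~', Lit_Chars 'ro', Lit_CompDummy]
    #   -> len(parts) == 3 branch: to_complete = 'ro', offer ~root/ etc.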
1 change: 1 addition & 0 deletions core/completion_test.py
@@ -269,6 +269,7 @@ def testCompletesHomeDirs(self):
         comp = MockApi(line='echo ~r')
         print(comp)
         m = list(r.Matches(comp))
+
         # This test isn't hermetic, but I think root should be on all systems.
         self.assert_('echo ~root/' in m, 'Got %s' % m)
 
2 changes: 1 addition & 1 deletion frontend/id_kind_def.py
@@ -269,7 +269,7 @@ def AddKinds(spec):
         'Equals',  # For = f()
         'Dollar',  # detecting 'echo $'
         'DRightBracket',  # the ]] that matches [[, NOT a keyword
-        'TildeLike',  # tilde expansion
+        'Tilde',  # tilde expansion
         'Pound',  # for comment or VarOp state
         'TPound',  # for doc comments like ###
         'TDot',  # for multiline commands ...
31 changes: 13 additions & 18 deletions frontend/lexer_def.py
@@ -61,14 +61,6 @@ def R(pat, tok_type):
 
 _SIGNIFICANT_SPACE = R(r'[ \t]+', Id.WS_Space)
 
-# Tilde expansion chars are Lit_Chars, but WITHOUT the /.  The NEXT token (if
-# any) after this TildeLike token should start with a /.
-#
-# It would have been REALLY NICE to add an optional /? at the end of THIS
-# token, but we can't do that because of ${x//~/replace}.  The third / is not
-# part of the tilde sub!!!
-_TILDE_LIKE = R(r'~[a-zA-Z0-9_.-]*', Id.Lit_TildeLike)
-
 _BACKSLASH = [
     # To be conservative, we could deny a set of chars similar to
     # _LITERAL_WHITELIST_REGEX, rather than allowing all the operator characters
@@ -149,17 +141,14 @@ def R(pat, tok_type):
 # TODO: Add + here because it's never special?  It's different for YSH though.
 
 # The range \x80-\xff makes sure that UTF-8 sequences are a single token.
-_LITERAL_WHITELIST_REGEX = r'[\x80-\xffa-zA-Z0-9_/.\-]+'
+_LITERAL_WHITELIST_REGEX = r'[\x80-\xffa-zA-Z0-9_.\-]+'
 
 _UNQUOTED = _BACKSLASH + _LEFT_SUBS + _LEFT_UNQUOTED + _LEFT_PROCSUB + _VARS + [
     # NOTE: We could add anything 128 and above to this character class?  So
     # utf-8 characters don't get split?
     R(_LITERAL_WHITELIST_REGEX, Id.Lit_Chars),
-
-    # TODO: replace _TILDE_LIKE everywhere
-    _TILDE_LIKE,
-    #C('/', Id.Lit_Slash),
-
+    C('~', Id.Lit_Tilde),  # for tilde sub
+    C('/', Id.Lit_Slash),  # also for tilde sub
     C(':', Id.Lit_Colon),  # for special PATH=a:~foo tilde detection
     C('$', Id.Lit_Dollar),  # shopt -u parse_dollar
     C('#', Id.Lit_Pound),  # For comments
@@ -382,8 +371,11 @@ def R(pat, tok_type):
 
     # NOTE: bash accounts for spaces and non-word punctuation like ; inside ()
     # and [].  We will avoid that and ask the user to extract a variable?
-    R(r'[a-zA-Z0-9_/-]+', Id.Lit_Chars),  # not including period
-    _TILDE_LIKE,  # bash weirdness: RHS of [[ x =~ ~ ]] is expanded
+    R(r'[a-zA-Z0-9_-]+', Id.Lit_Chars),  # not including period
+
+    # Tokens for Tilde sub.  bash weirdness: RHS of [[ x =~ ~ ]] is expanded
+    C('~', Id.Lit_Tilde),
+    C('/', Id.Lit_Slash),
     _SIGNIFICANT_SPACE,
 
     # Normally, \x evaluates to x.  But quoted regex metacharacters like \* should
@@ -416,11 +408,14 @@
   _BACKSLASH + _VS_ARG_COMMON + _LEFT_SUBS + _LEFT_UNQUOTED + _LEFT_PROCSUB + \
   _VARS + _EXTGLOB_BEGIN + [
 
-    _TILDE_LIKE,
+    # Token for Tilde sub
+    C('~', Id.Lit_Tilde),
+
+    # - doesn't match ~ for tilde sub
     # - doesn't match < and > so it doesn't eat <()
     # - doesn't match @ ! ? + * so it doesn't eat _EXTGLOB_BEGIN -- ( alone it
     #   not enough
-    R(r'[^$`/}"\'\0\\#%<>@!?+*]+', Id.Lit_Chars),
+    R(r'[^$`~/}"\'\0\\#%<>@!?+*]+', Id.Lit_Chars),
     R(r'[^\0]', Id.Lit_Other),  # e.g. "$", must be last
 ]
 
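Context for the lexer hunks above, restating the removed _TILDE_LIKE comment: a trailing / can never be folded into the tilde token, because in ${x//~/replace} the / after the ~ separates pattern from replacement and is not part of a tilde sub. With ~ and / as separate tokens, the parser makes that call instead. A sketch with hypothetical inputs; the token streams are inferred from the new rules, not from a real run:

    # ~/bin           -> Lit_Tilde '~', Lit_Slash '/', Lit_Chars 'bin'
    # PATH=a:~foo     -> ..., Lit_Colon ':', Lit_Tilde '~', Lit_Chars 'foo'
    #                    (the Lit_Colon entry exists for exactly this detection)
    # ${x//~/replace} -> the ~ is still just Lit_Tilde; whether a tilde sub
    #                    follows is now the parser's decision, not the lexer's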
24 changes: 18 additions & 6 deletions frontend/lexer_def_test.py
@@ -54,7 +54,7 @@ def testRead(self):
         self.assertTokensEqual(FakeTok(Id.WS_Space, ' '), t)
 
         t = lexer.Read(lex_mode_e.ShCommand)
-        self.assertTokensEqual(FakeTok(Id.Lit_Chars, '/'), t)
+        self.assertTokensEqual(FakeTok(Id.Lit_Slash, '/'), t)
 
         t = lexer.Read(lex_mode_e.ShCommand)
         self.assertTokensEqual(FakeTok(Id.Op_Newline, '\n'), t)
@@ -67,7 +67,13 @@ def testRead(self):
         self.assertTokensEqual(FakeTok(Id.WS_Space, ' '), t)
 
         t = lexer.Read(lex_mode_e.ShCommand)
-        self.assertTokensEqual(FakeTok(Id.Lit_Chars, '/home/'), t)
+        self.assertTokensEqual(FakeTok(Id.Lit_Slash, '/'), t)
+
+        t = lexer.Read(lex_mode_e.ShCommand)
+        self.assertTokensEqual(FakeTok(Id.Lit_Chars, 'home'), t)
+
+        t = lexer.Read(lex_mode_e.ShCommand)
+        self.assertTokensEqual(FakeTok(Id.Lit_Slash, '/'), t)
 
         t = lexer.Read(lex_mode_e.ShCommand)
         self.assertTokensEqual(FakeTok(Id.Op_Newline, '\n'), t)
@@ -81,12 +87,18 @@ def testRead(self):
 
     def testMode_VSub_ArgUnquoted(self):
         # Another EOF gives EOF
-        lexer = _InitLexer("'hi'")
-        t = lexer.Read(lex_mode_e.VSub_ArgUnquoted)
-        #self.assertTokensEqual(FakeTok(Id.Eof_Real, ''), t)
-        #t = l.Read(lex_mode_e.VSub_ArgUnquoted)
+        lx = _InitLexer("'hi'")
+        t = lx.Read(lex_mode_e.VSub_ArgUnquoted)
+        print(t)
+
         self.assertTokensEqual(FakeTok(Id.Left_SingleQuote, "'"), t)
 
+        lx = _InitLexer("~root")
+        t = lx.Read(lex_mode_e.VSub_ArgUnquoted)
+        print(t)
+
+        self.assertTokensEqual(FakeTok(Id.Lit_Tilde, '~'), t)
+
     def testMode_ExtGlob(self):
         lexer = _InitLexer('@(foo|bar)')
 
7 changes: 5 additions & 2 deletions frontend/location.py
@@ -252,7 +252,7 @@ def LeftTokenForWordPart(part):
 
         elif case(word_part_e.TildeSub):
             part = cast(word_part.TildeSub, UP_part)
-            return part.token
+            return part.left
 
         elif case(word_part_e.ArithSub):
             part = cast(word_part.ArithSub, UP_part)
@@ -327,7 +327,10 @@ def _RightTokenForWordPart(part):
 
         elif case(word_part_e.TildeSub):
             part = cast(word_part.TildeSub, UP_part)
-            return part.token
+            if part.name is not None:
+                return part.name  # ~bob/
+            else:
+                return part.left  # ~/
 
         elif case(word_part_e.ArithSub):
             part = cast(word_part.ArithSub, UP_part)
3 changes: 2 additions & 1 deletion frontend/syntax.asdl
@@ -179,7 +179,8 @@ module syntax
   # For command sub and process sub: $(...) <(...) >(...)
   | CommandSub %CommandSub
   # ~ or ~bob
-  | TildeSub(Token token, str? user_name)
+  | TildeSub(Token left,  # always the tilde
+             Token? name, str? user_name)
   | ArithSub(Token left, arith_expr anode, Token right)
   # {a,b,c}
   | BracedTuple(List[CompoundWord] words)
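A hedged sketch of what the new TildeSub fields hold; the values are inferred from the schema comment and the location.py change above, not from a real parse (the user_name string for ~bob is an assumption):

    # ~     -> TildeSub(left=<Lit_Tilde '~'>, name=None, user_name=None)
    # ~bob  -> TildeSub(left=<Lit_Tilde '~'>, name=<Lit_Chars 'bob'>,
    #                   user_name='bob')
    #
    # Per location.py: the left token is always the ~; the right token is
    # name when present (~bob/) and the ~ itself otherwise (~/)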
(4 more changed files not shown.)