[frontend/refactor] Remove Id.Lit_TildeLike
Replace with Id.Lit_Tilde and Id.Lit_Slash

Preparing to fix #1854 - tilde and brace expansion interaction.

benchmarks/tokens.sh shows that there isn't too big a blowup.
Andy C committed Mar 5, 2024
1 parent 2f16ae9 commit f59ac79
Showing 11 changed files with 206 additions and 111 deletions.
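
As a quick illustration of the refactor, a sketch in the style of the frontend/lexer_def_test.py changes below; the input '~alice/src' is hypothetical and its token stream is inferred from the new lexer rules, not copied from this commit:

    # Hypothetical input, using the test file's own _InitLexer helper
    lx = _InitLexer('~alice/src')
    t = lx.Read(lex_mode_e.ShCommand)  # Id.Lit_Tilde '~' (old: one Id.Lit_TildeLike '~alice')
    t = lx.Read(lex_mode_e.ShCommand)  # Id.Lit_Chars 'alice'
    t = lx.Read(lex_mode_e.ShCommand)  # Id.Lit_Slash '/' (new token)
    t = lx.Read(lex_mode_e.ShCommand)  # Id.Lit_Chars 'src'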
60 changes: 39 additions & 21 deletions core/completion.py
@@ -47,6 +47,7 @@
 from core import ui
 from core import util
 from frontend import consts
+from frontend import lexer
 from frontend import location
 from frontend import reader
 from mycpp import mylib
@@ -1039,7 +1040,8 @@ def Matches(self, comp):
         if t2.id == Id.Left_DollarBrace and IsDummy(t1):
             self.comp_ui_state.display_pos = t2.col + 2  # 2 for ${
             for name in self.mem.VarNames():
-                yield line_until_tab + name  # no need to quote var names
+                # no need to quote var names
+                yield line_until_tab + name
             return
 
         # echo $P
@@ -1052,8 +1054,8 @@ def Matches(self, comp):
             n = len(to_complete)
             for name in self.mem.VarNames():
                 if name.startswith(to_complete):
-                    yield line_until_tab + name[
-                        n:]  # no need to quote var names
+                    # no need to quote var names
+                    yield line_until_tab + name[n:]
             return
 
         # echo ${P
@@ -1063,8 +1065,8 @@ def Matches(self, comp):
             n = len(to_complete)
             for name in self.mem.VarNames():
                 if name.startswith(to_complete):
-                    yield line_until_tab + name[
-                        n:]  # no need to quote var names
+                    # no need to quote var names
+                    yield line_until_tab + name[n:]
             return
 
         # echo $(( VAR
@@ -1074,8 +1076,8 @@ def Matches(self, comp):
             n = len(to_complete)
             for name in self.mem.VarNames():
                 if name.startswith(to_complete):
-                    yield line_until_tab + name[
-                        n:]  # no need to quote var names
+                    # no need to quote var names
+                    yield line_until_tab + name[n:]
             return
 
         if len(trail.words) > 0:
@@ -1086,23 +1088,39 @@ def Matches(self, comp):
         # tokens, because otherwise f~a will complete.  Looking at word_part is
         # EXACTLY what we want.
         parts = trail.words[-1].parts
-        if (len(parts) == 2 and parts[0].tag() == word_part_e.Literal and
-                parts[1].tag() == word_part_e.Literal and
-                cast(Token, parts[0]).id == Id.Lit_TildeLike and
-                cast(Token, parts[1]).id == Id.Lit_CompDummy):
-            t2 = cast(Token, parts[0])
+        if len(parts) > 0 and word_.LiteralId(parts[0]) == Id.Lit_Tilde:
+            #log('TILDE parts %s', parts)
 
-            # +1 for ~
-            self.comp_ui_state.display_pos = t2.col + 1
+            if (len(parts) == 2 and
+                    word_.LiteralId(parts[1]) == Id.Lit_CompDummy):
+                tilde_tok = cast(Token, parts[0])
 
-            to_complete = t2.tval[1:]
-            n = len(to_complete)
-            for u in pyos.GetAllUsers():  # catch errors?
-                name = u.pw_name
-                if name.startswith(to_complete):
-                    s = line_until_tab + ShellQuoteB(name[n:]) + '/'
+                # end of tilde
+                self.comp_ui_state.display_pos = tilde_tok.col + 1
+
+                to_complete = ''
+                for u in pyos.GetAllUsers():
+                    name = u.pw_name
+                    s = line_until_tab + ShellQuoteB(name) + '/'
                     yield s
-            return
+                return
+
+            if (len(parts) == 3 and
+                    word_.LiteralId(parts[1]) == Id.Lit_Chars and
+                    word_.LiteralId(parts[2]) == Id.Lit_CompDummy):
+
+                chars_tok = cast(Token, parts[1])
+
+                self.comp_ui_state.display_pos = chars_tok.col
+
+                to_complete = lexer.TokenVal(chars_tok)
+                n = len(to_complete)
+                for u in pyos.GetAllUsers():  # catch errors?
+                    name = u.pw_name
+                    if name.startswith(to_complete):
+                        s = line_until_tab + ShellQuoteB(name[n:]) + '/'
+                        yield s
+                return
 
         # echo hi > f<TAB> (complete redirect arg)
         if len(trail.redirects) > 0:
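To summarize the hunk above: the new code matches two word shapes at the completion point. The part lists here are inferred from the conditions in the diff, not printed from a real run:

    # echo ~<TAB>    parts = [Lit_Tilde '~', Lit_CompDummy]
    #   -> len(parts) == 2 branch: offer every user name, e.g. ~root/
    # echo ~ro<TAB>  parts = [Lit_Tilde '~', Lit_Chars 'ro', Lit_CompDummy]
    #   -> len(parts) == 3 branch: to_complete = 'ro', offer ~root/ etc.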
1 change: 1 addition & 0 deletions core/completion_test.py
@@ -269,6 +269,7 @@ def testCompletesHomeDirs(self):
         comp = MockApi(line='echo ~r')
         print(comp)
         m = list(r.Matches(comp))
+
         # This test isn't hermetic, but I think root should be on all systems.
         self.assert_('echo ~root/' in m, 'Got %s' % m)
 
2 changes: 1 addition & 1 deletion frontend/id_kind_def.py
@@ -269,7 +269,7 @@ def AddKinds(spec):
         'Equals',  # For = f()
         'Dollar',  # detecting 'echo $'
         'DRightBracket',  # the ]] that matches [[, NOT a keyword
-        'TildeLike',  # tilde expansion
+        'Tilde',  # tilde expansion
         'Pound',  # for comment or VarOp state
         'TPound',  # for doc comments like ###
         'TDot',  # for multiline commands ...
31 changes: 13 additions & 18 deletions frontend/lexer_def.py
@@ -61,14 +61,6 @@ def R(pat, tok_type):
 
 _SIGNIFICANT_SPACE = R(r'[ \t]+', Id.WS_Space)
 
-# Tilde expansion chars are Lit_Chars, but WITHOUT the /.  The NEXT token (if
-# any) after this TildeLike token should start with a /.
-#
-# It would have been REALLY NICE to add an optional /? at the end of THIS
-# token, but we can't do that because of ${x//~/replace}.  The third / is not
-# part of the tilde sub!!!
-_TILDE_LIKE = R(r'~[a-zA-Z0-9_.-]*', Id.Lit_TildeLike)
-
 _BACKSLASH = [
     # To be conservative, we could deny a set of chars similar to
     # _LITERAL_WHITELIST_REGEX, rather than allowing all the operator characters
@@ -149,17 +141,14 @@ def R(pat, tok_type):
 # TODO: Add + here because it's never special?  It's different for YSH though.
 
 # The range \x80-\xff makes sure that UTF-8 sequences are a single token.
-_LITERAL_WHITELIST_REGEX = r'[\x80-\xffa-zA-Z0-9_/.\-]+'
+_LITERAL_WHITELIST_REGEX = r'[\x80-\xffa-zA-Z0-9_.\-]+'
 
 _UNQUOTED = _BACKSLASH + _LEFT_SUBS + _LEFT_UNQUOTED + _LEFT_PROCSUB + _VARS + [
     # NOTE: We could add anything 128 and above to this character class?  So
     # utf-8 characters don't get split?
     R(_LITERAL_WHITELIST_REGEX, Id.Lit_Chars),
-
-    # TODO: replace _TILDE_LIKE everywhere
-    _TILDE_LIKE,
-    #C('/', Id.Lit_Slash),
-
+    C('~', Id.Lit_Tilde),  # for tilde sub
+    C('/', Id.Lit_Slash),  # also for tilde sub
     C(':', Id.Lit_Colon),  # for special PATH=a:~foo tilde detection
     C('$', Id.Lit_Dollar),  # shopt -u parse_dollar
     C('#', Id.Lit_Pound),  # For comments
@@ -382,8 +371,11 @@ def R(pat, tok_type):
 
     # NOTE: bash accounts for spaces and non-word punctuation like ; inside ()
     # and [].  We will avoid that and ask the user to extract a variable?
-    R(r'[a-zA-Z0-9_/-]+', Id.Lit_Chars),  # not including period
-    _TILDE_LIKE,  # bash weirdness: RHS of [[ x =~ ~ ]] is expanded
+    R(r'[a-zA-Z0-9_-]+', Id.Lit_Chars),  # not including period
+
+    # Tokens for Tilde sub.  bash weirdness: RHS of [[ x =~ ~ ]] is expanded
+    C('~', Id.Lit_Tilde),
+    C('/', Id.Lit_Slash),
     _SIGNIFICANT_SPACE,
 
     # Normally, \x evaluates to x.  But quoted regex metacharacters like \* should
@@ -416,11 +408,14 @@
   _BACKSLASH + _VS_ARG_COMMON + _LEFT_SUBS + _LEFT_UNQUOTED + _LEFT_PROCSUB + \
   _VARS + _EXTGLOB_BEGIN + [
 
-    _TILDE_LIKE,
+    # Token for Tilde sub
+    C('~', Id.Lit_Tilde),
+
+    # - doesn't match ~ for tilde sub
     # - doesn't match < and > so it doesn't eat <()
     # - doesn't match @ ! ? + * so it doesn't eat _EXTGLOB_BEGIN -- ( alone it
     #   not enough
-    R(r'[^$`/}"\'\0\\#%<>@!?+*]+', Id.Lit_Chars),
+    R(r'[^$`~/}"\'\0\\#%<>@!?+*]+', Id.Lit_Chars),
     R(r'[^\0]', Id.Lit_Other),  # e.g. "$", must be last
 ]
 
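Context for the lexer hunks above, restating the removed _TILDE_LIKE comment: a trailing / can never be folded into the tilde token, because in ${x//~/replace} the / after the ~ separates pattern from replacement and is not part of a tilde sub. With ~ and / as separate tokens, the parser makes that call instead. A sketch with hypothetical inputs; the token streams are inferred from the new rules, not from a real run:

    # ~/bin           -> Lit_Tilde '~', Lit_Slash '/', Lit_Chars 'bin'
    # PATH=a:~foo     -> ..., Lit_Colon ':', Lit_Tilde '~', Lit_Chars 'foo'
    #                    (the Lit_Colon entry exists for exactly this detection)
    # ${x//~/replace} -> the ~ is still just Lit_Tilde; whether a tilde sub
    #                    follows is now the parser's decision, not the lexer's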
24 changes: 18 additions & 6 deletions frontend/lexer_def_test.py
@@ -54,7 +54,7 @@ def testRead(self):
         self.assertTokensEqual(FakeTok(Id.WS_Space, ' '), t)
 
         t = lexer.Read(lex_mode_e.ShCommand)
-        self.assertTokensEqual(FakeTok(Id.Lit_Chars, '/'), t)
+        self.assertTokensEqual(FakeTok(Id.Lit_Slash, '/'), t)
 
         t = lexer.Read(lex_mode_e.ShCommand)
         self.assertTokensEqual(FakeTok(Id.Op_Newline, '\n'), t)
@@ -67,7 +67,13 @@ def testRead(self):
         self.assertTokensEqual(FakeTok(Id.WS_Space, ' '), t)
 
         t = lexer.Read(lex_mode_e.ShCommand)
-        self.assertTokensEqual(FakeTok(Id.Lit_Chars, '/home/'), t)
+        self.assertTokensEqual(FakeTok(Id.Lit_Slash, '/'), t)
+
+        t = lexer.Read(lex_mode_e.ShCommand)
+        self.assertTokensEqual(FakeTok(Id.Lit_Chars, 'home'), t)
+
+        t = lexer.Read(lex_mode_e.ShCommand)
+        self.assertTokensEqual(FakeTok(Id.Lit_Slash, '/'), t)
 
         t = lexer.Read(lex_mode_e.ShCommand)
         self.assertTokensEqual(FakeTok(Id.Op_Newline, '\n'), t)
@@ -81,12 +87,18 @@ def testRead(self):
 
     def testMode_VSub_ArgUnquoted(self):
         # Another EOF gives EOF
-        lexer = _InitLexer("'hi'")
-        t = lexer.Read(lex_mode_e.VSub_ArgUnquoted)
-        #self.assertTokensEqual(FakeTok(Id.Eof_Real, ''), t)
-        #t = l.Read(lex_mode_e.VSub_ArgUnquoted)
+        lx = _InitLexer("'hi'")
+        t = lx.Read(lex_mode_e.VSub_ArgUnquoted)
+        print(t)
+
         self.assertTokensEqual(FakeTok(Id.Left_SingleQuote, "'"), t)
 
+        lx = _InitLexer("~root")
+        t = lx.Read(lex_mode_e.VSub_ArgUnquoted)
+        print(t)
+
+        self.assertTokensEqual(FakeTok(Id.Lit_Tilde, '~'), t)
+
     def testMode_ExtGlob(self):
         lexer = _InitLexer('@(foo|bar)')
 
7 changes: 5 additions & 2 deletions frontend/location.py
@@ -252,7 +252,7 @@ def LeftTokenForWordPart(part):
 
         elif case(word_part_e.TildeSub):
             part = cast(word_part.TildeSub, UP_part)
-            return part.token
+            return part.left
 
         elif case(word_part_e.ArithSub):
             part = cast(word_part.ArithSub, UP_part)
@@ -327,7 +327,10 @@ def _RightTokenForWordPart(part):
 
         elif case(word_part_e.TildeSub):
             part = cast(word_part.TildeSub, UP_part)
-            return part.token
+            if part.name is not None:
+                return part.name  # ~bob/
+            else:
+                return part.left  # ~/
 
         elif case(word_part_e.ArithSub):
             part = cast(word_part.ArithSub, UP_part)
3 changes: 2 additions & 1 deletion frontend/syntax.asdl
@@ -179,7 +179,8 @@ module syntax
   # For command sub and process sub: $(...) <(...) >(...)
   | CommandSub %CommandSub
   # ~ or ~bob
-  | TildeSub(Token token, str? user_name)
+  | TildeSub(Token left,  # always the tilde
+             Token? name, str? user_name)
   | ArithSub(Token left, arith_expr anode, Token right)
   # {a,b,c}
   | BracedTuple(List[CompoundWord] words)
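A hedged sketch of what the new TildeSub fields hold; the values are inferred from the schema comment and the location.py change above, not from a real parse (the user_name string for ~bob is an assumption):

    # ~     -> TildeSub(left=<Lit_Tilde '~'>, name=None, user_name=None)
    # ~bob  -> TildeSub(left=<Lit_Tilde '~'>, name=<Lit_Chars 'bob'>,
    #                   user_name='bob')
    #
    # Per location.py: the left token is always the ~; the right token is
    # name when present (~bob/) and the ~ itself otherwise (~/)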
(4 more changed files not shown.)