Added recursive rule (#20)
We added support for recursive rules: a `Rule` can now carry a list of sub-rules (`next_rule`) that is applied to the text matched by the parent pattern, producing nested tokens.

For example:
```python
import re
from lexery import Lexer, Rule

text = 'a b(ac) c'
lexer = Lexer(rules=[Rule('a', re.compile(r'a\s*')),
                     Rule('b', re.compile(r'b\(\w+\)\s*'), next_rule=[
                         Rule('clause', re.compile(r'b\(')),
                         Rule('obj', re.compile(r'\w+')),
                         Rule(')', re.compile(r'\)')),
                         ]),
                     Rule('c', re.compile('c')),
                     ])
t = lexer.lex(text)
```

This code will give you:
```
[
    Token('a', 'a', 0, 0),
    Token(
        'b',
        [
            Token('clause', 'b(', 0, 0),
            Token('obj', 'ac', 2, 0),
            Token(')', ')', 4, 0)
        ],
        5, 0
    ),
    Token('c', 'c', 7, 0)
]
```
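
Since a rule with sub-rules yields a `Token` whose `content` is itself a list of `Token`s (as in the `'b'` token above), consumers need to recurse when processing the result. A minimal sketch of such a walk, assuming `t` is the flat token list shown above (the `walk` helper is hypothetical, not part of the library):
```python
def walk(tokens, depth=0):
    """Print a possibly nested token list as an indented tree."""
    for token in tokens:
        if isinstance(token.content, list):
            # A recursive rule matched: content holds the sub-tokens.
            print('  ' * depth + token.identifier)
            walk(token.content, depth + 1)
        else:
            print('  ' * depth + f'{token.identifier}: {token.content!r}')

walk(t)
```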
mpskex authored May 23, 2023
1 parent 1f470c1 commit 5602772
Showing 2 changed files with 30 additions and 6 deletions.
34 changes: 29 additions & 5 deletions lexery/__init__.py
```diff
@@ -43,7 +43,7 @@ def __eq__(self, other: object) -> bool:
 class Rule:
     """Define a lexing rule."""
 
-    def __init__(self, identifier: str, pattern: Pattern) -> None:
+    def __init__(self, identifier: str, pattern: Pattern, next_rule=None) -> None:
         """
         Initialize.
@@ -52,6 +52,31 @@ def __init__(self, identifier: str, pattern: Pattern) -> None:
         """
         self.identifier = identifier
         self.pattern = pattern
+        self.next_rule = next_rule
+
+    def match(self, text, pos, lno):
+        """
+        Match the rule against the given text and position.
+
+        :param text: the given text to match
+        :param pos: position in the text
+        :param lno: line number of the text
+        """
+        mtch = self.pattern.match(text, pos)
+        ret = []
+        if self.next_rule is not None and mtch is not None:
+            pos = 0
+            for rule in self.next_rule:
+                another_mtch, another_t = rule.match(mtch.group(), pos, 0)
+                if another_mtch:
+                    ret.append(another_t)
+                    pos += len(another_mtch.group())
+        else:
+            if mtch:
+                ret = mtch.group()
+            else:
+                ret = ''
+        return mtch, Token(self.identifier, content=ret, position=pos, lineno=lno)
 
 
 NONTAB_RE = re.compile(r'[^\t]')
@@ -190,11 +215,10 @@ def lex(self, text: str) -> List[List[Token]]:
             while position < len(line):
                 mtched = False
                 for rule in self.rules:
-                    mtch = rule.pattern.match(line, position)
+                    another_line = line
+                    mtch, another_token = rule.match(another_line, position, lineno)
                     if mtch:
-                        token = Token(
-                            identifier=rule.identifier, content=mtch.group(), position=position, lineno=lineno)
-
+                        token = another_token
                         lexing.emit_matched_token(token=token)
 
                         position = mtch.end()
```
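
The new `Rule.match` can also be exercised on its own. A small sketch based on the signatures in the diff above (it returns the outer `re.Match` plus a `Token` whose content is the list of sub-tokens):
```python
import re
from lexery import Rule

# A rule whose next_rule chain re-lexes whatever the outer pattern matched.
rule = Rule('b', re.compile(r'b\(\w+\)\s*'), next_rule=[
    Rule('clause', re.compile(r'b\(')),
    Rule('obj', re.compile(r'\w+')),
    Rule(')', re.compile(r'\)')),
])

mtch, token = rule.match('b(ac) c', 0, 0)
# mtch is the re.Match for the outer pattern ('b(ac) ');
# token.content is the list of sub-tokens: clause, obj, ).
```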
2 changes: 1 addition & 1 deletion precommit.py
```diff
@@ -57,4 +57,4 @@ def main() -> int:
 
 
 if __name__ == "__main__":
-    sys.exit(main())
\ No newline at end of file
+    sys.exit(main())
```
