From 5ec0ce76737968b32160d4251bbbe8fea8ae7a5c Mon Sep 17 00:00:00 2001 From: Andy C Date: Fri, 10 Jan 2025 02:44:16 -0500 Subject: [PATCH] [lazylex/html] Special lexing rules for or - we search until we see that + self.search_state = None # type: Optional[str] + def _Peek(self): # type: () -> Tuple[int, int] """ @@ -220,6 +220,15 @@ def _Peek(self): assert self.pos < self.right_pos, self.pos + if self.search_state is not None: + pos = self.s.find(self.search_state, self.pos) + if pos == -1: + # unterminated ' - pos = self.s.find(end_tag, self.pos) - if pos == -1: - # unterminated - raise LexError(self.s, self.pos) + if tok_id == Tok.StartTag: + tag_name = m.group(1) # captured + if tag_name == 'script': + self.search_state = '' + elif tag_name == 'style': + self.search_state = '' return tok_id, m.end() else: diff --git a/lazylex/html_test.py b/lazylex/html_test.py index 56801b9fd..2bacade95 100755 --- a/lazylex/html_test.py +++ b/lazylex/html_test.py @@ -179,26 +179,22 @@ def testScriptStyle(self): self.assertEqual(12, pos) self.assertEqual(Tok.RawData, tok_id) - return - # tok_id, pos = next(lex) - self.assertEqual(27, pos) log('tok %r', html.TokenName(tok_id)) - self.assertEqual(Tok.CDataEndTag, tok_id) + self.assertEqual(87, pos) + self.assertEqual(Tok.EndTag, tok_id) def testValid(self): Tok = html.Tok