diff --git a/devtools/refactor.sh b/devtools/refactor.sh
index 306ce4043..72133e960 100755
--- a/devtools/refactor.sh
+++ b/devtools/refactor.sh
@@ -292,4 +292,19 @@ singleton-primitive() {
echo
}
+htm8() {
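+ ### One-off rename: Tok.<Name> and html.<Name> -> h8_id.<Name> in */*.py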
+ for prefix in Tok html; do
+ for name in \
+ Decl Comment CommentBegin Processing ProcessingBegin \
+ CData CDataBegin \
+ StartTag StartEndTag EndTag \
+ DecChar HexChar CharEntity \
+ RawData HtmlCData \
+ BadAmpersand BadGreaterThan BadLessThan \
+ Invalid EndOfStream; do
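+ # Note: '.' is unescaped in the sed pattern; fine for a one-off rename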
+ sed -i "s/$prefix.$name/h8_id.$name/g" */*.py
+ done
+done
+}
+
task-five "$@"
diff --git a/doctools/help_gen.py b/doctools/help_gen.py
index 623945be3..1fa069d6b 100755
--- a/doctools/help_gen.py
+++ b/doctools/help_gen.py
@@ -35,6 +35,7 @@
import re
import sys
+from _devbuild.gen.htm8_asdl import h8_id
from doctools import html_lib
from doctools.util import log
from lazylex import html
@@ -309,7 +310,7 @@ def ExtractBody(s):
except StopIteration:
break
- if tok_id == html.StartTag:
+ if tok_id == h8_id.StartTag:
tag_lexer.Reset(pos, end_pos)
if tag_lexer.TagName() == 'body':
body_start_right = end_pos # right after <body>
@@ -364,7 +365,7 @@ def HelpTopics(s):
except StopIteration:
break
- if tok_id == html.StartTag:
+ if tok_id == h8_id.StartTag:
tag_lexer.Reset(pos, end_pos)
#log('%r', tag_lexer.TagString())
#log('%r', tag_lexer.TagName())
diff --git a/doctools/oils_doc.py b/doctools/oils_doc.py
index e5de856c3..e90bc1f23 100755
--- a/doctools/oils_doc.py
+++ b/doctools/oils_doc.py
@@ -11,6 +11,8 @@
"""
from __future__ import print_function
+from _devbuild.gen.htm8_asdl import h8_id
+
import cgi
from typing import Iterator
from typing import Any
@@ -121,7 +123,7 @@ def ExpandLinks(s):
except StopIteration:
break
- if tok_id == html.StartTag:
+ if tok_id == h8_id.StartTag:
tag_lexer.Reset(pos, end_pos)
if tag_lexer.TagName() == 'a':
@@ -343,7 +345,7 @@ def SimpleHighlightCode(s):
except StopIteration:
break
- if tok_id == html.StartTag:
+ if tok_id == h8_id.StartTag:
tag_lexer.Reset(pos, end_pos)
if tag_lexer.TagName() == 'pre':
@@ -403,7 +405,7 @@ def HighlightCode(s, default_highlighter, debug_out=None):
except StopIteration:
break
- if tok_id == html.StartTag:
+ if tok_id == h8_id.StartTag:
tag_lexer.Reset(pos, end_pos)
if tag_lexer.TagName() == 'pre':
@@ -416,7 +418,7 @@ def HighlightCode(s, default_highlighter, debug_out=None):
break
tag_lexer.Reset(pos, end_pos)
- if tok_id == html.StartTag and tag_lexer.TagName() == 'code':
+ if tok_id == h8_id.StartTag and tag_lexer.TagName() == 'code':
css_class = tag_lexer.GetAttrRaw('class')
code_start_pos = end_pos
@@ -514,7 +516,7 @@ def HighlightCode(s, default_highlighter, debug_out=None):
except StopIteration:
break
tag_lexer.Reset(slash_code_right, end_pos)
- assert tok_id == html.EndTag, tok_id
+ assert tok_id == h8_id.EndTag, tok_id
assert (tag_lexer.TagName() == 'pre'
), tag_lexer.TagName()
slash_pre_right = end_pos
@@ -559,7 +561,7 @@ def ExtractCode(s, f):
except StopIteration:
break
- if tok_id == html.StartTag:
+ if tok_id == h8_id.StartTag:
tag_lexer.Reset(pos, end_pos)
if tag_lexer.TagName() == 'pre':
pre_start_pos = pos
@@ -571,7 +573,7 @@ def ExtractCode(s, f):
break
tag_lexer.Reset(pos, end_pos)
- if tok_id == html.StartTag and tag_lexer.TagName() == 'code':
+ if tok_id == h8_id.StartTag and tag_lexer.TagName() == 'code':
css_class = tag_lexer.GetAttrRaw('class')
# Skip code blocks that look like ```foo
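
Note (not part of the patch): h8_id and h8_id_str come from the generated
_devbuild.gen.htm8_asdl module, which this diff doesn't show. A rough stand-in
for the interface the call sites rely on, with the token names taken from the
htm8() rename list above and everything else assumed:

    # Hypothetical sketch, not the real generated code.
    class h8_id(object):
        (Decl, Comment, CommentBegin, Processing, ProcessingBegin,
         CData, CDataBegin, StartTag, StartEndTag, EndTag,
         DecChar, HexChar, CharEntity, RawData, HtmlCData,
         BadAmpersand, BadGreaterThan, BadLessThan,
         Invalid, EndOfStream) = range(20)

    _NAMES = dict(
        (v, k) for k, v in vars(h8_id).items() if not k.startswith('_'))

    def h8_id_str(tok_id):
        # type: (int) -> str
        """Readable name for logs and errors; replaces html.TokenName()."""
        return 'h8_id.%s' % _NAMES[tok_id]
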
diff --git a/doctools/ul_table.py b/doctools/ul_table.py
index 53a8332e7..6c76f7042 100755
--- a/doctools/ul_table.py
+++ b/doctools/ul_table.py
@@ -1,6 +1,8 @@
#!/usr/bin/env python2
"""ul_table.py: Markdown Tables Without New Syntax."""
+from _devbuild.gen.htm8_asdl import h8_id, h8_id_str
+
try:
from cStringIO import StringIO
except ImportError:
@@ -32,7 +34,7 @@ def RemoveComments(s):
pos = 0
for tok_id, end_pos in html.ValidTokens(s):
- if tok_id == html.Comment:
+ if tok_id == h8_id.Comment:
value = s[pos:end_pos]
# doc/release-index.md has <!-- REPLACE... --> etc.
if 'REPLACE' not in value:
@@ -54,7 +56,7 @@ def __init__(self, lexer, tag_lexer):
self.lexer = lexer
self.tag_lexer = tag_lexer
- self.tok_id = html.Invalid
+ self.tok_id = h8_id.Invalid
self.start_pos = 0
self.end_pos = 0
@@ -73,7 +75,7 @@ def _Next(self, comment_ok=False):
# Should have called RemoveComments() beforehand. That can still leave
# some REPLACE comments
- if not comment_ok and self.tok_id == html.Comment:
+ if not comment_ok and self.tok_id == h8_id.Comment:
raise html.ParseError('Unexpected HTML comment')
if 0:
@@ -85,9 +87,9 @@ def _EatRawData(self, regex):
"""
Assert that we got text data matching a regex, and advance
"""
- if self.tok_id != html.RawData:
+ if self.tok_id != h8_id.RawData:
raise html.ParseError('Expected RawData, got %s' %
- html.TokenName(self.tok_id))
+ h8_id_str(self.tok_id))
actual = self._CurrentString()
m = re.match(regex, actual) # could compile this
if m is None:
@@ -101,16 +103,16 @@ def _Eat(self, expected_id, expected_tag):
Assert that we got a start or end tag, with the given name, and advance
Args:
- expected_id: html.StartTag or html.EndTag
+ expected_id: h8_id.StartTag or h8_id.EndTag
expected_tag: 'a', 'span', etc.
"""
- assert expected_id in (html.StartTag,
- html.EndTag), html.TokenName(expected_id)
+ assert expected_id in (h8_id.StartTag,
+ h8_id.EndTag), h8_id_str(expected_id)
if self.tok_id != expected_id:
raise html.ParseError(
'Expected token %s, got %s' %
- (html.TokenName(expected_id), html.TokenName(self.tok_id)))
+ (h8_id_str(expected_id), h8_id_str(self.tok_id)))
self.tag_lexer.Reset(self.start_pos, self.end_pos)
tag_name = self.tag_lexer.TagName()
if expected_tag != tag_name:
@@ -124,7 +126,7 @@ def _WhitespaceOk(self):
"""
Optional whitespace
"""
- if (self.tok_id == html.RawData and
+ if (self.tok_id == h8_id.RawData and
_WHITESPACE_RE.match(self.lexer.s, self.start_pos)):
self._Next()
@@ -140,19 +142,19 @@ def FindUlTable(self):
# Find first table
while True:
self._Next(comment_ok=True)
- if self.tok_id == html.EndOfStream:
+ if self.tok_id == h8_id.EndOfStream:
return -1
tag_lexer.Reset(self.start_pos, self.end_pos)
- if (self.tok_id == html.StartTag and
+ if (self.tok_id == h8_id.StartTag and
tag_lexer.TagName() == 'table'):
while True:
self._Next(comment_ok=True)
- if self.tok_id != html.RawData:
+ if self.tok_id != h8_id.RawData:
break
tag_lexer.Reset(self.start_pos, self.end_pos)
- if (self.tok_id == html.StartTag and
+ if (self.tok_id == h8_id.StartTag and
tag_lexer.TagName() == 'ul'):
return self.start_pos
return -1
@@ -186,14 +188,14 @@ def _ListItem(self):
"""
self._WhitespaceOk()
- if self.tok_id != html.StartTag:
+ if self.tok_id != h8_id.StartTag:
return None, None
inner_html = None
td_attrs = None # Can we also have col-attrs?
td_attrs_span = None
- self._Eat(html.StartTag, 'li')
+ self._Eat(h8_id.StartTag, 'li')
left = self.start_pos
@@ -202,7 +204,7 @@ def _ListItem(self):
# because cells can have bulleted lists
balance = 0
while True:
- if self.tok_id == html.StartEndTag:
+ if self.tok_id == h8_id.StartEndTag:
self.tag_lexer.Reset(self.start_pos, self.end_pos)
tag_name = self.tag_lexer.TagName()
# TODO: remove td-attrs backward compat
@@ -211,12 +213,12 @@ def _ListItem(self):
td_attrs = self.tag_lexer.AllAttrsRaw()
#log('CELL ATTRS %r', self._CurrentString())
- elif self.tok_id == html.StartTag:
+ elif self.tok_id == h8_id.StartTag:
self.tag_lexer.Reset(self.start_pos, self.end_pos)
if self.tag_lexer.TagName() == 'li':
balance += 1
- elif self.tok_id == html.EndTag:
+ elif self.tok_id == h8_id.EndTag:
self.tag_lexer.Reset(self.start_pos, self.end_pos)
if self.tag_lexer.TagName() == 'li':
balance -= 1
@@ -236,7 +238,7 @@ def _ListItem(self):
inner_html = s[left:right]
#log('RAW inner html %r', inner_html)
- #self._Eat(html.EndTag, 'li')
+ #self._Eat(h8_id.EndTag, 'li')
self._Next()
return td_attrs, inner_html
@@ -284,7 +286,7 @@ def _ParseTHead(self):
cells = []
self._WhitespaceOk()
- self._Eat(html.StartTag, 'li')
+ self._Eat(h8_id.StartTag, 'li')
# In CommonMark, r'thead\n' is enough, because it strips trailing
# whitespace. I'm not sure if other Markdown processors do that, so
@@ -292,7 +294,7 @@ def _ParseTHead(self):
self._EatRawData(r'thead\s+')
# This is the row data
- self._Eat(html.StartTag, 'ul')
+ self._Eat(h8_id.StartTag, 'ul')
while True:
td_attrs, inner_html = self._ListItem()
@@ -301,10 +303,10 @@ def _ParseTHead(self):
cells.append((td_attrs, inner_html))
self._WhitespaceOk()
- self._Eat(html.EndTag, 'ul')
+ self._Eat(h8_id.EndTag, 'ul')
self._WhitespaceOk()
- self._Eat(html.EndTag, 'li')
+ self._Eat(h8_id.EndTag, 'li')
#log('_ParseTHead %s ', html.TOKEN_NAMES[self.tok_id])
return cells
@@ -334,15 +336,15 @@ def _ParseTr(self):
self._WhitespaceOk()
# Could be a </ul>
- if self.tok_id != html.StartTag:
+ if self.tok_id != h8_id.StartTag:
return None, None
- self._Eat(html.StartTag, 'li')
+ self._Eat(h8_id.StartTag, 'li')
self._EatRawData(r'tr\s*')
tr_attrs = None
- if self.tok_id == html.StartEndTag:
+ if self.tok_id == h8_id.StartEndTag:
self.tag_lexer.Reset(self.start_pos, self.end_pos)
tag_name = self.tag_lexer.TagName()
if tag_name != 'row-attrs':
@@ -352,7 +354,7 @@ def _ParseTr(self):
self._WhitespaceOk()
# This is the row data
- self._Eat(html.StartTag, 'ul')
+ self._Eat(h8_id.StartTag, 'ul')
while True:
td_attrs, inner_html = self._ListItem()
@@ -363,10 +365,10 @@ def _ParseTr(self):
self._WhitespaceOk()
- self._Eat(html.EndTag, 'ul')
+ self._Eat(h8_id.EndTag, 'ul')
self._WhitespaceOk()
- self._Eat(html.EndTag, 'li')
+ self._Eat(h8_id.EndTag, 'li')
#log('_ParseTr %s ', html.TOKEN_NAMES[self.tok_id])
return tr_attrs, cells
@@ -394,7 +396,7 @@ def ParseTable(self):
table = {'tr': []}
ul_start = self.start_pos
- self._Eat(html.StartTag, 'ul')
+ self._Eat(h8_id.StartTag, 'ul')
# Look ahead 2 or 3 tokens:
if self.lexer.LookAhead(r'\s*thead\s+'):
@@ -416,7 +418,7 @@ def ParseTable(self):
#log('___ TR %s', tr)
table['tr'].append((tr_attrs, tr))
- self._Eat(html.EndTag, 'ul')
+ self._Eat(h8_id.EndTag, 'ul')
self._WhitespaceOk()
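
Note (not part of the patch): every caller above follows the same scan
pattern: walk (tok_id, end_pos) pairs, and Reset() the tag lexer on the
[pos, end_pos) span whenever a tag token appears. A minimal sketch of that
pattern as a standalone helper (the helper itself is made up; ValidTokens,
TagLexer, Reset, and TagName are the APIs the patch actually uses):

    from _devbuild.gen.htm8_asdl import h8_id
    from lazylex import html

    def FindStartTags(s, tag_name):
        # type: (str, str) -> list
        """Collect (start, end) spans of matching start tags."""
        tag_lexer = html.TagLexer(s)
        spans = []
        pos = 0
        for tok_id, end_pos in html.ValidTokens(s):
            if tok_id == h8_id.StartTag:
                tag_lexer.Reset(pos, end_pos)  # parse just this tag's span
                if tag_lexer.TagName() == tag_name:
                    spans.append((pos, end_pos))
            pos = end_pos
        return spans
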
diff --git a/lazylex/html.py b/lazylex/html.py
index 5704a2205..cb93c8201 100755
--- a/lazylex/html.py
+++ b/lazylex/html.py
@@ -14,6 +14,8 @@
"""
from __future__ import print_function
+
+from _devbuild.gen.htm8_asdl import h8_id, h8_id_str
from typing import Iterator
from typing import Union
from typing import Any
@@ -41,7 +43,7 @@ class LexError(Exception):
"""
Examples of lex errors:
- - Tok.Invalid, like <> or &&
+ - h8_id.Invalid, like <> or &&
- Unclosed <!--  <?  <![CDATA[  <script>  <style>
-#(r'<!-- .*? -->', Tok.Comment),
+#(r'<!-- .*? -->', h8_id.Comment),
# Hack from Claude: \s\S instead of re.DOTALL. I don't like this
-#(r'<!-- [\s\S]*? -->', Tok.Comment),
-#(r'<!-- (?:.|\n)*? -->', Tok.Comment),
+#(r'<!-- [\s\S]*? -->', h8_id.Comment),
+#(r'<!-- (?:.|\n)*? -->', h8_id.Comment),
HTM8_LEX_COMPILED = MakeLexer(HTM8_LEX)
@@ -306,7 +303,7 @@ def _Peek(self):
Note: not using _Peek() now
"""
if self.pos == self.right_pos:
- return Tok.EndOfStream, self.pos
+ return h8_id.EndOfStream, self.pos
assert self.pos < self.right_pos, self.pos
@@ -322,7 +319,7 @@ def _Peek(self):
raise LexError(self.s, self.pos)
self.search_state = None
# pos is at the beginning of </script> or </style>
- return Tok.HtmlCData, pos
+ return h8_id.HtmlCData, pos
# Find the first match.
# Note: frontend/match.py uses _LongestMatch(), which is different!
@@ -331,7 +328,7 @@ def _Peek(self):
for pat, tok_id in HTM8_LEX_COMPILED:
m = pat.match(self.s, self.pos)
if m:
- if tok_id in (Tok.StartTag, Tok.EndTag, Tok.StartEndTag):
+ if tok_id in (h8_id.StartTag, h8_id.EndTag, h8_id.StartEndTag):
self.tag_pos_left = m.start(1)
self.tag_pos_right = m.end(1)
else:
@@ -339,28 +336,28 @@ def _Peek(self):
self.tag_pos_left = -1
self.tag_pos_right = -1
- if tok_id == Tok.CommentBegin:
+ if tok_id == h8_id.CommentBegin:
pos = self.s.find('-->', self.pos)
if pos == -1:
# unterminated <!--
raise LexError(self.s, self.pos)
- return Tok.Comment, pos + 3 # -->
+ return h8_id.Comment, pos + 3 # -->
- if tok_id == Tok.ProcessingBegin:
+ if tok_id == h8_id.ProcessingBegin:
pos = self.s.find('?>', self.pos)
if pos == -1:
# unterminated
raise LexError(self.s, self.pos)
- return Tok.Processing, pos + 2 # ?>
+ return h8_id.Processing, pos + 2 # ?>
- if tok_id == Tok.CDataBegin:
+ if tok_id == h8_id.CDataBegin:
pos = self.s.find(']]>', self.pos)
if pos == -1:
# unterminated <![CDATA[
raise LexError(self.s, self.pos)
- return Tok.CData, pos + 3 # ]]>
+ return h8_id.CData, pos + 3 # ]]>
- if tok_id == Tok.StartTag:
+ if tok_id == h8_id.StartTag:
# TODO: reduce allocations
if (self.TagNameEquals('script') or
self.TagNameEquals('style')):
@@ -369,7 +366,7 @@ def _Peek(self):
return tok_id, m.end()
else:
- raise AssertionError('Tok.Invalid rule should have matched')
+ raise AssertionError('h8_id.Invalid rule should have matched')
def TagNameEquals(self, expected):
# type: (str) -> bool
@@ -427,7 +424,7 @@ def _Tokens(s, left_pos, right_pos):
while True:
tok_id, pos = lx.Read()
yield tok_id, pos
- if tok_id == Tok.EndOfStream:
+ if tok_id == h8_id.EndOfStream:
break
@@ -441,7 +438,7 @@ def ValidTokens(s, left_pos=0, right_pos=-1):
"""
pos = left_pos
for tok_id, end_pos in _Tokens(s, left_pos, right_pos):
- if tok_id == Tok.Invalid:
+ if tok_id == h8_id.Invalid:
raise LexError(s, pos)
yield tok_id, end_pos
pos = end_pos
@@ -457,9 +454,9 @@ def ValidTokenList(s, no_special_tags=False):
while True:
tok_id, end_pos = lx.Read()
tokens.append((tok_id, end_pos))
- if tok_id == Tok.EndOfStream:
+ if tok_id == h8_id.EndOfStream:
break
- if tok_id == Tok.Invalid:
+ if tok_id == h8_id.Invalid:
raise LexError(s, start_pos)
start_pos = end_pos
return tokens
@@ -572,7 +569,7 @@ def GetSpanForAttrValue(self, attr_name):
# The value should come next
tok_id, start, end = next(events)
assert tok_id in (QuotedValue, UnquotedValue,
- MissingValue), TokenName(tok_id)
+ MissingValue), h8_id_str(tok_id)
val = start, end
break
@@ -606,7 +603,7 @@ def AllAttrsRawSlice(self):
# The value should come next
tok_id, start, end = next(events)
assert tok_id in (QuotedValue, UnquotedValue,
- MissingValue), TokenName(tok_id)
+ MissingValue), h8_id_str(tok_id)
# Note: quoted values may have &amp;
# We would need ANOTHER lexer to unescape them, but we
# don't need that for ul-table
@@ -691,8 +688,8 @@ def Tokens(self):
# Note: for unquoted values, & isn't allowed, and thus &amp; and &#99; and
# &#x99; are not allowed. We could relax that?
ATTR_VALUE_LEXER = CHAR_LEX + [
- (r'[^>&\x00]+', Tok.RawData),
- (r'.', Tok.Invalid),
+ (r'[^>&\x00]+', h8_id.RawData),
+ (r'.', h8_id.Invalid),
]
ATTR_VALUE_LEXER = MakeLexer(ATTR_VALUE_LEXER)
@@ -725,7 +722,7 @@ def NumTokens(self):
num_tokens = 0
pos = self.start_pos
for tok_id, end_pos in self.Tokens():
- if tok_id == Tok.Invalid:
+ if tok_id == h8_id.Invalid:
raise LexError(self.s, pos)
pos = end_pos
#log('pos %d', pos)
@@ -751,7 +748,7 @@ def Tokens(self):
pos = end_pos
break
else:
- raise AssertionError('Tok.Invalid rule should have matched')
+ raise AssertionError('h8_id.Invalid rule should have matched')
def ReadUntilStartTag(it, tag_lexer, tag_name):
@@ -768,7 +765,7 @@ def ReadUntilStartTag(it, tag_lexer, tag_name):
except StopIteration:
break
tag_lexer.Reset(pos, end_pos)
- if tok_id == Tok.StartTag and tag_lexer.TagName() == tag_name:
+ if tok_id == h8_id.StartTag and tag_lexer.TagName() == tag_name:
return pos, end_pos
pos = end_pos
@@ -791,7 +788,7 @@ def ReadUntilEndTag(it, tag_lexer, tag_name):
except StopIteration:
break
tag_lexer.Reset(pos, end_pos)
- if tok_id == Tok.EndTag and tag_lexer.TagName() == tag_name:
+ if tok_id == h8_id.EndTag and tag_lexer.TagName() == tag_name:
return pos, end_pos
pos = end_pos
@@ -828,12 +825,12 @@ def ToText(s, left_pos=0, right_pos=-1):
pos = left_pos
for tok_id, end_pos in ValidTokens(s, left_pos, right_pos):
- if tok_id in (Tok.RawData, Tok.BadAmpersand, Tok.BadGreaterThan,
- Tok.BadLessThan):
+ if tok_id in (h8_id.RawData, h8_id.BadAmpersand, h8_id.BadGreaterThan,
+ h8_id.BadLessThan):
out.SkipTo(pos)
out.PrintUntil(end_pos)
- elif tok_id == Tok.CharEntity: # &amp;
+ elif tok_id == h8_id.CharEntity: # &amp;
entity = s[pos + 1:end_pos - 1]
@@ -842,10 +839,10 @@ def ToText(s, left_pos=0, right_pos=-1):
out.SkipTo(end_pos)
# Not handling these yet
- elif tok_id == Tok.HexChar:
+ elif tok_id == h8_id.HexChar:
raise AssertionError('Hex Char %r' % s[pos:pos + 20])
- elif tok_id == Tok.DecChar:
+ elif tok_id == h8_id.DecChar:
raise AssertionError('Dec Char %r' % s[pos:pos + 20])
else:
@@ -895,16 +892,16 @@ def Validate(contents, flags, counters):
tag_stack = []
while True:
tok_id, end_pos = lx.Read()
- #log('TOP %s %r', TokenName(tok_id), contents[start_pos:end_pos])
+ #log('TOP %s %r', h8_id_str(tok_id), contents[start_pos:end_pos])
- if tok_id == Tok.Invalid:
+ if tok_id == h8_id.Invalid:
raise LexError(contents, start_pos)
- if tok_id == Tok.EndOfStream:
+ if tok_id == h8_id.EndOfStream:
break
tokens.append((tok_id, end_pos))
- if tok_id == Tok.StartEndTag:
+ if tok_id == h8_id.StartEndTag:
counters.num_start_end_tags += 1
tag_lexer.Reset(start_pos, end_pos)
@@ -916,7 +913,7 @@ def Validate(contents, flags, counters):
counters.debug_attrs.extend(all_attrs)
- elif tok_id == Tok.StartTag:
+ elif tok_id == h8_id.StartTag:
counters.num_start_tags += 1
tag_lexer.Reset(start_pos, end_pos)
@@ -939,7 +936,7 @@ def Validate(contents, flags, counters):
counters.max_tag_stack = max(counters.max_tag_stack,
len(tag_stack))
- elif tok_id == Tok.EndTag:
+ elif tok_id == h8_id.EndTag:
if flags & BALANCED_TAGS:
try:
expected = tag_stack.pop()
@@ -991,14 +988,15 @@ def ToXml(htm8_str):
while True:
tok_id, end_pos = lx.Read()
- if tok_id == Tok.Invalid:
+ if tok_id == h8_id.Invalid:
raise LexError(htm8_str, pos)
- if tok_id == Tok.EndOfStream:
+ if tok_id == h8_id.EndOfStream:
break
- if tok_id in (Tok.RawData, Tok.CharEntity, Tok.HexChar, Tok.DecChar):
+ if tok_id in (h8_id.RawData, h8_id.CharEntity, h8_id.HexChar,
+ h8_id.DecChar):
out.PrintUntil(end_pos)
- elif tok_id in (Tok.StartTag, Tok.StartEndTag):
+ elif tok_id in (h8_id.StartTag, h8_id.StartEndTag):
tag_lexer.Reset(pos, end_pos)
# TODO: reduce allocations here
all_attrs = tag_lexer.AllAttrsRawSlice()
@@ -1014,16 +1012,16 @@ def ToXml(htm8_str):
# Missing attr value: add ="", so missing becomes missing=""
tag_name = lx.CanonicalTagName()
- if tok_id == Tok.StartTag and tag_name in VOID_ELEMENTS:
+ if tok_id == h8_id.StartTag and tag_name in VOID_ELEMENTS:
# TODO: instead of closing >, print />
pass
- elif tok_id == Tok.BadAmpersand:
+ elif tok_id == h8_id.BadAmpersand:
#out.SkipTo(pos)
out.Print('&amp;')
out.SkipTo(end_pos)
- elif tok_id == Tok.BadGreaterThan:
+ elif tok_id == h8_id.BadGreaterThan:
#out.SkipTo(pos)
out.Print('&gt;')
out.SkipTo(end_pos)
@@ -1060,13 +1058,13 @@ def main(argv):
start_pos = 0
while True:
tok_id, end_pos = lx.Read()
- if tok_id == Tok.Invalid:
+ if tok_id == h8_id.Invalid:
raise LexError(contents, start_pos)
- if tok_id == Tok.EndOfStream:
+ if tok_id == h8_id.EndOfStream:
break
frag = contents[start_pos:end_pos]
- log('%d %s %r', end_pos, TokenName(tok_id), frag)
+ log('%d %s %r', end_pos, h8_id_str(tok_id), frag)
start_pos = end_pos
return 0
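
Note (not part of the patch): after this change, the canonical pull-style loop
is the one main() and ValidTokenList() share: Read() until h8_id.EndOfStream,
failing fast on h8_id.Invalid. A condensed sketch, assuming the Lexer
constructor takes the input string as ValidTokenList() suggests:

    from _devbuild.gen.htm8_asdl import h8_id, h8_id_str
    from lazylex import html

    def DumpTokens(contents):
        # type: (str) -> None
        lx = html.Lexer(contents)
        start_pos = 0
        while True:
            tok_id, end_pos = lx.Read()
            if tok_id == h8_id.Invalid:
                raise html.LexError(contents, start_pos)
            if tok_id == h8_id.EndOfStream:
                break
            print('%d %s %r' % (end_pos, h8_id_str(tok_id),
                                contents[start_pos:end_pos]))
            start_pos = end_pos
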
diff --git a/lazylex/html_test.py b/lazylex/html_test.py
index ebe0677b1..f91cad7bf 100755
--- a/lazylex/html_test.py
+++ b/lazylex/html_test.py
@@ -3,9 +3,10 @@
import unittest
+from _devbuild.gen.htm8_asdl import h8_id, h8_id_str
from lazylex import html # module under test
-from typing import List
-from typing import Tuple
+
+from typing import List, Tuple
log = html.log
@@ -184,7 +185,7 @@ def Lex(h, no_special_tags=False):
start_pos = 0
for tok_id, end_pos in tokens:
frag = h[start_pos:end_pos]
- log('%d %s %r', end_pos, html.TokenName(tok_id), frag)
+ log('%d %s %r', end_pos, h8_id_str(tok_id), frag)
start_pos = end_pos
return tokens
@@ -219,10 +220,10 @@ def testCommentParse2(self):
self.assertEqual(
[
- (Tok.RawData, 12),
- (Tok.Comment, 50), # <? err ?>
- (Tok.StartEndTag, 55),
- (Tok.EndOfStream, 55),
+ (h8_id.RawData, 12),
+ (h8_id.Comment, 50), # <? err ?>
+ (h8_id.StartEndTag, 55),
+ (h8_id.EndOfStream, 55),
],
tokens)
@@ -235,9 +236,9 @@ def testProcessingInstruction(self):
self.assertEqual(
[
- (Tok.RawData, 3),
- (Tok.Processing, 12), # <? err ?>
- (Tok.EndOfStream, 12),
+ (h8_id.RawData, 3),
+ (h8_id.Processing, 12), # <? err ?>
+ (h8_id.EndOfStream, 12),
],
tokens)
@@ -251,12 +252,12 @@ def testScriptStyle(self):
tokens = Lex(h)
expected = [
- (Tok.RawData, 12),
- (Tok.StartTag, 27), # <script src="">
- (Tok.RawData, 96), # \n
- (Tok.EndOfStream, 96), # \n
+ (h8_id.RawData, 12),
+ (h8_id.StartTag, 27), # <script src="">
+ (h8_id.RawData, 96), # \n
+ (h8_id.EndOfStream, 96), # \n
]
self.assertEqual(expected, tokens)
@@ -273,13 +274,13 @@ def testScriptStyleXml(self):
self.assertEqual(
[
- (Tok.RawData, 3),
- (Tok.StartTag, 18), # <script src="">
- (Tok.RawData, 24), # \n
- (Tok.EndTag, 33), # \n
- (Tok.EndOfStream, 33), # \n
+ (h8_id.RawData, 3),
+ (h8_id.StartTag, 18), # <script src="">
+ (h8_id.RawData, 24), # \n
+ (h8_id.EndTag, 33), # \n
+ (h8_id.EndOfStream, 33), # \n
],
tokens)
@@ -293,10 +294,10 @@ def testCData(self):
tokens = Lex(h)
self.assertEqual([
- (Tok.StartTag, 9),
- (Tok.CData, 61),
- (Tok.EndTag, 71),
- (Tok.EndOfStream, 71),
+ (h8_id.StartTag, 9),
+ (h8_id.CData, 61),
+ (h8_id.EndTag, 71),
+ (h8_id.EndOfStream, 71),
], tokens)
def testEntity(self):
@@ -310,11 +311,11 @@ def testEntity(self):
tokens = Lex(h)
self.assertEqual([
- (Tok.CharEntity, 6),
- (Tok.RawData, 8),
- (Tok.CharEntity, 14),
- (Tok.RawData, 15),
- (Tok.EndOfStream, 15),
+ (h8_id.CharEntity, 6),
+ (h8_id.RawData, 8),
+ (h8_id.CharEntity, 14),
+ (h8_id.RawData, 15),
+ (h8_id.EndOfStream, 15),
], tokens)
def testStartTag(self):
@@ -325,10 +326,10 @@ def testStartTag(self):
tokens = Lex(h)
self.assertEqual([
- (Tok.StartTag, 3),
- (Tok.RawData, 5),
- (Tok.EndTag, 9),
- (Tok.EndOfStream, 9),
+ (h8_id.StartTag, 3),
+ (h8_id.RawData, 5),
+ (h8_id.EndTag, 9),
+ (h8_id.EndOfStream, 9),
], tokens)
# Make sure we don't consume too much
@@ -337,12 +338,12 @@ def testStartTag(self):
tokens = Lex(h)
self.assertEqual([
- (Tok.StartTag, 3),
- (Tok.StartTag, 11),
- (Tok.RawData, 14),
- (Tok.EndTag, 23),
- (Tok.EndTag, 27),
- (Tok.EndOfStream, 27),
+ (h8_id.StartTag, 3),
+ (h8_id.StartTag, 11),
+ (h8_id.RawData, 14),
+ (h8_id.EndTag, 23),
+ (h8_id.EndTag, 27),
+ (h8_id.EndOfStream, 27),
], tokens)
return
@@ -355,10 +356,10 @@ def testStartTag(self):
tokens = Lex(h)
self.assertEqual([
- (Tok.RawData, 9),
- (Tok.StartTag, 24),
- (Tok.RawData, 9),
- (Tok.EndOfStream, 9),
+ (h8_id.RawData, 9),
+ (h8_id.StartTag, 24),
+ (h8_id.RawData, 9),
+ (h8_id.EndOfStream, 9),
], tokens)
def testBad(self):
@@ -369,16 +370,16 @@ def testBad(self):
tokens = Lex(h)
self.assertEqual([
- (Tok.BadAmpersand, 1),
- (Tok.EndOfStream, 1),
+ (h8_id.BadAmpersand, 1),
+ (h8_id.EndOfStream, 1),
], tokens)
h = '>'
tokens = Lex(h)
self.assertEqual([
- (Tok.BadGreaterThan, 1),
- (Tok.EndOfStream, 1),
+ (h8_id.BadGreaterThan, 1),
+ (h8_id.EndOfStream, 1),
], tokens)
def testInvalid(self):