Skip to content

Commit

Permalink
[doctools/ul-table] HTML comments no longer interfere with parsing
Browse files Browse the repository at this point in the history
We remove them first.

ul-table could use a DOM implementation too.
  • Loading branch information
Andy C committed Dec 31, 2024
1 parent 55a49ca commit 45e66a8
Show file tree
Hide file tree
Showing 5 changed files with 52 additions and 35 deletions.
2 changes: 1 addition & 1 deletion doctools/cmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -392,7 +392,7 @@ def Render(opts, meta, in_file, out_file, use_fastlex=True, debug_out=None):
opts.code_block_output)
text = oils_doc.ExtractCode(html, f)

html = oils_doc.RemoveComments(html)
html = ul_table.RemoveComments(html)

# Hack for allowing tables without <p> in cells, which CommonMark seems
# to require?
Expand Down
22 changes: 0 additions & 22 deletions doctools/oils_doc.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,28 +23,6 @@
from lazylex import html


def RemoveComments(s):
    """Remove <!-- comments --> from an HTML string.

    Comments whose text contains 'REPLACE' are left in place, because
    doc/release-index.md relies on markers like <!-- REPLACE_WITH_DATE -->.

    Args:
      s: the HTML text to filter.

    Returns:
      A new string: s with every other comment dropped.
    """
    f = StringIO()
    out = html.Output(s, f)

    # Start offset of the token currently being examined.  The token stream
    # only reports where each token ends, so we carry the previous end
    # forward ourselves.
    pos = 0

    for tok_id, end_pos in html.ValidTokens(s):
        if tok_id == html.Comment:
            # doc/release-index.md has <!-- REPLACE_WITH_DATE --> etc.
            if 'REPLACE' not in s[pos:end_pos]:
                # Presumably: emit everything before the comment, then jump
                # past it -- confirm against html.Output.
                out.PrintUntil(pos)
                out.SkipTo(end_pos)
        pos = end_pos

    out.PrintTheRest()
    return f.getvalue()


class _Abbrev(object):

def __init__(self, fmt):
Expand Down
2 changes: 1 addition & 1 deletion doctools/ul-table-test.ysh
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ const TD_ATTRS = '''
- <cell-attrs class=unquoted /> name
- <cell-attrs class=quoted /> age
- role
- tr
- tr <!-- comment --> <!-- comment 2 -->
- <cell-attrs class="cool" /> alice
- 30
- parent
Expand Down
46 changes: 39 additions & 7 deletions doctools/ul_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,32 @@
from doctools.util import log
from lazylex import html


def RemoveComments(s):
    """Remove <!-- comments --> from an HTML string.

    This is a required preprocessing step for ul-table.

    Comments whose text contains 'REPLACE' are kept, since
    doc/release-index.md uses markers such as <!-- REPLACE_WITH_DATE -->.

    Args:
      s: the HTML text to filter.

    Returns:
      A new string: s with all other comments removed.
    """
    f = StringIO()
    out = html.Output(s, f)

    # html.ValidTokens() yields (token id, end offset) pairs, so the start of
    # the current token is the end offset of the previous one.
    pos = 0

    for tok_id, end_pos in html.ValidTokens(s):
        is_comment = (tok_id == html.Comment)
        # doc/release-index.md has <!-- REPLACE_WITH_DATE --> etc.
        if is_comment and 'REPLACE' not in s[pos:end_pos]:
            # Presumably: flush the text preceding the comment, then skip
            # over the comment itself -- confirm against html.Output.
            out.PrintUntil(pos)
            out.SkipTo(end_pos)
        pos = end_pos

    out.PrintTheRest()
    return f.getvalue()


_WHITESPACE_RE = re.compile(r'\s*')


Expand All @@ -28,19 +54,22 @@ def _CurrentString(self):
part = self.lexer.s[self.start_pos:self.end_pos]
return part

def _Next(self):
def _Next(self, comment_ok=False):
"""
Advance and set self.tok_id, self.start_pos, self.end_pos
"""
self.start_pos = self.end_pos
self.tok_id, self.end_pos = self.lexer.Read()

# The caller should have called RemoveComments() beforehand. That can still
# leave some REPLACE comments.
if not comment_ok and self.tok_id == html.Comment:
raise html.ParseError('Unexpected HTML comment')

if 0:
part = self._CurrentString()
log('[%3d - %3d] %r', self.start_pos, self.end_pos, part)

#self.tok_id = html.EndOfStream
# Don't change self.end_pos

def _EatRawData(self, regex):
# type: (str) -> None
"""
Expand Down Expand Up @@ -97,15 +126,15 @@ def FindUlTable(self):

# Find first table
while True:
self._Next()
self._Next(comment_ok=True)
if self.tok_id == html.EndOfStream:
return -1

tag_lexer.Reset(self.start_pos, self.end_pos)
if (self.tok_id == html.StartTag and
tag_lexer.TagName() == 'table'):
while True:
self._Next()
self._Next(comment_ok=True)
if self.tok_id != html.RawData:
break

Expand Down Expand Up @@ -506,4 +535,7 @@ def ReplaceTables(s, debug_out=None):

if __name__ == '__main__':
# Simple CLI filter
sys.stdout.write(ReplaceTables(sys.stdin.read()))
h = sys.stdin.read()
h = RemoveComments(h)
h = ReplaceTables(h)
sys.stdout.write(h)
15 changes: 11 additions & 4 deletions doctools/ul_table_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
- <cell-attrs class=unquoted /> name
- <cell-attrs class=quoted /> age
- role
- tr
- tr <!-- comment --> <!-- comment 2 -->
- <cell-attrs class="cool" /> alice
- 30
- parent
Expand Down Expand Up @@ -152,6 +152,8 @@
# https://developer.mozilla.org/en-US/docs/Web/CSS/grid-column

COLSPAN = """\
<!-- begin REPLACE -->
<table>
- thead
Expand All @@ -167,9 +169,12 @@
- 42
</table>
<!-- end REPLACE -->
"""

COLSPAN_HTML = """\
<!-- begin REPLACE -->
<table>
<thead>
<tr>
Expand All @@ -189,6 +194,7 @@
<td>42</td>
</tr>
</table>
<!-- end REPLACE -->
"""

# UNUSED - not worth it now
Expand Down Expand Up @@ -229,15 +235,16 @@ def MarkdownToTable(md):
print(h)
print('')

h2 = ul_table.ReplaceTables(h)
h = ul_table.RemoveComments(h)
h = ul_table.ReplaceTables(h)

if 1:
print('---')
print('REPLACED')
print(h2)
print(h)
print('')

return h2
return h


class UlTableTest(unittest.TestCase):
Expand Down

0 comments on commit 45e66a8

Please sign in to comment.