From 48e6d06bea5101ea14741266ef38b511978b6509 Mon Sep 17 00:00:00 2001 From: Andy C Date: Tue, 14 Jan 2025 02:22:53 -0500 Subject: [PATCH] [doctools, lazylex] Add type annotations, using pyannotate Fixed some of them manually, e.g. with IO[str] --- devtools/types.sh | 10 ++++++--- doctools/cmark.py | 43 ++++++++++++++++++++++++++++++++---- doctools/cmark_test.py | 5 +++++ doctools/help_gen.py | 11 ++++++++++ doctools/help_gen_test.py | 3 +++ doctools/html_head.py | 3 ++- doctools/html_lib.py | 3 +++ doctools/html_lib_test.py | 1 + doctools/oils_doc.py | 10 +++++++++ doctools/oils_doc_test.py | 5 +++++ doctools/spelling.py | 2 ++ doctools/spelling_test.py | 1 + doctools/split_doc.py | 2 ++ doctools/split_doc_test.py | 4 ++++ doctools/ul_table.py | 24 +++++++++++++++++++- doctools/ul_table_test.py | 13 +++++++++++ lazylex/html.py | 45 ++++++++++++++++++++++++++++++++++++++ lazylex/html_test.py | 32 +++++++++++++++++++++++++++ 18 files changed, 208 insertions(+), 9 deletions(-) diff --git a/devtools/types.sh b/devtools/types.sh index a928aab81c..4f4553eae5 100755 --- a/devtools/types.sh +++ b/devtools/types.sh @@ -62,11 +62,15 @@ check-mycpp() { check-doctools() { local -a files=( - doctools/oils_doc.py lazylex/html.py + $(for x in doctools/*.py; do echo $x; done | grep -v '_test.py' ) + lazylex/html.py ) - #local -a flags=( --py2 --no-strict-optional --strict ) - local -a flags=( --py2 --no-strict-optional ) + # 777 errors before pyann + # 583 afterward + local -a flags=( --py2 --no-strict-optional --strict ) + #local -a flags=( --py2 --no-strict-optional ) + set -x mypy-check "${flags[@]}" "${files[@]}" } diff --git a/doctools/cmark.py b/doctools/cmark.py index 78986ab32d..864af41289 100755 --- a/doctools/cmark.py +++ b/doctools/cmark.py @@ -7,6 +7,12 @@ from __future__ import print_function import ctypes +from typing import List +from typing import Tuple +from typing import Union +from typing import Optional +from typing import IO +from typing import Dict try: from 
HTMLParser import HTMLParser except ImportError: @@ -74,6 +80,7 @@ def log(msg, *args): def md2html(md): + # type: (str) -> str if sys.version_info.major == 2: md_bytes = md else: @@ -104,6 +111,7 @@ class TocExtractor(HTMLParser): """ def __init__(self): + # type: () -> None HTMLParser.__init__(self) # make targets for these, regardless of whether the TOC links to them. @@ -122,6 +130,7 @@ def __init__(self): self.headings = [] def handle_starttag(self, tag, attrs): + # type: (str, List[Tuple[str, str]]) -> None if tag == 'div': if attrs == [('id', 'toc')]: log('%s> %s %s', self.indent * ' ', tag, attrs) @@ -149,6 +158,7 @@ def handle_starttag(self, tag, attrs): self.capturing = True # record the text inside

etc. def handle_endtag(self, tag): + # type: (str) -> None # Debug print if tag == 'div': self.indent -= 1 @@ -164,6 +174,7 @@ def handle_endtag(self, tag): self._AppendHtml('' % tag) def handle_entityref(self, data): + # type: (str) -> None """ From Python docs: This method is called to process a named character reference of the form @@ -174,6 +185,7 @@ def handle_entityref(self, data): self._AppendHtml('&%s;' % data) def handle_data(self, data): + # type: (str) -> None # Debug print if self.indent > 0: log('%s| %r', self.indent * ' ', data) @@ -183,11 +195,13 @@ def handle_data(self, data): self._AppendText(data) def _AppendText(self, text): + # type: (str) -> None """Accumulate text of the last heading.""" _, _, _, _, text_parts = self.headings[-1] text_parts.append(text) def _AppendHtml(self, html): + # type: (str) -> None """Accumulate HTML of the last heading.""" _, _, _, html_parts, _ = self.headings[-1] html_parts.append(html) @@ -201,8 +215,14 @@ def _AppendHtml(self, html): ANCHOR_FMT = '\n' -def _MakeTocInsertions(opts, toc_tags, headings, toc_pos, - preserve_anchor_case): +def _MakeTocInsertions( + opts, # type: Any + toc_tags, # type: Union[List[str], Tuple[str, str]] + headings, # type: List[Tuple[int, str, None, List[str], List[str]]] + toc_pos, # type: int + preserve_anchor_case, # type: bool +): + # type: (...) -> List[Tuple[int, str]] """Given extract headings list and TOC position, return a list of insertions. The insertions
for the TOC itself, and for the targets. @@ -266,7 +286,12 @@ def _MakeTocInsertions(opts, toc_tags, headings, toc_pos, return insertions -def _MakeTocInsertionsDense(headings, toc_pos, preserve_anchor_case): +def _MakeTocInsertionsDense( + headings, # type: List[Tuple[int, str, Optional[str], List[str], List[str]]] + toc_pos, # type: int + preserve_anchor_case, # type: bool +): + # type: (...) -> List[Tuple[int, str]] """For the dense-toc style with columns, used by doc/ref The style above is simpler: it outputs a div for every line: @@ -360,6 +385,7 @@ def _MakeTocInsertionsDense(headings, toc_pos, preserve_anchor_case): def _ApplyInsertions(lines, insertions, out_file): + # type: (List[str], List[Tuple[int, str]], IO[str]) -> None assert insertions, "Should be at least one insertion" j = 0 n = len(insertions) @@ -376,7 +402,15 @@ def _ApplyInsertions(lines, insertions, out_file): out_file.write(line) -def Render(opts, meta, in_file, out_file, use_fastlex=True, debug_out=None): +def Render( + opts, # type: Any + meta, # type: Dict + in_file, # type: IO[str] + out_file, # type: IO[str] + use_fastlex=True, # type: bool + debug_out=None, # type: Optional[Any] +): + # type: (...) 
-> None if debug_out is None: debug_out = [] @@ -456,6 +490,7 @@ def Render(opts, meta, in_file, out_file, use_fastlex=True, debug_out=None): def Options(): + # type: () -> Any p = optparse.OptionParser('cmark.py [options]') p.add_option('--common-mark', diff --git a/doctools/cmark_test.py b/doctools/cmark_test.py index 9157bf60a8..6ff3c1fce8 100755 --- a/doctools/cmark_test.py +++ b/doctools/cmark_test.py @@ -108,6 +108,7 @@ def f(): class RenderTest(unittest.TestCase): def testRender(self): + # type: () -> None opts, _ = cmark.Options().parse_args([]) out_file = cStringIO.StringIO() @@ -119,6 +120,7 @@ def testRender(self): print(out_file.getvalue()) def testNewRender(self): + # type: () -> None # New style of doc new_flags = ['--toc-tag', 'h2', '--toc-tag', 'h3'] @@ -132,6 +134,7 @@ def testNewRender(self): self.assert_('
' in h, h) def testNewPrettyHref(self): + # type: () -> None # New style of doc new_flags = ['--toc-tag', 'h2', '--toc-tag', 'h3', '--toc-pretty-href'] @@ -147,6 +150,7 @@ def testNewPrettyHref(self): print(h) def testExtractor(self): + # type: () -> None parser = cmark.TocExtractor() parser.feed(_HTML_1) self.assertEqual(5, parser.toc_begin_line) @@ -187,6 +191,7 @@ def testExtractor(self): self.assertEqual('Two', ''.join(text)) def testExtractorDense(self): + # type: () -> None parser = cmark.TocExtractor() parser.feed(_HTML_1.replace('"toc"', '"dense-toc"')) diff --git a/doctools/help_gen.py b/doctools/help_gen.py index 48915814f0..623945be3a 100755 --- a/doctools/help_gen.py +++ b/doctools/help_gen.py @@ -1,5 +1,9 @@ #!/usr/bin/env python2 from __future__ import print_function +from typing import List +from typing import Any +from typing import Dict +from typing import Iterator """help_gen.py Ideas for HTML -> ANSI converter: @@ -85,6 +89,7 @@ def _StringToHref(s): class TopicHtmlRenderer(object): def __init__(self, chapter, debug_out, linkify_stop_col): + # type: (str, List, int) -> None self.chapter = chapter self.debug_out = debug_out self.linkify_stop_col = linkify_stop_col @@ -92,6 +97,7 @@ def __init__(self, chapter, debug_out, linkify_stop_col): self.html_page = 'chap-%s.html' % chapter def _PrintTopic(self, m, out, line_info): + # type: (Any, html.Output, Dict[str, Any]) -> None # The X topic_impl = True if m.group(1): @@ -111,6 +117,7 @@ def _PrintTopic(self, m, out, line_info): out.Print('') def Render(self, line): + # type: (str) -> str """Convert a line of text to HTML. Topics are highlighted and X made red. 
@@ -217,6 +224,7 @@ class Splitter(HTMLParser.HTMLParser): """ def __init__(self, heading_tags, out): + # type: (List[str], List) -> None HTMLParser.HTMLParser.__init__(self) self.heading_tags = heading_tags self.out = out @@ -271,6 +279,7 @@ def handle_data(self, data): self.cur_group[3].append(data) def end(self): + # type: () -> None if self.cur_group: self.out.append(self.cur_group) @@ -282,6 +291,7 @@ def end(self): def ExtractBody(s): + # type: (str) -> str """Extract what's in between The splitter needs balanced tags, and what's in isn't @@ -316,6 +326,7 @@ def ExtractBody(s): def SplitIntoCards(heading_tags, contents): + # type: (List[str], str) -> Iterator contents = ExtractBody(contents) groups = [] diff --git a/doctools/help_gen_test.py b/doctools/help_gen_test.py index 780188b40e..b4b3ffe673 100755 --- a/doctools/help_gen_test.py +++ b/doctools/help_gen_test.py @@ -11,6 +11,7 @@ class HelpGenTest(unittest.TestCase): def testTopicRe(self): + # type: () -> None CASES = [ ('hello ', True), ('X hello ', True), @@ -29,6 +30,7 @@ def testTopicRe(self): self.assertEqual(matched, bool(m)) def testTopicHtml(self): + # type: () -> None os.environ['OILS_VERSION'] = '0.7.pre5' # Three spaces before @@ -91,6 +93,7 @@ def testTopicHtml(self): print() def testSplitIntoCards(self): + # type: () -> None contents = """

YSH Expression Language

diff --git a/doctools/html_head.py b/doctools/html_head.py index 54803560c7..d79bd9a276 100755 --- a/doctools/html_head.py +++ b/doctools/html_head.py @@ -12,7 +12,8 @@ try: import html except ImportError: - import cgi as html # only for cgi.escape -> html.escape + # only for cgi.escape -> html.escape + import cgi as html # type: ignore try: import cStringIO except ImportError: diff --git a/doctools/html_lib.py b/doctools/html_lib.py index facb663ae5..7a83186edc 100644 --- a/doctools/html_lib.py +++ b/doctools/html_lib.py @@ -9,9 +9,11 @@ import cgi import re +from typing import List def AttrsToString(attrs): + # type: (List) -> str if not attrs: return '' @@ -21,6 +23,7 @@ def AttrsToString(attrs): def PrettyHref(s, preserve_anchor_case=False): + # type: (str, bool) -> str """Turn arbitrary heading text into href with no special characters. This is modeled after what github does. It makes everything lower case. diff --git a/doctools/html_lib_test.py b/doctools/html_lib_test.py index ae2a45819d..d2808ac518 100755 --- a/doctools/html_lib_test.py +++ b/doctools/html_lib_test.py @@ -9,6 +9,7 @@ class FunctionsTest(unittest.TestCase): def testPrettyHref(self): + # type: () -> None self.assertEqual('foo-bar', html_lib.PrettyHref('foo bar', False)) self.assertEqual('why-not', html_lib.PrettyHref('Why Not??', False)) self.assertEqual('backslash-foo', diff --git a/doctools/oils_doc.py b/doctools/oils_doc.py index ac9033675c..e5de856c34 100755 --- a/doctools/oils_doc.py +++ b/doctools/oils_doc.py @@ -12,6 +12,10 @@ from __future__ import print_function import cgi +from typing import Iterator +from typing import Any +from typing import List +from typing import Optional try: from cStringIO import StringIO except ImportError: @@ -35,6 +39,7 @@ def __init__(self, fmt): self.fmt = fmt def __call__(self, value): + # type: (str) -> str return self.fmt % {'value': value} @@ -100,6 +105,7 @@ def __call__(self, value): def ExpandLinks(s): + # type: (str) -> str """Expand $xref:bash 
and so forth.""" f = StringIO() out = html.Output(s, f) @@ -165,6 +171,7 @@ class _Plugin(object): """ def __init__(self, s, start_pos, end_pos): + # type: (str, int, int) -> None self.s = s self.start_pos = start_pos self.end_pos = end_pos @@ -201,6 +208,7 @@ def PrintHighlighted(self, out): def Lines(s, start_pos, end_pos): + # type: (str, int, int) -> Iterator[int] """Yields positions in s that end a line.""" pos = start_pos while pos < end_pos: @@ -218,6 +226,7 @@ class ShPromptPlugin(_Plugin): """Highlight shell prompts.""" def PrintHighlighted(self, out): + # type: (html.Output) -> None pos = self.start_pos for line_end in Lines(self.s, self.start_pos, self.end_pos): @@ -368,6 +377,7 @@ def SimpleHighlightCode(s): def HighlightCode(s, default_highlighter, debug_out=None): + # type: (str, Optional[Any], Optional[List]) -> str """ Algorithm: 1. Collect what's inside
 ...
diff --git a/doctools/oils_doc_test.py b/doctools/oils_doc_test.py
index 16ab722194..c173cf17e9 100755
--- a/doctools/oils_doc_test.py
+++ b/doctools/oils_doc_test.py
@@ -14,6 +14,7 @@
 class OilsDocTest(unittest.TestCase):
 
     def testTopicCssClass(self):
+        # type: () -> None
 
         CASES = [
             ('language-chapter-links-expr-lang', True),
@@ -25,6 +26,7 @@ def testTopicCssClass(self):
             print(m.groups())
 
     def testExpandLinks(self):
+        # type: () -> None
         """
         bash
         ->
@@ -40,6 +42,7 @@ def testExpandLinks(self):
         self.assertEqual('', h)
 
     def testShPrompt(self):
+        # type: () -> None
         r = oils_doc._PROMPT_LINE_RE
         line = 'oil$ ls -l<TAB>  # comment'
         m = r.match(line)
@@ -54,6 +57,7 @@ def testShPrompt(self):
         plugin.PrintHighlighted(out)
 
     def testHighlightCode(self):
+        # type: () -> None
         # lazylex/testdata.html has the language-sh-prompt
 
         h = oils_doc.HighlightCode(TEST_HTML, None)
@@ -61,6 +65,7 @@ def testHighlightCode(self):
         #print(h)
 
     def testPygmentsPlugin(self):
+        # type: () -> None
         # TODO: Doesn't pass on Travis because pygments isn't there
         # use virtualenv or something?
         return
diff --git a/doctools/spelling.py b/doctools/spelling.py
index 0fcefcfb7c..57db3096b0 100755
--- a/doctools/spelling.py
+++ b/doctools/spelling.py
@@ -11,9 +11,11 @@
 import sys
 
 from doctools.util import log
+from typing import Iterator
 
 
 def SplitWords(contents):
+    # type: (str) -> Iterator[str]
     # Remove URLs so path components don't show up as words
     contents = re.sub(r'(http|https|file)://\S+', '', contents)
 
diff --git a/doctools/spelling_test.py b/doctools/spelling_test.py
index ca949bb864..4c2fff8225 100755
--- a/doctools/spelling_test.py
+++ b/doctools/spelling_test.py
@@ -10,6 +10,7 @@
 class SpellingTest(unittest.TestCase):
 
     def testSplitWords(self):
+        # type: () -> None
 
         docs = [
             r'''
diff --git a/doctools/split_doc.py b/doctools/split_doc.py
index 0057852a69..b0f3865cb7 100755
--- a/doctools/split_doc.py
+++ b/doctools/split_doc.py
@@ -6,6 +6,7 @@
 import optparse
 import re
 import sys
+from typing import Dict, IO
 
 DATE_RE = re.compile(r'(\d\d\d\d) / (\d\d) / (\d\d)', re.VERBOSE)
 
@@ -13,6 +14,7 @@
 
 
 def SplitDocument(default_vals, entry_f, meta_f, content_f, strict=False):
+    # type: (Dict[str, str], IO[str], IO[str], IO[str], bool) -> None
     """Split a document into metadata JSON and content Markdown.
 
     Used for blog posts and index.md / cross-ref.md.
diff --git a/doctools/split_doc_test.py b/doctools/split_doc_test.py
index cd74360de8..cb0f163503 100755
--- a/doctools/split_doc_test.py
+++ b/doctools/split_doc_test.py
@@ -11,6 +11,7 @@
 class FooTest(unittest.TestCase):
 
     def testStrict(self):
+        # type: () -> None
         entry_f = StringIO('''\
 Title
 =====
@@ -33,6 +34,7 @@ def testStrict(self):
         print(content_f.getvalue())
 
     def testMetadataAndTitle(self):
+        # type: () -> None
         print('_' * 40)
         print()
 
@@ -57,6 +59,7 @@ def testMetadataAndTitle(self):
         print(content_f.getvalue())
 
     def testMetadataAndTitleNoSpace(self):
+        # type: () -> None
         print('_' * 40)
         print()
 
@@ -80,6 +83,7 @@ def testMetadataAndTitleNoSpace(self):
         print(content_f.getvalue())
 
     def testTitleOnly(self):
+        # type: () -> None
         print('_' * 40)
         print()
 
diff --git a/doctools/ul_table.py b/doctools/ul_table.py
index 54cb0c0347..53a8332e76 100755
--- a/doctools/ul_table.py
+++ b/doctools/ul_table.py
@@ -10,9 +10,16 @@
 
 from doctools.util import log
 from lazylex import html
+from typing import List
+from typing import Optional
+from typing import Tuple
+from typing import Union
+from typing import Any
+from typing import Dict
 
 
 def RemoveComments(s):
+    # type: (str) -> str
     """Remove <!-- comments -->
 
     This is a required preprocessing step for ul-table.
@@ -43,6 +50,7 @@ def RemoveComments(s):
 class UlTableParser(object):
 
     def __init__(self, lexer, tag_lexer):
+        # type: (html.Lexer, html.TagLexer) -> None
         self.lexer = lexer
         self.tag_lexer = tag_lexer
 
@@ -51,10 +59,12 @@ def __init__(self, lexer, tag_lexer):
         self.end_pos = 0
 
     def _CurrentString(self):
+        # type: () -> str
         part = self.lexer.s[self.start_pos:self.end_pos]
         return part
 
     def _Next(self, comment_ok=False):
+        # type: (bool) -> None
         """
         Advance and set self.tok_id, self.start_pos, self.end_pos
         """
@@ -86,6 +96,7 @@ def _EatRawData(self, regex):
         self._Next()
 
     def _Eat(self, expected_id, expected_tag):
+        # type: (int, str) -> None
         """
         Assert that we got a start or end tag, with the given name, and advance
 
@@ -109,6 +120,7 @@ def _Eat(self, expected_id, expected_tag):
         self._Next()
 
     def _WhitespaceOk(self):
+        # type: () -> None
         """
         Optional whitespace
         """
@@ -117,6 +129,7 @@ def _WhitespaceOk(self):
             self._Next()
 
     def FindUlTable(self):
+        # type: () -> int
         """Find 
    Return the START position of the
      @@ -145,6 +158,7 @@ def FindUlTable(self): return -1 def _ListItem(self): + # type: () -> Tuple[Optional[List[Tuple[str, str]]], Optional[str]] """Parse a list item nested below thead or tr. Returns: @@ -228,6 +242,7 @@ def _ListItem(self): return td_attrs, inner_html def _ParseTHead(self): + # type: () -> Union[List[Tuple[List[Tuple[str, str]], str]], List[Tuple[Optional[List[Tuple[str, str]]], str]]] """ Assume we're looking at the first
        tag. Now we want to find
      • thead and the nested
          @@ -295,6 +310,7 @@ def _ParseTHead(self): return cells def _ParseTr(self): + # type: () -> Tuple[None, Union[List[Tuple[List[Tuple[str, str]], str]], List[Tuple[None, str]], None]] """ Assume we're looking at the first
            tag. Now we want to find
          • tr and the nested
              @@ -356,6 +372,7 @@ def _ParseTr(self): return tr_attrs, cells def ParseTable(self): + # type: () -> Dict[str, Any] """ Returns a structure like this { 'thead': [ 'col1', 'col2' ], # TODO: columns can have CSS attributes @@ -417,7 +434,11 @@ def ParseTable(self): return table -def MergeAttrs(thead_td_attrs, row_td_attrs): +def MergeAttrs( + thead_td_attrs, # type: Optional[List[Tuple[str, str]]] + row_td_attrs, # type: Optional[List[Tuple[str, str]]] +): + # type: (...) -> List[Tuple[str, str]] merged_attrs = [] if row_td_attrs is None: @@ -445,6 +466,7 @@ def MergeAttrs(thead_td_attrs, row_td_attrs): def ReplaceTables(s, debug_out=None): + # type: (str, Optional[Any]) -> str """ ul-table: Write tables using bulleted list """ diff --git a/doctools/ul_table_test.py b/doctools/ul_table_test.py index 6b1e17c63f..1a4f37f142 100755 --- a/doctools/ul_table_test.py +++ b/doctools/ul_table_test.py @@ -222,6 +222,7 @@ def MarkdownToTable(md): + # type: (str) -> str # markdown -> HTML h = cmark.md2html(md) @@ -250,9 +251,11 @@ def MarkdownToTable(md): class UlTableTest(unittest.TestCase): def testOne(self): + # type: () -> None h = MarkdownToTable('hi\n' + TEST1 + '\n\n bye \n') def testNoHeader(self): + # type: () -> None # HTML looks like: # #
@@ -277,6 +280,7 @@ def testNoHeader(self): print(h) def testSimple(self): + # type: () -> None h = MarkdownToTable("""\
@@ -313,6 +317,7 @@ def testSimple(self): """, h) def testMultipleTables(self): + # type: () -> None # They can be right next to each other html_one = MarkdownToTable(TEST1) @@ -327,13 +332,16 @@ def testMultipleTables(self): self.assertMultiLineEqual(html_one + html_one + '\n', html_two) def testMultipleTablesWithSpace(self): + # type: () -> None h = MarkdownToTable(TEST1 + '\n\n hi \n' + TEST1) def testTdAttrs(self): + # type: () -> None h = MarkdownToTable(TD_ATTRS) self.assertMultiLineEqual(TD_ATTRS_HTML, h) def testTdAttrsTrailing(self): + # type: () -> None self.maxDiff = 2000 h = MarkdownToTable(TRAILING_ATTRS) if 1: @@ -342,20 +350,24 @@ def testTdAttrsTrailing(self): self.assertMultiLineEqual(TRAILING_ATTRS_HTML, h) def testColspan(self): + # type: () -> None h = MarkdownToTable(COLSPAN) self.assertMultiLineEqual(COLSPAN_HTML, h) def testTrAttrs(self): + # type: () -> None h = MarkdownToTable(TR_ATTRS) self.assertMultiLineEqual(TR_ATTRS_HTML, h) def testMixedTr(self): + # type: () -> None # Not worth it return h = MarkdownToTable(MIXED_TR) #self.assertMultiLineEqual(MIXED_TR, h) def testSyntaxErrors(self): + # type: () -> None # Once we get
    , then we TAKE OVER, and start being STRICT try: @@ -388,6 +400,7 @@ def testSyntaxErrors(self): self.fail('Expected parse error') def testColumnCheck(self): + # type: () -> None # Disabled because of colspan return diff --git a/lazylex/html.py b/lazylex/html.py index c26ff889a8..5704a22053 100755 --- a/lazylex/html.py +++ b/lazylex/html.py @@ -3,8 +3,21 @@ lazylex/html.py - Low-Level HTML Processing. See lazylex/README.md for details. + +TODO: +- Get rid of AttrValueLexer - this should be in the TagLexer + - this also means that unquoted values can be more similar + - We can use a single lexer mode for everything inside <> + - the SPACE is the only difference +- UTF-8 check, like JSON8 +- Static typing + """ from __future__ import print_function +from typing import Iterator +from typing import Union +from typing import Any +from typing import IO try: from cStringIO import StringIO @@ -19,6 +32,7 @@ def log(msg, *args): + # type: (str, *Any) -> None msg = msg % args print(msg, file=sys.stderr) @@ -32,14 +46,17 @@ class LexError(Exception): """ def __init__(self, s, start_pos): + # type: (str, int) -> None self.s = s self.start_pos = start_pos def __str__(self): + # type: () -> str return '(LexError %r)' % (self.s[self.start_pos:self.start_pos + 20]) def FindLineNum(s, error_pos): + # type: (str, int) -> int current_pos = 0 line_num = 1 while True: @@ -63,11 +80,13 @@ class ParseError(Exception): """ def __init__(self, msg, s=None, start_pos=-1): + # type: (str, Optional[str], int) -> None self.msg = msg self.s = s self.start_pos = start_pos def __str__(self): + # type: () -> str if self.s is not None: assert self.start_pos != -1, self.start_pos snippet = (self.s[self.start_pos:self.start_pos + 20]) @@ -88,26 +107,31 @@ class Output(object): """ def __init__(self, s, f, left_pos=0, right_pos=-1): + # type: (str, IO[str], int, int) -> None self.s = s self.f = f self.pos = left_pos self.right_pos = len(s) if right_pos == -1 else right_pos def SkipTo(self, pos): + 
# type: (int) -> None """Skip to a position.""" self.pos = pos def PrintUntil(self, pos): + # type: (int) -> None """Print until a position.""" piece = self.s[self.pos:pos] self.f.write(piece) self.pos = pos def PrintTheRest(self): + # type: () -> None """Print until the end of the string.""" self.PrintUntil(self.right_pos) def Print(self, s): + # type: (str) -> None """Print text to the underlying buffer.""" self.f.write(s) @@ -135,6 +159,7 @@ class Tok(object): def TokenName(tok_id): + # type: (int) -> str return TOKEN_NAMES[tok_id] @@ -258,6 +283,7 @@ def MakeLexer(rules): class Lexer(object): def __init__(self, s, left_pos=0, right_pos=-1, no_special_tags=False): + # type: (str, int, int, bool) -> None self.s = s self.pos = left_pos self.right_pos = len(s) if right_pos == -1 else right_pos @@ -378,6 +404,7 @@ def Read(self): return tok_id, end_pos def LookAhead(self, regex): + # type: (str) -> bool # Cache the regex compilation. This could also be LookAheadFor(THEAD) # or something. pat = self.cache.get(regex) @@ -390,6 +417,7 @@ def LookAhead(self, regex): def _Tokens(s, left_pos, right_pos): + # type: (str, int, int) -> Iterator[Tuple[int, int]] """ Args: s: string to parse @@ -404,6 +432,7 @@ def _Tokens(s, left_pos, right_pos): def ValidTokens(s, left_pos=0, right_pos=-1): + # type: (str, int, int) -> Iterator[Tuple[int, int]] """Wrapper around _Tokens to prevent callers from having to handle Invalid. I'm not combining the two functions because I might want to do a @@ -419,6 +448,7 @@ def ValidTokens(s, left_pos=0, right_pos=-1): def ValidTokenList(s, no_special_tags=False): + # type: (str, bool) -> List[Tuple[int, int]] """A wrapper that can be more easily translated to C++. 
Doesn't use iterators.""" start_pos = 0 @@ -500,11 +530,13 @@ class TagLexer(object): """ def __init__(self, s): + # type: (str) -> None self.s = s self.start_pos = -1 # Invalid self.end_pos = -1 def Reset(self, start_pos, end_pos): + # type: (int, int) -> None """Reuse instances of this object.""" assert start_pos >= 0, start_pos assert end_pos >= 0, end_pos @@ -516,11 +548,13 @@ def TagString(self): return self.s[self.start_pos:self.end_pos] def TagName(self): + # type: () -> str # First event tok_id, start, end = next(self.Tokens()) return self.s[start:end] def GetSpanForAttrValue(self, attr_name): + # type: (str) -> Tuple[int, int] """ Used by oils_doc.py, for href shortcuts """ @@ -547,6 +581,7 @@ def GetSpanForAttrValue(self, attr_name): return val def GetAttrRaw(self, attr_name): + # type: (str) -> Optional[str] """ Return the value, which may be UNESCAPED. """ @@ -556,6 +591,7 @@ def GetAttrRaw(self, attr_name): return self.s[start:end] def AllAttrsRawSlice(self): + # type: () -> List[Tuple[str, int, int]] """ Get a list of pairs [('class', 3, 5), ('href', 9, 12)] """ @@ -580,6 +616,7 @@ def AllAttrsRawSlice(self): return slices def AllAttrsRaw(self): + # type: () -> List[Tuple[str, str]] """ Get a list of pairs [('class', 'foo'), ('href', '?foo=1&bar=2')] @@ -593,6 +630,7 @@ def AllAttrsRaw(self): return pairs def Tokens(self): + # type: () -> Iterator[Tuple[int, int, int]] """ Yields a sequence of tokens: Tag (AttrName AttrValue?)* @@ -668,11 +706,13 @@ class AttrValueLexer(object): """ def __init__(self, s): + # type: (str) -> None self.s = s self.start_pos = -1 # Invalid self.end_pos = -1 def Reset(self, start_pos, end_pos): + # type: (int, int) -> None """Reuse instances of this object.""" assert start_pos >= 0, start_pos assert end_pos >= 0, end_pos @@ -681,6 +721,7 @@ def Reset(self, start_pos, end_pos): self.end_pos = end_pos def NumTokens(self): + # type: () -> int num_tokens = 0 pos = self.start_pos for tok_id, end_pos in self.Tokens(): @@ -692,6 
+733,7 @@ def NumTokens(self): return num_tokens def Tokens(self): + # type: () -> Iterator[Union[Iterator, Iterator[Tuple[int, int]]]] pos = self.start_pos while pos < self.end_pos: # Find the first match, like above. @@ -735,6 +777,7 @@ def ReadUntilStartTag(it, tag_lexer, tag_name): def ReadUntilEndTag(it, tag_lexer, tag_name): + # type: (Iterator, TagLexer, str) -> Tuple[int, int] """Find the next , returning its (start, end) position Raise ParseError if it's not found. @@ -766,6 +809,7 @@ def ReadUntilEndTag(it, tag_lexer, tag_name): def ToText(s, left_pos=0, right_pos=-1): + # type: (str, int, int) -> str """Given HTML, return text by unquoting > and < etc. Used by: @@ -995,6 +1039,7 @@ def ToXml(htm8_str): class Counters(object): def __init__(self): + # type: () -> None self.num_tokens = 0 self.num_start_tags = 0 self.num_start_end_tags = 0 diff --git a/lazylex/html_test.py b/lazylex/html_test.py index e02e1c4dfb..7cb9f65535 100755 --- a/lazylex/html_test.py +++ b/lazylex/html_test.py @@ -4,6 +4,8 @@ import unittest from lazylex import html # module under test log = html.log +from typing import List +from typing import Tuple log = html.log @@ -14,6 +16,7 @@ class RegexTest(unittest.TestCase): def testDotAll(self): + # type: () -> None import re # Note that $ matches end of line, not end of string @@ -33,6 +36,7 @@ def testDotAll(self): print(p4.match('\n')) def testAttrRe(self): + # type: () -> None _ATTR_RE = html._ATTR_RE m = _ATTR_RE.match(' empty= val') print(m.groups()) @@ -41,23 +45,27 @@ def testAttrRe(self): class FunctionsTest(unittest.TestCase): def testFindLineNum(self): + # type: () -> None s = 'foo\n' * 3 for pos in [1, 5, 10, 50]: # out of bounds line_num = html.FindLineNum(s, pos) print(line_num) def testToText(self): + # type: () -> None t = html.ToText(' three < four && five ') self.assertEqual(' three < four && five ', t) def _MakeTagLexer(s): + # type: (str) -> html.TagLexer lex = html.TagLexer(s) lex.Reset(0, len(s)) return lex def 
_PrintTokens(lex): + # type: (html.TagLexer) -> None log('') log('tag = %r', lex.TagName()) for tok, start, end in lex.Tokens(): @@ -67,6 +75,7 @@ def _PrintTokens(lex): class TagLexerTest(unittest.TestCase): def testTagLexer(self): + # type: () -> None # Invalid! #lex = _MakeTagLexer('< >') #print(lex.Tag()) @@ -98,10 +107,12 @@ def testTagLexer(self): self.assertEqual('?foo=1&bar=2', lex.GetAttrRaw('href')) def testTagName(self): + # type: () -> None lex = _MakeTagLexer('') self.assertEqual('a', lex.TagName()) def testAllAttrs(self): + # type: () -> None """ [('key', 'value')] for all """ @@ -114,6 +125,7 @@ def testAllAttrs(self): self.assertEqual([('href', '?foo=1&bar=2')], lex.AllAttrsRaw()) def testEmptyMissingValues(self): + # type: () -> None # equivalent to