Merge remote-tracking branch 'upstream/main' into pep695docs
JelleZijlstra committed May 26, 2023
2 parents 57ddbef + 61c1d67 commit 67acd98
Showing 5 changed files with 24 additions and 15 deletions.
9 changes: 3 additions & 6 deletions Doc/library/ast.rst
@@ -481,7 +481,7 @@ Expressions
    Comparison operator tokens.


-.. class:: Call(func, args, keywords, starargs, kwargs)
+.. class:: Call(func, args, keywords)

    A function call. ``func`` is the function, which will often be a
    :class:`Name` or :class:`Attribute` object. Of the arguments:
@@ -491,7 +491,7 @@ Expressions
    arguments passed by keyword.

    When creating a ``Call`` node, ``args`` and ``keywords`` are required, but
-   they can be empty lists. ``starargs`` and ``kwargs`` are optional.
+   they can be empty lists.

    .. doctest::

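The doctest that follows is collapsed in this view. As a minimal sketch of the updated three-argument signature (illustrative only, not the file's actual doctest):

    >>> import ast
    >>> node = ast.parse("f(a, b=1)", mode="eval").body
    >>> ast.dump(node.func), len(node.args), len(node.keywords)
    ("Name(id='f', ctx=Load())", 1, 1)
    >>> # ``args`` and ``keywords`` are required but may be empty lists:
    >>> ast.unparse(ast.Call(func=ast.Name(id="f", ctx=ast.Load()), args=[], keywords=[]))
    'f()'
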
@@ -1917,7 +1917,7 @@ Function and class definitions
             type_ignores=[])


-.. class:: ClassDef(name, type_params, bases, keywords, starargs, kwargs, body, decorator_list)
+.. class:: ClassDef(name, type_params, bases, keywords, body, decorator_list)

    A class definition.

@@ -1927,9 +1927,6 @@ Function and class definitions
    * ``keywords`` is a list of :class:`keyword` nodes, principally for 'metaclass'.
      Other keywords will be passed to the metaclass, as per `PEP-3115
      <https://peps.python.org/pep-3115/>`_.
-   * ``starargs`` and ``kwargs`` are each a single node, as in a function call.
-     starargs will be expanded to join the list of base classes, and kwargs will
-     be passed to the metaclass.
    * ``body`` is a list of nodes representing the code within the class
      definition.
    * ``decorator_list`` is a list of nodes, as in :class:`FunctionDef`.
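For context on the removal: starred bases and metaclass keywords in a class header are already represented inside ``bases`` and ``keywords`` by the parser, so the two extra fields carried no information. A minimal sketch (illustrative, not part of the diff):

    >>> import ast
    >>> cls = ast.parse("class C(Base, metaclass=Meta): pass").body[0]
    >>> [ast.unparse(b) for b in cls.bases], [k.arg for k in cls.keywords]
    (['Base'], ['metaclass'])
    >>> # ``class C(*mixins)`` puts a Starred node in ``bases``:
    >>> ast.dump(ast.parse("class C(*mixins): pass").body[0].bases[0])
    "Starred(value=Name(id='mixins', ctx=Load()), ctx=Load())"
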
4 changes: 2 additions & 2 deletions Lib/idlelib/idle_test/test_editor.py
@@ -201,8 +201,8 @@ def test_searcher(self):
         test_info = (# text, (block, indent))
                      ("", (None, None)),
                      ("[1,", (None, None)),  # TokenError
-                     ("if 1:\n", ('if 1:', None)),
-                     ("if 1:\n 2\n 3\n", ('if 1:', ' 2')),
+                     ("if 1:\n", ('if 1:\n', None)),
+                     ("if 1:\n 2\n 3\n", ('if 1:\n', ' 2\n')),
                      )
         for code, expected_pair in test_info:
             with self.subTest(code=code):
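The expected block/indent pairs gain trailing newlines because, after this merge, ``TokenInfo.line`` preserves the source line exactly as read, newline included (the old C code stripped it; see the Python-tokenize.c hunk below). A minimal sketch, assuming the post-merge tokenize behavior:

    >>> import io, tokenize
    >>> next(tokenize.generate_tokens(io.StringIO("if 1:\n 2\n").readline)).line
    'if 1:\n'
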
15 changes: 13 additions & 2 deletions Lib/test/test_tokenize.py
@@ -1174,7 +1174,7 @@ def readline():

         # skip the initial encoding token and the end tokens
         tokens = list(_tokenize(readline(), encoding='utf-8'))[:-2]
-        expected_tokens = [TokenInfo(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"')]
+        expected_tokens = [TokenInfo(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"\n')]
         self.assertEqual(tokens, expected_tokens,
                          "bytes not decoded with encoding")

@@ -1657,7 +1657,6 @@ def check_roundtrip(self, f):
             code = f.encode('utf-8')
         else:
             code = f.read()
-            f.close()
         readline = iter(code.splitlines(keepends=True)).__next__
         tokens5 = list(tokenize(readline))
         tokens2 = [tok[:2] for tok in tokens5]
@@ -1672,6 +1671,17 @@ def check_roundtrip(self, f):
         tokens2_from5 = [tok[:2] for tok in tokenize(readline5)]
         self.assertEqual(tokens2_from5, tokens2)

+    def check_line_extraction(self, f):
+        if isinstance(f, str):
+            code = f.encode('utf-8')
+        else:
+            code = f.read()
+        readline = iter(code.splitlines(keepends=True)).__next__
+        for tok in tokenize(readline):
+            if tok.type in {ENCODING, ENDMARKER}:
+                continue
+            self.assertEqual(tok.string, tok.line[tok.start[1]: tok.end[1]])
+
     def test_roundtrip(self):
         # There are some standard formatting practices that are easy to get right.

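The new helper checks the invariant this merge is meant to restore: a token's ``string`` can be recovered by slicing its own ``line`` between the start and end columns (``ENCODING`` and ``ENDMARKER`` are skipped since they carry no source text). A single-line sketch of that invariant, assuming post-merge behavior:

    >>> import io, tokenize
    >>> toks = tokenize.generate_tokens(io.StringIO("x = 1\n").readline)
    >>> all(t.string == t.line[t.start[1]:t.end[1]] for t in toks)
    True
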
@@ -1768,6 +1778,7 @@ def test_random_files(self):
             with open(testfile, 'rb') as f:
                 # with self.subTest(file=testfile):
                 self.check_roundtrip(f)
+                self.check_line_extraction(f)


     def roundtrip(self, code):
2 changes: 2 additions & 0 deletions (new NEWS entry; file path not captured)
@@ -0,0 +1,2 @@
+Ensure that the ``line`` attribute in :class:`tokenize.TokenInfo` objects in
+the :mod:`tokenize` module is always correct. Patch by Pablo Galindo.
9 changes: 4 additions & 5 deletions Python/Python-tokenize.c
@@ -194,15 +194,14 @@ tokenizeriter_next(tokenizeriterobject *it)
         goto exit;
     }

-    Py_ssize_t size = it->tok->inp - it->tok->buf;
-    assert(it->tok->buf[size-1] == '\n');
-    size -= 1; // Remove the newline character from the end of the line
-    PyObject *line = PyUnicode_DecodeUTF8(it->tok->buf, size, "replace");
+    const char *line_start = ISSTRINGLIT(type) ? it->tok->multi_line_start : it->tok->line_start;
+    Py_ssize_t size = it->tok->inp - line_start;
+    PyObject *line = PyUnicode_DecodeUTF8(line_start, size, "replace");
     if (line == NULL) {
         Py_DECREF(str);
         goto exit;
     }
-    const char *line_start = ISSTRINGLIT(type) ? it->tok->multi_line_start : it->tok->line_start;
+
     Py_ssize_t lineno = ISSTRINGLIT(type) ? it->tok->first_lineno : it->tok->lineno;
     Py_ssize_t end_lineno = it->tok->lineno;
     Py_ssize_t col_offset = -1;
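The rewritten block computes the line from ``line_start`` (or ``multi_line_start`` for string literals) rather than from the start of the whole token buffer, and no longer trims the trailing newline. One visible effect, sketched here under the assumption of post-merge behavior: a triple-quoted string's ``line`` spans every source line of the token:

    >>> import io, tokenize
    >>> src = 's = """a\nb"""\n'
    >>> [t.line for t in tokenize.generate_tokens(io.StringIO(src).readline)
    ...  if t.type == tokenize.STRING]
    ['s = """a\nb"""\n']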
