From 7d2c39752fa6f685f15ad9c585d83a62553477c2 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 10 Oct 2024 00:20:53 +0300 Subject: [PATCH 01/80] gh-91818: Use default program name in the CLI of many modules (GH-124867) As argparse now detects by default when the code was run as a module. This leads to using the actual executable name instead of simply "python" to display in the usage message ("usage: python -m ..."). --- Lib/ast.py | 2 +- Lib/ensurepip/__init__.py | 2 +- Lib/ensurepip/_uninstall.py | 2 +- Lib/json/tool.py | 3 +-- Lib/pdb.py | 3 +-- Lib/sqlite3/__main__.py | 1 - Lib/test/test_sqlite3/test_cli.py | 4 +++- Lib/tokenize.py | 2 +- Lib/venv/__init__.py | 3 +-- .../Library/2024-10-01-23-29-09.gh-issue-91818.Kz8cPI.rst | 4 ++++ 10 files changed, 14 insertions(+), 12 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-10-01-23-29-09.gh-issue-91818.Kz8cPI.rst diff --git a/Lib/ast.py b/Lib/ast.py index a954d4a97d3c22c..154d2c8c1f9ebb9 100644 --- a/Lib/ast.py +++ b/Lib/ast.py @@ -1743,7 +1743,7 @@ def unparse(ast_obj): def main(): import argparse - parser = argparse.ArgumentParser(prog='python -m ast') + parser = argparse.ArgumentParser() parser.add_argument('infile', nargs='?', default='-', help='the file to parse; defaults to stdin') parser.add_argument('-m', '--mode', default='exec', diff --git a/Lib/ensurepip/__init__.py b/Lib/ensurepip/__init__.py index c5350df270487ab..585afc85836c065 100644 --- a/Lib/ensurepip/__init__.py +++ b/Lib/ensurepip/__init__.py @@ -205,7 +205,7 @@ def _uninstall_helper(*, verbosity=0): def _main(argv=None): import argparse - parser = argparse.ArgumentParser(prog="python -m ensurepip") + parser = argparse.ArgumentParser() parser.add_argument( "--version", action="version", diff --git a/Lib/ensurepip/_uninstall.py b/Lib/ensurepip/_uninstall.py index b257904328d2f5d..4183c28a809008f 100644 --- a/Lib/ensurepip/_uninstall.py +++ b/Lib/ensurepip/_uninstall.py @@ -6,7 +6,7 @@ def _main(argv=None): - parser = argparse.ArgumentParser(prog="python -m ensurepip._uninstall") + parser = argparse.ArgumentParser() parser.add_argument( "--version", action="version", diff --git a/Lib/json/tool.py b/Lib/json/tool.py index 9028e517fb9f7df..1ba91384c81f279 100644 --- a/Lib/json/tool.py +++ b/Lib/json/tool.py @@ -9,10 +9,9 @@ def main(): - prog = 'python -m json' description = ('A simple command line interface for json module ' 'to validate and pretty-print JSON objects.') - parser = argparse.ArgumentParser(prog=prog, description=description) + parser = argparse.ArgumentParser(description=description) parser.add_argument('infile', nargs='?', help='a JSON file to be validated or pretty-printed', default='-') diff --git a/Lib/pdb.py b/Lib/pdb.py index aea6fb70ae31067..d9aed24bfcd8e79 100644 --- a/Lib/pdb.py +++ b/Lib/pdb.py @@ -2423,8 +2423,7 @@ def help(): def main(): import argparse - parser = argparse.ArgumentParser(prog="pdb", - usage="%(prog)s [-h] [-c command] (-m module | pyfile) [args ...]", + parser = argparse.ArgumentParser(usage="%(prog)s [-h] [-c command] (-m module | pyfile) [args ...]", description=_usage, formatter_class=argparse.RawDescriptionHelpFormatter, allow_abbrev=False) diff --git a/Lib/sqlite3/__main__.py b/Lib/sqlite3/__main__.py index d9423c25e341358..cfdee61403d1fa0 100644 --- a/Lib/sqlite3/__main__.py +++ b/Lib/sqlite3/__main__.py @@ -65,7 +65,6 @@ def runsource(self, source, filename="", symbol="single"): def main(*args): parser = ArgumentParser( description="Python sqlite3 CLI", - prog="python -m sqlite3", ) parser.add_argument( "filename", type=str, default=":memory:", nargs="?", diff --git a/Lib/test/test_sqlite3/test_cli.py b/Lib/test/test_sqlite3/test_cli.py index 303f9e03b5383f2..d014a9ce8416070 100644 --- a/Lib/test/test_sqlite3/test_cli.py +++ b/Lib/test/test_sqlite3/test_cli.py @@ -34,7 +34,9 @@ def expect_failure(self, *args): def test_cli_help(self): out = self.expect_success("-h") - self.assertIn("usage: python -m sqlite3", out) + self.assertIn("usage: ", out) + self.assertIn(" [-h] [-v] [filename] [sql]", out) + self.assertIn("Python sqlite3 CLI", out) def test_cli_version(self): out = self.expect_success("-v") diff --git a/Lib/tokenize.py b/Lib/tokenize.py index 4b4c3cfe16999b5..7ece4e9b70d31b3 100644 --- a/Lib/tokenize.py +++ b/Lib/tokenize.py @@ -510,7 +510,7 @@ def error(message, filename=None, location=None): sys.exit(1) # Parse the arguments and options - parser = argparse.ArgumentParser(prog='python -m tokenize') + parser = argparse.ArgumentParser() parser.add_argument(dest='filename', nargs='?', metavar='filename.py', help='the file to tokenize; defaults to stdin') diff --git a/Lib/venv/__init__.py b/Lib/venv/__init__.py index a5cb2bdb0646928..a00fa690fa0b882 100644 --- a/Lib/venv/__init__.py +++ b/Lib/venv/__init__.py @@ -575,8 +575,7 @@ def create(env_dir, system_site_packages=False, clear=False, def main(args=None): import argparse - parser = argparse.ArgumentParser(prog=__name__, - description='Creates virtual Python ' + parser = argparse.ArgumentParser(description='Creates virtual Python ' 'environments in one or ' 'more target ' 'directories.', diff --git a/Misc/NEWS.d/next/Library/2024-10-01-23-29-09.gh-issue-91818.Kz8cPI.rst b/Misc/NEWS.d/next/Library/2024-10-01-23-29-09.gh-issue-91818.Kz8cPI.rst new file mode 100644 index 000000000000000..f45f00e48a38303 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-01-23-29-09.gh-issue-91818.Kz8cPI.rst @@ -0,0 +1,4 @@ +The CLI of many modules (:mod:`ast`, :mod:`ensurepip`, :mod:`json`, +:mod:`pdb`, :mod:`sqlite3`, :mod:`tokenize`, :mod:`venv`) now uses the +actual executable name instead of simply "python" to display in the usage +message. From 52f70da19cf3c7198be37faeac233ef803080f6f Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 9 Oct 2024 23:56:30 +0200 Subject: [PATCH 02/80] gh-125196: Use PyUnicodeWriter for repr(list) (#125202) Replace the private _PyUnicodeWriter with the public PyUnicodeWriter. Replace PyObject_Repr() + _PyUnicodeWriter_WriteStr() with PyUnicodeWriter_WriteRepr(). --- Objects/listobject.c | 45 ++++++++++++++++++++++---------------------- 1 file changed, 23 insertions(+), 22 deletions(-) diff --git a/Objects/listobject.c b/Objects/listobject.c index 8abe15d6674140c..e7090f20001a39f 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -522,49 +522,50 @@ list_dealloc(PyObject *self) static PyObject * list_repr_impl(PyListObject *v) { - PyObject *s; - _PyUnicodeWriter writer; - Py_ssize_t i = Py_ReprEnter((PyObject*)v); - if (i != 0) { - return i > 0 ? PyUnicode_FromString("[...]") : NULL; + int res = Py_ReprEnter((PyObject*)v); + if (res != 0) { + return (res > 0 ? PyUnicode_FromString("[...]") : NULL); } - _PyUnicodeWriter_Init(&writer); - writer.overallocate = 1; /* "[" + "1" + ", 2" * (len - 1) + "]" */ - writer.min_length = 1 + 1 + (2 + 1) * (Py_SIZE(v) - 1) + 1; + Py_ssize_t prealloc = 1 + 1 + (2 + 1) * (Py_SIZE(v) - 1) + 1; + PyUnicodeWriter *writer = PyUnicodeWriter_Create(prealloc); + if (writer == NULL) { + goto error; + } - if (_PyUnicodeWriter_WriteChar(&writer, '[') < 0) + if (PyUnicodeWriter_WriteChar(writer, '[') < 0) { goto error; + } /* Do repr() on each element. Note that this may mutate the list, so must refetch the list size on each iteration. */ - for (i = 0; i < Py_SIZE(v); ++i) { + for (Py_ssize_t i = 0; i < Py_SIZE(v); ++i) { if (i > 0) { - if (_PyUnicodeWriter_WriteASCIIString(&writer, ", ", 2) < 0) + if (PyUnicodeWriter_WriteChar(writer, ',') < 0) { + goto error; + } + if (PyUnicodeWriter_WriteChar(writer, ' ') < 0) { goto error; + } } - s = PyObject_Repr(v->ob_item[i]); - if (s == NULL) - goto error; - - if (_PyUnicodeWriter_WriteStr(&writer, s) < 0) { - Py_DECREF(s); + if (PyUnicodeWriter_WriteRepr(writer, v->ob_item[i]) < 0) { goto error; } - Py_DECREF(s); } - writer.overallocate = 0; - if (_PyUnicodeWriter_WriteChar(&writer, ']') < 0) + if (PyUnicodeWriter_WriteChar(writer, ']') < 0) { goto error; + } Py_ReprLeave((PyObject *)v); - return _PyUnicodeWriter_Finish(&writer); + return PyUnicodeWriter_Finish(writer); error: - _PyUnicodeWriter_Dealloc(&writer); + if (writer != NULL) { + PyUnicodeWriter_Discard(writer); + } Py_ReprLeave((PyObject *)v); return NULL; } From 0c5a48c1c9039eb1ce25a96c43505c4de0a0b9d7 Mon Sep 17 00:00:00 2001 From: Savannah Ostrowski Date: Wed, 9 Oct 2024 15:56:50 -0600 Subject: [PATCH 03/80] GH-124693: Support parsing negative scientific and complex numbers argparse (GH-124823) Co-authored-by: Serhiy Storchaka --- Lib/argparse.py | 2 +- Lib/test/test_argparse.py | 27 ++++++++++++++----- ...-10-01-02-31-13.gh-issue-124693.qzbXKB.rst | 1 + 3 files changed, 22 insertions(+), 8 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-10-01-02-31-13.gh-issue-124693.qzbXKB.rst diff --git a/Lib/argparse.py b/Lib/argparse.py index 21299b69ecd74cb..d1f8fa2ace8611a 100644 --- a/Lib/argparse.py +++ b/Lib/argparse.py @@ -1367,7 +1367,7 @@ def __init__(self, self._defaults = {} # determines whether an "option" looks like a negative number - self._negative_number_matcher = _re.compile(r'^-(?:\d+(?:_\d+)*(?:\.\d+(?:_\d+)*)?|\.\d+(?:_\d+)*)$') + self._negative_number_matcher = _re.compile(r'-\.?\d') # whether or not there are any optionals that look like negative # numbers -- uses a list so it can be shared and edited diff --git a/Lib/test/test_argparse.py b/Lib/test/test_argparse.py index 1bf812b36fc2c60..c9e79eb18a08fb9 100644 --- a/Lib/test/test_argparse.py +++ b/Lib/test/test_argparse.py @@ -2195,20 +2195,33 @@ class TestNegativeNumber(ParserTestCase): argument_signatures = [ Sig('--int', type=int), Sig('--float', type=float), + Sig('--complex', type=complex), ] failures = [ '--float -_.45', '--float -1__000.0', + '--float -1.0.0', '--int -1__000', + '--int -1.0', + '--complex -1__000.0j', + '--complex -1.0jj', + '--complex -_.45j', ] successes = [ - ('--int -1000 --float -1000.0', NS(int=-1000, float=-1000.0)), - ('--int -1_000 --float -1_000.0', NS(int=-1000, float=-1000.0)), - ('--int -1_000_000 --float -1_000_000.0', NS(int=-1000000, float=-1000000.0)), - ('--float -1_000.0', NS(int=None, float=-1000.0)), - ('--float -1_000_000.0_0', NS(int=None, float=-1000000.0)), - ('--float -.5', NS(int=None, float=-0.5)), - ('--float -.5_000', NS(int=None, float=-0.5)), + ('--int -1000 --float -1000.0', NS(int=-1000, float=-1000.0, complex=None)), + ('--int -1_000 --float -1_000.0', NS(int=-1000, float=-1000.0, complex=None)), + ('--int -1_000_000 --float -1_000_000.0', NS(int=-1000000, float=-1000000.0, complex=None)), + ('--float -1_000.0', NS(int=None, float=-1000.0, complex=None)), + ('--float -1_000_000.0_0', NS(int=None, float=-1000000.0, complex=None)), + ('--float -.5', NS(int=None, float=-0.5, complex=None)), + ('--float -.5_000', NS(int=None, float=-0.5, complex=None)), + ('--float -1e3', NS(int=None, float=-1000, complex=None)), + ('--float -1e-3', NS(int=None, float=-0.001, complex=None)), + ('--complex -1j', NS(int=None, float=None, complex=-1j)), + ('--complex -1_000j', NS(int=None, float=None, complex=-1000j)), + ('--complex -1_000.0j', NS(int=None, float=None, complex=-1000.0j)), + ('--complex -1e3j', NS(int=None, float=None, complex=-1000j)), + ('--complex -1e-3j', NS(int=None, float=None, complex=-0.001j)), ] class TestInvalidAction(TestCase): diff --git a/Misc/NEWS.d/next/Library/2024-10-01-02-31-13.gh-issue-124693.qzbXKB.rst b/Misc/NEWS.d/next/Library/2024-10-01-02-31-13.gh-issue-124693.qzbXKB.rst new file mode 100644 index 000000000000000..3e87eb457d9911e --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-01-02-31-13.gh-issue-124693.qzbXKB.rst @@ -0,0 +1 @@ +Fix a bug where :mod:`argparse` doesn't recognize negative complex numbers or negative numbers using scientific notation. From ee3167b9787bf9424d5637a224233de775450231 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 10 Oct 2024 00:01:02 +0200 Subject: [PATCH 04/80] gh-125196: Add fast-path for int in PyUnicodeWriter_WriteStr() (#125214) PyUnicodeWriter_WriteStr() and PyUnicodeWriter_WriteRepr() now call directly _PyLong_FormatWriter() if the argument is an int. --- Objects/unicodeobject.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 4ea7d5f380e9a28..a9b332481638800 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -13632,6 +13632,10 @@ _PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer, PyObject *str) int PyUnicodeWriter_WriteStr(PyUnicodeWriter *writer, PyObject *obj) { + if (Py_TYPE(obj) == &PyLong_Type) { + return _PyLong_FormatWriter((_PyUnicodeWriter*)writer, obj, 10, 0); + } + PyObject *str = PyObject_Str(obj); if (str == NULL) { return -1; @@ -13646,6 +13650,10 @@ PyUnicodeWriter_WriteStr(PyUnicodeWriter *writer, PyObject *obj) int PyUnicodeWriter_WriteRepr(PyUnicodeWriter *writer, PyObject *obj) { + if (Py_TYPE(obj) == &PyLong_Type) { + return _PyLong_FormatWriter((_PyUnicodeWriter*)writer, obj, 10, 0); + } + PyObject *repr = PyObject_Repr(obj); if (repr == NULL) { return -1; From 1877543d03d323d581b5fc0f19eff501926ba151 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 10 Oct 2024 00:04:50 +0200 Subject: [PATCH 05/80] gh-125196: Use PyUnicodeWriter for repr(structseq) (#125219) Replace the private _PyUnicodeWriter with the public PyUnicodeWriter. * Avoid temporary PyUnicode_DecodeUTF8(): call PyUnicodeWriter_WriteUTF8() instead. * Avoid temporary PyObject_Repr(): call PyUnicodeWriter_WriteRepr() instead. --- Objects/structseq.c | 65 +++++++++++++++++---------------------------- 1 file changed, 24 insertions(+), 41 deletions(-) diff --git a/Objects/structseq.c b/Objects/structseq.c index ee3dbf9d4c047ae..6092742835400bb 100644 --- a/Objects/structseq.c +++ b/Objects/structseq.c @@ -266,83 +266,66 @@ static PyObject * structseq_repr(PyStructSequence *obj) { PyTypeObject *typ = Py_TYPE(obj); - _PyUnicodeWriter writer; - /* Write "typename(" */ - PyObject *type_name = PyUnicode_DecodeUTF8(typ->tp_name, - strlen(typ->tp_name), - NULL); - if (type_name == NULL) { + // count 5 characters per item: "x=1, " + Py_ssize_t type_name_len = strlen(typ->tp_name); + Py_ssize_t prealloc = (type_name_len + 1 + + VISIBLE_SIZE(obj) * 5 + 1); + PyUnicodeWriter *writer = PyUnicodeWriter_Create(prealloc); + if (writer == NULL) { return NULL; } - _PyUnicodeWriter_Init(&writer); - writer.overallocate = 1; - /* count 5 characters per item: "x=1, " */ - writer.min_length = (PyUnicode_GET_LENGTH(type_name) + 1 - + VISIBLE_SIZE(obj) * 5 + 1); - - if (_PyUnicodeWriter_WriteStr(&writer, type_name) < 0) { - Py_DECREF(type_name); + // Write "typename(" + if (PyUnicodeWriter_WriteUTF8(writer, typ->tp_name, type_name_len) < 0) { goto error; } - Py_DECREF(type_name); - - if (_PyUnicodeWriter_WriteChar(&writer, '(') < 0) { + if (PyUnicodeWriter_WriteChar(writer, '(') < 0) { goto error; } for (Py_ssize_t i=0; i < VISIBLE_SIZE(obj); i++) { if (i > 0) { - /* Write ", " */ - if (_PyUnicodeWriter_WriteASCIIString(&writer, ", ", 2) < 0) { + // Write ", " + if (PyUnicodeWriter_WriteChar(writer, ',') < 0) { + goto error; + } + if (PyUnicodeWriter_WriteChar(writer, ' ') < 0) { goto error; } } - /* Write "name=repr" */ + // Write name const char *name_utf8 = typ->tp_members[i].name; if (name_utf8 == NULL) { - PyErr_Format(PyExc_SystemError, "In structseq_repr(), member %zd name is NULL" + PyErr_Format(PyExc_SystemError, + "In structseq_repr(), member %zd name is NULL" " for type %.500s", i, typ->tp_name); goto error; } - - PyObject *name = PyUnicode_DecodeUTF8(name_utf8, strlen(name_utf8), NULL); - if (name == NULL) { - goto error; - } - if (_PyUnicodeWriter_WriteStr(&writer, name) < 0) { - Py_DECREF(name); + if (PyUnicodeWriter_WriteUTF8(writer, name_utf8, -1) < 0) { goto error; } - Py_DECREF(name); - if (_PyUnicodeWriter_WriteChar(&writer, '=') < 0) { + // Write "=" + repr(value) + if (PyUnicodeWriter_WriteChar(writer, '=') < 0) { goto error; } - PyObject *value = PyStructSequence_GetItem((PyObject*)obj, i); assert(value != NULL); - PyObject *repr = PyObject_Repr(value); - if (repr == NULL) { - goto error; - } - if (_PyUnicodeWriter_WriteStr(&writer, repr) < 0) { - Py_DECREF(repr); + if (PyUnicodeWriter_WriteRepr(writer, value) < 0) { goto error; } - Py_DECREF(repr); } - if (_PyUnicodeWriter_WriteChar(&writer, ')') < 0) { + if (PyUnicodeWriter_WriteChar(writer, ')') < 0) { goto error; } - return _PyUnicodeWriter_Finish(&writer); + return PyUnicodeWriter_Finish(writer); error: - _PyUnicodeWriter_Dealloc(&writer); + PyUnicodeWriter_Discard(writer); return NULL; } From c7d5d1d93b630e352abd9a0c93ea6d34c443f444 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Wed, 9 Oct 2024 23:30:56 +0100 Subject: [PATCH 06/80] gh-125140: Remove the current directory from sys.path when using pyrepl (GH-125212) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Pablo Galindo Co-authored-by: Łukasz Langa Co-authored-by: Peter Bierma --- Lib/site.py | 11 ++++++++--- .../2024-10-09-20-08-13.gh-issue-125140.YgNWRB.rst | 1 + 2 files changed, 9 insertions(+), 3 deletions(-) create mode 100644 Misc/NEWS.d/next/Security/2024-10-09-20-08-13.gh-issue-125140.YgNWRB.rst diff --git a/Lib/site.py b/Lib/site.py index b3194d79fb5ab88..07a6361fad44e54 100644 --- a/Lib/site.py +++ b/Lib/site.py @@ -503,9 +503,14 @@ def register_readline(): if PYTHON_BASIC_REPL: CAN_USE_PYREPL = False else: - import _pyrepl.readline - import _pyrepl.unix_console - from _pyrepl.main import CAN_USE_PYREPL + original_path = sys.path + sys.path = [p for p in original_path if p != ''] + try: + import _pyrepl.readline + import _pyrepl.unix_console + from _pyrepl.main import CAN_USE_PYREPL + finally: + sys.path = original_path except ImportError: return diff --git a/Misc/NEWS.d/next/Security/2024-10-09-20-08-13.gh-issue-125140.YgNWRB.rst b/Misc/NEWS.d/next/Security/2024-10-09-20-08-13.gh-issue-125140.YgNWRB.rst new file mode 100644 index 000000000000000..f4a49302372647a --- /dev/null +++ b/Misc/NEWS.d/next/Security/2024-10-09-20-08-13.gh-issue-125140.YgNWRB.rst @@ -0,0 +1 @@ +Remove the current directory from ``sys.path`` when using PyREPL. From 9ad55e85d78c5338b1ad170e614345652bd1b651 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 10 Oct 2024 02:30:14 +0300 Subject: [PATCH 07/80] gh-124969: Skip the test for ALT_DIGITS also on iOS (#125177) Skip the locale.ALT_DIGITS test on all Apple platforms, not just macOS. --- Lib/test/test__locale.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/test/test__locale.py b/Lib/test/test__locale.py index 02d2acc6d1c417d..e403c2a822788dc 100644 --- a/Lib/test/test__locale.py +++ b/Lib/test/test__locale.py @@ -209,8 +209,8 @@ def test_alt_digits_nl_langinfo(self): with self.subTest(locale=loc): alt_digits = nl_langinfo(locale.ALT_DIGITS) self.assertIsInstance(alt_digits, tuple) - if count and not alt_digits and sys.platform == 'darwin': - self.skipTest(f'ALT_DIGITS is not set for locale {loc!r} on macOS') + if count and not alt_digits and support.is_apple: + self.skipTest(f'ALT_DIGITS is not set for locale {loc!r} on Apple platforms') self.assertEqual(len(alt_digits), count) for i in samples: self.assertEqual(alt_digits[i], samples[i]) From 1b2a5485f94ccbe43a45eb9990a5649ae3d2499e Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 10 Oct 2024 01:32:02 +0200 Subject: [PATCH 08/80] gh-125196: PyUnicodeWriter_Discard(NULL) does nothing (#125222) --- Doc/c-api/unicode.rst | 2 ++ Objects/listobject.c | 4 +--- Objects/unicodeobject.c | 3 +++ 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst index f5704cffa199a58..4daf9e9fdbf2f10 100644 --- a/Doc/c-api/unicode.rst +++ b/Doc/c-api/unicode.rst @@ -1600,6 +1600,8 @@ object. Discard the internal Unicode buffer and destroy the writer instance. + If *writer* is ``NULL``, no operation is performed. + .. c:function:: int PyUnicodeWriter_WriteChar(PyUnicodeWriter *writer, Py_UCS4 ch) Write the single Unicode character *ch* into *writer*. diff --git a/Objects/listobject.c b/Objects/listobject.c index e7090f20001a39f..930aefde325a7c5 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -563,9 +563,7 @@ list_repr_impl(PyListObject *v) return PyUnicodeWriter_Finish(writer); error: - if (writer != NULL) { - PyUnicodeWriter_Discard(writer); - } + PyUnicodeWriter_Discard(writer); Py_ReprLeave((PyObject *)v); return NULL; } diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index a9b332481638800..93c1025b6a3cae0 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -13455,6 +13455,9 @@ PyUnicodeWriter_Create(Py_ssize_t length) void PyUnicodeWriter_Discard(PyUnicodeWriter *writer) { + if (writer == NULL) { + return; + } _PyUnicodeWriter_Dealloc((_PyUnicodeWriter*)writer); PyMem_Free(writer); } From 942916378aa6a0946b1385c2c7ca6935620d710a Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 10 Oct 2024 01:37:14 +0200 Subject: [PATCH 09/80] gh-125196: Use PyUnicodeWriter for repr(contextvars.Token) (#125220) Replace the private _PyUnicodeWriter with the public PyUnicodeWriter. --- Lib/test/test_context.py | 8 ++++++++ Python/context.c | 39 +++++++++++---------------------------- 2 files changed, 19 insertions(+), 28 deletions(-) diff --git a/Lib/test/test_context.py b/Lib/test/test_context.py index 255be306156c0bf..b06b9df9f5b0b8a 100644 --- a/Lib/test/test_context.py +++ b/Lib/test/test_context.py @@ -60,6 +60,14 @@ def test_context_var_repr_1(self): c.reset(t) self.assertIn(' used ', repr(t)) + @isolated_context + def test_token_repr_1(self): + c = contextvars.ContextVar('a') + tok = c.set(1) + self.assertRegex(repr(tok), + r"^ at 0x[0-9a-fA-F]+>$") + def test_context_subclassing_1(self): with self.assertRaisesRegex(TypeError, 'not an acceptable base type'): class MyContextVar(contextvars.ContextVar): diff --git a/Python/context.c b/Python/context.c index ddb03555f9e4025..36e2677c398f591 100644 --- a/Python/context.c +++ b/Python/context.c @@ -1154,48 +1154,31 @@ token_tp_dealloc(PyContextToken *self) static PyObject * token_tp_repr(PyContextToken *self) { - _PyUnicodeWriter writer; - - _PyUnicodeWriter_Init(&writer); - - if (_PyUnicodeWriter_WriteASCIIString(&writer, "tok_used) { - if (_PyUnicodeWriter_WriteASCIIString(&writer, " used", 5) < 0) { + if (PyUnicodeWriter_WriteUTF8(writer, " used", 5) < 0) { goto error; } } - - if (_PyUnicodeWriter_WriteASCIIString(&writer, " var=", 5) < 0) { + if (PyUnicodeWriter_WriteUTF8(writer, " var=", 5) < 0) { goto error; } - - PyObject *var = PyObject_Repr((PyObject *)self->tok_var); - if (var == NULL) { + if (PyUnicodeWriter_WriteRepr(writer, (PyObject *)self->tok_var) < 0) { goto error; } - if (_PyUnicodeWriter_WriteStr(&writer, var) < 0) { - Py_DECREF(var); - goto error; - } - Py_DECREF(var); - - PyObject *addr = PyUnicode_FromFormat(" at %p>", self); - if (addr == NULL) { - goto error; - } - if (_PyUnicodeWriter_WriteStr(&writer, addr) < 0) { - Py_DECREF(addr); + if (PyUnicodeWriter_Format(writer, " at %p>", self) < 0) { goto error; } - Py_DECREF(addr); - - return _PyUnicodeWriter_Finish(&writer); + return PyUnicodeWriter_Finish(writer); error: - _PyUnicodeWriter_Dealloc(&writer); + PyUnicodeWriter_Discard(writer); return NULL; } From 99400930ac1d4e5e10a5ae30f8202d8bc2661e39 Mon Sep 17 00:00:00 2001 From: Richard Hansen Date: Wed, 9 Oct 2024 19:44:03 -0400 Subject: [PATCH 10/80] gh-124872: Refine contextvars documentation (#124773) * Add definitions for "context", "current context", and "context management protocol". * Update related definitions to be consistent with the new definitions. * Restructure the documentation for the `contextvars.Context` class to prepare for adding context manager support, and for consistency with the definitions. * Use `testcode` and `testoutput` to test the `Context.run` example. * Expand the documentation for the `Py_CONTEXT_EVENT_ENTER` and `Py_CONTEXT_EVENT_EXIT` events to clarify and to prepare for planned changes. --- Doc/c-api/contextvars.rst | 20 ++-- Doc/glossary.rst | 40 ++++++-- Doc/library/contextvars.rst | 98 +++++++++++++------ Include/cpython/context.h | 23 +++-- ...-09-29-18-14-52.gh-issue-119333.7tinr0.rst | 3 + 5 files changed, 132 insertions(+), 52 deletions(-) create mode 100644 Misc/NEWS.d/next/Documentation/2024-09-29-18-14-52.gh-issue-119333.7tinr0.rst diff --git a/Doc/c-api/contextvars.rst b/Doc/c-api/contextvars.rst index 0de135b232aaaf1..59e74ba1ac70226 100644 --- a/Doc/c-api/contextvars.rst +++ b/Doc/c-api/contextvars.rst @@ -122,18 +122,24 @@ Context object management functions: .. c:type:: PyContextEvent Enumeration of possible context object watcher events: - - ``Py_CONTEXT_EVENT_ENTER`` - - ``Py_CONTEXT_EVENT_EXIT`` + + - ``Py_CONTEXT_EVENT_ENTER``: A context has been entered, causing the + :term:`current context` to switch to it. The object passed to the watch + callback is the now-current :class:`contextvars.Context` object. Each + enter event will eventually have a corresponding exit event for the same + context object after any subsequently entered contexts have themselves been + exited. + - ``Py_CONTEXT_EVENT_EXIT``: A context is about to be exited, which will + cause the :term:`current context` to switch back to what it was before the + context was entered. The object passed to the watch callback is the + still-current :class:`contextvars.Context` object. .. versionadded:: 3.14 .. c:type:: int (*PyContext_WatchCallback)(PyContextEvent event, PyContext* ctx) - Type of a context object watcher callback function. - If *event* is ``Py_CONTEXT_EVENT_ENTER``, then the callback is invoked - after *ctx* has been set as the current context for the current thread. - Otherwise, the callback is invoked before the deactivation of *ctx* as the current context - and the restoration of the previous contex object for the current thread. + Context object watcher callback function. The object passed to the callback + is event-specific; see :c:type:`PyContextEvent` for details. If the callback returns with an exception set, it must return ``-1``; this exception will be printed as an unraisable exception using diff --git a/Doc/glossary.rst b/Doc/glossary.rst index cb7e0a2b89d0379..1d407732eef5768 100644 --- a/Doc/glossary.rst +++ b/Doc/glossary.rst @@ -265,19 +265,33 @@ Glossary advanced mathematical feature. If you're not aware of a need for them, it's almost certain you can safely ignore them. + context + This term has different meanings depending on where and how it is used. + Some common meanings: + + * The temporary state or environment established by a :term:`context + manager` via a :keyword:`with` statement. + * The collection of key­value bindings associated with a particular + :class:`contextvars.Context` object and accessed via + :class:`~contextvars.ContextVar` objects. Also see :term:`context + variable`. + * A :class:`contextvars.Context` object. Also see :term:`current + context`. + + context management protocol + The :meth:`~object.__enter__` and :meth:`~object.__exit__` methods called + by the :keyword:`with` statement. See :pep:`343`. + context manager - An object which controls the environment seen in a :keyword:`with` - statement by defining :meth:`~object.__enter__` and :meth:`~object.__exit__` methods. - See :pep:`343`. + An object which implements the :term:`context management protocol` and + controls the environment seen in a :keyword:`with` statement. See + :pep:`343`. context variable - A variable which can have different values depending on its context. - This is similar to Thread-Local Storage in which each execution - thread may have a different value for a variable. However, with context - variables, there may be several contexts in one execution thread and the - main usage for context variables is to keep track of variables in + A variable whose value depends on which context is the :term:`current + context`. Values are accessed via :class:`contextvars.ContextVar` + objects. Context variables are primarily used to isolate state between concurrent asynchronous tasks. - See :mod:`contextvars`. contiguous .. index:: C-contiguous, Fortran contiguous @@ -311,6 +325,14 @@ Glossary is used when necessary to distinguish this implementation from others such as Jython or IronPython. + current context + The :term:`context` (:class:`contextvars.Context` object) that is + currently used by :class:`~contextvars.ContextVar` objects to access (get + or set) the values of :term:`context variables `. Each + thread has its own current context. Frameworks for executing asynchronous + tasks (see :mod:`asyncio`) associate each task with a context which + becomes the current context whenever the task starts or resumes execution. + decorator A function returning another function, usually applied as a function transformation using the ``@wrapper`` syntax. Common examples for diff --git a/Doc/library/contextvars.rst b/Doc/library/contextvars.rst index 2a79dfe8f81e26e..2b1fb9fdd29cd88 100644 --- a/Doc/library/contextvars.rst +++ b/Doc/library/contextvars.rst @@ -144,51 +144,89 @@ Manual Context Management To get a copy of the current context use the :func:`~contextvars.copy_context` function. - Every thread will have a different top-level :class:`~contextvars.Context` - object. This means that a :class:`ContextVar` object behaves in a similar - fashion to :func:`threading.local` when values are assigned in different - threads. + Each thread has its own effective stack of :class:`!Context` objects. The + :term:`current context` is the :class:`!Context` object at the top of the + current thread's stack. All :class:`!Context` objects in the stacks are + considered to be *entered*. + + *Entering* a context, which can be done by calling its :meth:`~Context.run` + method, makes the context the current context by pushing it onto the top of + the current thread's context stack. + + *Exiting* from the current context, which can be done by returning from the + callback passed to the :meth:`~Context.run` method, restores the current + context to what it was before the context was entered by popping the context + off the top of the context stack. + + Since each thread has its own context stack, :class:`ContextVar` objects + behave in a similar fashion to :func:`threading.local` when values are + assigned in different threads. + + Attempting to enter an already entered context, including contexts entered in + other threads, raises a :exc:`RuntimeError`. + + After exiting a context, it can later be re-entered (from any thread). + + Any changes to :class:`ContextVar` values via the :meth:`ContextVar.set` + method are recorded in the current context. The :meth:`ContextVar.get` + method returns the value associated with the current context. Exiting a + context effectively reverts any changes made to context variables while the + context was entered (if needed, the values can be restored by re-entering the + context). Context implements the :class:`collections.abc.Mapping` interface. .. method:: run(callable, *args, **kwargs) - Execute ``callable(*args, **kwargs)`` code in the context object - the *run* method is called on. Return the result of the execution - or propagate an exception if one occurred. + Enters the Context, executes ``callable(*args, **kwargs)``, then exits the + Context. Returns *callable*'s return value, or propagates an exception if + one occurred. + + Example: + + .. testcode:: + + import contextvars - Any changes to any context variables that *callable* makes will - be contained in the context object:: + var = contextvars.ContextVar('var') + var.set('spam') + print(var.get()) # 'spam' - var = ContextVar('var') - var.set('spam') + ctx = contextvars.copy_context() - def main(): - # 'var' was set to 'spam' before - # calling 'copy_context()' and 'ctx.run(main)', so: - # var.get() == ctx[var] == 'spam' + def main(): + # 'var' was set to 'spam' before + # calling 'copy_context()' and 'ctx.run(main)', so: + print(var.get()) # 'spam' + print(ctx[var]) # 'spam' - var.set('ham') + var.set('ham') - # Now, after setting 'var' to 'ham': - # var.get() == ctx[var] == 'ham' + # Now, after setting 'var' to 'ham': + print(var.get()) # 'ham' + print(ctx[var]) # 'ham' - ctx = copy_context() + # Any changes that the 'main' function makes to 'var' + # will be contained in 'ctx'. + ctx.run(main) - # Any changes that the 'main' function makes to 'var' - # will be contained in 'ctx'. - ctx.run(main) + # The 'main()' function was run in the 'ctx' context, + # so changes to 'var' are contained in it: + print(ctx[var]) # 'ham' - # The 'main()' function was run in the 'ctx' context, - # so changes to 'var' are contained in it: - # ctx[var] == 'ham' + # However, outside of 'ctx', 'var' is still set to 'spam': + print(var.get()) # 'spam' - # However, outside of 'ctx', 'var' is still set to 'spam': - # var.get() == 'spam' + .. testoutput:: + :hide: - The method raises a :exc:`RuntimeError` when called on the same - context object from more than one OS thread, or when called - recursively. + spam + spam + spam + ham + ham + ham + spam .. method:: copy() diff --git a/Include/cpython/context.h b/Include/cpython/context.h index ec72966e82c6f9b..d722b4d93134f70 100644 --- a/Include/cpython/context.h +++ b/Include/cpython/context.h @@ -28,15 +28,26 @@ PyAPI_FUNC(int) PyContext_Enter(PyObject *); PyAPI_FUNC(int) PyContext_Exit(PyObject *); typedef enum { - Py_CONTEXT_EVENT_ENTER, - Py_CONTEXT_EVENT_EXIT, + /* + * A context has been entered, causing the "current context" to switch to + * it. The object passed to the watch callback is the now-current + * contextvars.Context object. Each enter event will eventually have a + * corresponding exit event for the same context object after any + * subsequently entered contexts have themselves been exited. + */ + Py_CONTEXT_EVENT_ENTER, + /* + * A context is about to be exited, which will cause the "current context" + * to switch back to what it was before the context was entered. The + * object passed to the watch callback is the still-current + * contextvars.Context object. + */ + Py_CONTEXT_EVENT_EXIT, } PyContextEvent; /* - * Callback to be invoked when a context object is entered or exited. - * - * The callback is invoked with the event and a reference to - * the context after its entered and before its exited. + * Context object watcher callback function. The object passed to the callback + * is event-specific; see PyContextEvent for details. * * if the callback returns with an exception set, it must return -1. Otherwise * it should return 0 diff --git a/Misc/NEWS.d/next/Documentation/2024-09-29-18-14-52.gh-issue-119333.7tinr0.rst b/Misc/NEWS.d/next/Documentation/2024-09-29-18-14-52.gh-issue-119333.7tinr0.rst new file mode 100644 index 000000000000000..69a5c764d05a2ea --- /dev/null +++ b/Misc/NEWS.d/next/Documentation/2024-09-29-18-14-52.gh-issue-119333.7tinr0.rst @@ -0,0 +1,3 @@ +Added definitions for :term:`context`, :term:`current context`, and +:term:`context management protocol`, updated related definitions to be +consistent, and expanded the documentation for :class:`contextvars.Context`. From 7a10cdec359750b5154490fa9e24475c90d05aab Mon Sep 17 00:00:00 2001 From: Adam Turner <9087854+AA-Turner@users.noreply.github.com> Date: Thu, 10 Oct 2024 10:34:55 +0100 Subject: [PATCH 11/80] Pin the doctest workflow to Ubuntu 22.04 (#125236) --- .github/workflows/reusable-docs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/reusable-docs.yml b/.github/workflows/reusable-docs.yml index 4b021b3dc32f154..7755cb431bd3015 100644 --- a/.github/workflows/reusable-docs.yml +++ b/.github/workflows/reusable-docs.yml @@ -95,7 +95,7 @@ jobs: # Run "doctest" on HEAD as new syntax doesn't exist in the latest stable release doctest: name: 'Doctest' - runs-on: ubuntu-latest + runs-on: ubuntu-22.04 timeout-minutes: 60 steps: - uses: actions/checkout@v4 From 1639d934b9180c278ac9c02be43cbb1beada494a Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 10 Oct 2024 12:11:06 +0200 Subject: [PATCH 12/80] gh-125196: Add a free list to PyUnicodeWriter (#125227) --- Include/internal/pycore_freelist_state.h | 2 ++ Objects/object.c | 1 + Objects/unicodeobject.c | 13 +++++++++---- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/Include/internal/pycore_freelist_state.h b/Include/internal/pycore_freelist_state.h index 762c583ce94e9a0..4e04cf431e0b31e 100644 --- a/Include/internal/pycore_freelist_state.h +++ b/Include/internal/pycore_freelist_state.h @@ -20,6 +20,7 @@ extern "C" { # define Py_async_gen_asends_MAXFREELIST 80 # define Py_futureiters_MAXFREELIST 255 # define Py_object_stack_chunks_MAXFREELIST 4 +# define Py_unicode_writers_MAXFREELIST 1 // A generic freelist of either PyObjects or other data structures. struct _Py_freelist { @@ -44,6 +45,7 @@ struct _Py_freelists { struct _Py_freelist async_gen_asends; struct _Py_freelist futureiters; struct _Py_freelist object_stack_chunks; + struct _Py_freelist unicode_writers; }; #ifdef __cplusplus diff --git a/Objects/object.c b/Objects/object.c index 8d809158a6c1daa..a97a900890320d8 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -862,6 +862,7 @@ _PyObject_ClearFreeLists(struct _Py_freelists *freelists, int is_finalization) // stacks during GC, so emptying the free-list is counterproductive. clear_freelist(&freelists->object_stack_chunks, 1, PyMem_RawFree); } + clear_freelist(&freelists->unicode_writers, is_finalization, PyMem_Free); } /* diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 93c1025b6a3cae0..b94a74c2c688a99 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -46,6 +46,7 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. #include "pycore_codecs.h" // _PyCodec_Lookup() #include "pycore_critical_section.h" // Py_*_CRITICAL_SECTION_SEQUENCE_FAST #include "pycore_format.h" // F_LJUST +#include "pycore_freelist.h" // _Py_FREELIST_FREE(), _Py_FREELIST_POP() #include "pycore_initconfig.h" // _PyStatus_OK() #include "pycore_interp.h" // PyInterpreterState.fs_codec #include "pycore_long.h" // _PyLong_FormatWriter() @@ -13436,9 +13437,13 @@ PyUnicodeWriter_Create(Py_ssize_t length) } const size_t size = sizeof(_PyUnicodeWriter); - PyUnicodeWriter *pub_writer = (PyUnicodeWriter *)PyMem_Malloc(size); + PyUnicodeWriter *pub_writer; + pub_writer = _Py_FREELIST_POP_MEM(unicode_writers); if (pub_writer == NULL) { - return (PyUnicodeWriter *)PyErr_NoMemory(); + pub_writer = (PyUnicodeWriter *)PyMem_Malloc(size); + if (pub_writer == NULL) { + return (PyUnicodeWriter *)PyErr_NoMemory(); + } } _PyUnicodeWriter *writer = (_PyUnicodeWriter *)pub_writer; @@ -13459,7 +13464,7 @@ void PyUnicodeWriter_Discard(PyUnicodeWriter *writer) return; } _PyUnicodeWriter_Dealloc((_PyUnicodeWriter*)writer); - PyMem_Free(writer); + _Py_FREELIST_FREE(unicode_writers, writer, PyMem_Free); } @@ -13881,7 +13886,7 @@ PyUnicodeWriter_Finish(PyUnicodeWriter *writer) { PyObject *str = _PyUnicodeWriter_Finish((_PyUnicodeWriter*)writer); assert(((_PyUnicodeWriter*)writer)->buffer == NULL); - PyMem_Free(writer); + _Py_FREELIST_FREE(unicode_writers, writer, PyMem_Free); return str; } From 82dfdc328779778295075d791ee30a0308fb9af4 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 10 Oct 2024 12:20:53 +0200 Subject: [PATCH 13/80] gh-125196: Use PyUnicodeWriter for repr(tuple) (#125242) --- Objects/tupleobject.c | 50 +++++++++++++++++++++---------------------- 1 file changed, 24 insertions(+), 26 deletions(-) diff --git a/Objects/tupleobject.c b/Objects/tupleobject.c index 471346979180521..f3132e0933ac30f 100644 --- a/Objects/tupleobject.c +++ b/Objects/tupleobject.c @@ -232,56 +232,54 @@ tuple_repr(PyObject *self) return res > 0 ? PyUnicode_FromString("(...)") : NULL; } - _PyUnicodeWriter writer; - _PyUnicodeWriter_Init(&writer); - writer.overallocate = 1; + Py_ssize_t prealloc; if (n > 1) { - /* "(" + "1" + ", 2" * (len - 1) + ")" */ - writer.min_length = 1 + 1 + (2 + 1) * (n - 1) + 1; + // "(" + "1" + ", 2" * (len - 1) + ")" + prealloc = 1 + 1 + (2 + 1) * (n - 1) + 1; } else { - /* "(1,)" */ - writer.min_length = 4; + // "(1,)" + prealloc = 4; + } + PyUnicodeWriter *writer = PyUnicodeWriter_Create(prealloc); + if (writer == NULL) { + goto error; } - if (_PyUnicodeWriter_WriteChar(&writer, '(') < 0) + if (PyUnicodeWriter_WriteChar(writer, '(') < 0) { goto error; + } /* Do repr() on each element. */ for (Py_ssize_t i = 0; i < n; ++i) { - PyObject *s; - if (i > 0) { - if (_PyUnicodeWriter_WriteASCIIString(&writer, ", ", 2) < 0) + if (PyUnicodeWriter_WriteChar(writer, ',') < 0) { goto error; + } + if (PyUnicodeWriter_WriteChar(writer, ' ') < 0) { + goto error; + } } - s = PyObject_Repr(v->ob_item[i]); - if (s == NULL) - goto error; - - if (_PyUnicodeWriter_WriteStr(&writer, s) < 0) { - Py_DECREF(s); + if (PyUnicodeWriter_WriteRepr(writer, v->ob_item[i]) < 0) { goto error; } - Py_DECREF(s); } - writer.overallocate = 0; - if (n > 1) { - if (_PyUnicodeWriter_WriteChar(&writer, ')') < 0) + if (n == 1) { + if (PyUnicodeWriter_WriteChar(writer, ',') < 0) { goto error; + } } - else { - if (_PyUnicodeWriter_WriteASCIIString(&writer, ",)", 2) < 0) - goto error; + if (PyUnicodeWriter_WriteChar(writer, ')') < 0) { + goto error; } Py_ReprLeave((PyObject *)v); - return _PyUnicodeWriter_Finish(&writer); + return PyUnicodeWriter_Finish(writer); error: - _PyUnicodeWriter_Dealloc(&writer); + PyUnicodeWriter_Discard(writer); Py_ReprLeave((PyObject *)v); return NULL; } From f9ae5d1cee2f8927a71cd4f1f66f10050a4f658a Mon Sep 17 00:00:00 2001 From: Irit Katriel <1055913+iritkatriel@users.noreply.github.com> Date: Thu, 10 Oct 2024 11:21:12 +0100 Subject: [PATCH 14/80] gh-71784: [doc] add usage examples for traceback.TracebackException (#125189) Co-authored-by: Alex Waygood --- Doc/library/traceback.rst | 133 +++++++++++++++++++++++++++++++++----- 1 file changed, 118 insertions(+), 15 deletions(-) diff --git a/Doc/library/traceback.rst b/Doc/library/traceback.rst index 401e12be45f418d..100a92b73d5497a 100644 --- a/Doc/library/traceback.rst +++ b/Doc/library/traceback.rst @@ -8,11 +8,15 @@ -------------- -This module provides a standard interface to extract, format and print stack -traces of Python programs. It exactly mimics the behavior of the Python -interpreter when it prints a stack trace. This is useful when you want to print -stack traces under program control, such as in a "wrapper" around the -interpreter. +This module provides a standard interface to extract, format and print +stack traces of Python programs. It is more flexible than the +interpreter's default traceback display, and therefore makes it +possible to configure certain aspects of the output. Finally, +it contains a utility for capturing enough information about an +exception to print it later, without the need to save a reference +to the actual exception. Since exceptions can be the roots of large +objects graph, this utility can significantly improve +memory management. .. index:: pair: object; traceback @@ -29,7 +33,20 @@ which are assigned to the :attr:`~BaseException.__traceback__` field of Module :mod:`pdb` Interactive source code debugger for Python programs. -The module defines the following functions: +The module's API can be divided into two parts: + +* Module-level functions offering basic functionality, which are useful for interactive + inspection of exceptions and tracebacks. + +* :class:`TracebackException` class and its helper classes + :class:`StackSummary` and :class:`FrameSummary`. These offer both more + flexibility in the output generated and the ability to store the information + necessary for later formatting without holding references to actual exception + and traceback objects. + + +Module-Level Functions +---------------------- .. function:: print_tb(tb, limit=None, file=None) @@ -237,7 +254,6 @@ The module defines the following functions: .. versionadded:: 3.5 -The module also defines the following classes: :class:`!TracebackException` Objects ------------------------------------ @@ -245,12 +261,17 @@ The module also defines the following classes: .. versionadded:: 3.5 :class:`!TracebackException` objects are created from actual exceptions to -capture data for later printing in a lightweight fashion. +capture data for later printing. They offer a more lightweight method of +storing this information by avoiding holding references to +:ref:`traceback` and :ref:`frame` objects +In addition, they expose more options to configure the output compared to +the module-level functions described above. .. class:: TracebackException(exc_type, exc_value, exc_traceback, *, limit=None, lookup_lines=True, capture_locals=False, compact=False, max_group_width=15, max_group_depth=10) - Capture an exception for later rendering. *limit*, *lookup_lines* and - *capture_locals* are as for the :class:`StackSummary` class. + Capture an exception for later rendering. The meaning of *limit*, + *lookup_lines* and *capture_locals* are as for the :class:`StackSummary` + class. If *compact* is true, only data that is required by :class:`!TracebackException`'s :meth:`format` method @@ -509,8 +530,8 @@ in a :ref:`traceback `. .. _traceback-example: -Traceback Examples ------------------- +Examples of Using the Module-Level Functions +-------------------------------------------- This simple example implements a basic read-eval-print loop, similar to (but less useful than) the standard Python interactive interpreter loop. For a more @@ -549,8 +570,7 @@ exception and traceback: try: lumberjack() - except IndexError: - exc = sys.exception() + except IndexError as exc: print("*** print_tb:") traceback.print_tb(exc.__traceback__, limit=1, file=sys.stdout) print("*** print_exception:") @@ -653,5 +673,88 @@ This last example demonstrates the final few formatting functions: [' File "spam.py", line 3, in \n spam.eggs()\n', ' File "eggs.py", line 42, in eggs\n return "bacon"\n'] >>> an_error = IndexError('tuple index out of range') - >>> traceback.format_exception_only(type(an_error), an_error) + >>> traceback.format_exception_only(an_error) ['IndexError: tuple index out of range\n'] + + +Examples of Using :class:`TracebackException` +--------------------------------------------- + +With the helper class, we have more options:: + + >>> import sys + >>> from traceback import TracebackException + >>> + >>> def lumberjack(): + ... bright_side_of_life() + ... + >>> def bright_side_of_life(): + ... t = "bright", "side", "of", "life" + ... return t[5] + ... + >>> try: + ... lumberjack() + ... except IndexError as e: + ... exc = e + ... + >>> try: + ... try: + ... lumberjack() + ... except: + ... 1/0 + ... except Exception as e: + ... chained_exc = e + ... + >>> # limit works as with the module-level functions + >>> TracebackException.from_exception(exc, limit=-2).print() + Traceback (most recent call last): + File "", line 6, in lumberjack + bright_side_of_life() + ~~~~~~~~~~~~~~~~~~~^^ + File "", line 10, in bright_side_of_life + return t[5] + ~^^^ + IndexError: tuple index out of range + + >>> # capture_locals adds local variables in frames + >>> TracebackException.from_exception(exc, limit=-2, capture_locals=True).print() + Traceback (most recent call last): + File "", line 6, in lumberjack + bright_side_of_life() + ~~~~~~~~~~~~~~~~~~~^^ + File "", line 10, in bright_side_of_life + return t[5] + ~^^^ + t = ("bright", "side", "of", "life") + IndexError: tuple index out of range + + >>> # The *chain* kwarg to print() controls whether chained + >>> # exceptions are displayed + >>> TracebackException.from_exception(chained_exc).print() + Traceback (most recent call last): + File "", line 4, in + lumberjack() + ~~~~~~~~~~^^ + File "", line 7, in lumberjack + bright_side_of_life() + ~~~~~~~~~~~~~~~~~~~^^ + File "", line 11, in bright_side_of_life + return t[5] + ~^^^ + IndexError: tuple index out of range + + During handling of the above exception, another exception occurred: + + Traceback (most recent call last): + File "", line 6, in + 1/0 + ~^~ + ZeroDivisionError: division by zero + + >>> TracebackException.from_exception(chained_exc).print(chain=False) + Traceback (most recent call last): + File "", line 6, in + 1/0 + ~^~ + ZeroDivisionError: division by zero + From e4cab488d4445e8444932f3bed1c329c0d9e5038 Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Thu, 10 Oct 2024 21:39:53 +0900 Subject: [PATCH 15/80] gh-124471: Set name for unnamed reusable workflow (#124475) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com> Co-authored-by: Sviatoslav Sydorenko (Святослав Сидоренко) --- .github/workflows/reusable-change-detection.yml | 4 +--- .github/workflows/reusable-docs.yml | 2 +- .github/workflows/reusable-macos.yml | 2 ++ .github/workflows/reusable-tsan.yml | 2 ++ .github/workflows/reusable-ubuntu.yml | 2 ++ .github/workflows/reusable-wasi.yml | 2 ++ .github/workflows/reusable-windows-msi.yml | 2 +- .github/workflows/reusable-windows.yml | 2 ++ 8 files changed, 13 insertions(+), 5 deletions(-) diff --git a/.github/workflows/reusable-change-detection.yml b/.github/workflows/reusable-change-detection.yml index 6f599f75547ceb1..5cd6fb39f1e12f4 100644 --- a/.github/workflows/reusable-change-detection.yml +++ b/.github/workflows/reusable-change-detection.yml @@ -1,6 +1,4 @@ ---- - -name: Change detection +name: Reusable change detection on: # yamllint disable-line rule:truthy workflow_call: diff --git a/.github/workflows/reusable-docs.yml b/.github/workflows/reusable-docs.yml index 7755cb431bd3015..3809f24dcc977e6 100644 --- a/.github/workflows/reusable-docs.yml +++ b/.github/workflows/reusable-docs.yml @@ -1,4 +1,4 @@ -name: Docs +name: Reusable Docs on: workflow_call: diff --git a/.github/workflows/reusable-macos.yml b/.github/workflows/reusable-macos.yml index b4227545887ad16..b3a160fbbf8053a 100644 --- a/.github/workflows/reusable-macos.yml +++ b/.github/workflows/reusable-macos.yml @@ -1,3 +1,5 @@ +name: Reusable macOS + on: workflow_call: inputs: diff --git a/.github/workflows/reusable-tsan.yml b/.github/workflows/reusable-tsan.yml index 27f4eacd86fd958..f4c976ca996410e 100644 --- a/.github/workflows/reusable-tsan.yml +++ b/.github/workflows/reusable-tsan.yml @@ -1,3 +1,5 @@ +name: Reusable Thread Sanitizer + on: workflow_call: inputs: diff --git a/.github/workflows/reusable-ubuntu.yml b/.github/workflows/reusable-ubuntu.yml index 769f1210de4d3cd..0cf40ba8a9b03b1 100644 --- a/.github/workflows/reusable-ubuntu.yml +++ b/.github/workflows/reusable-ubuntu.yml @@ -1,3 +1,5 @@ +name: Reusable Ubuntu + on: workflow_call: inputs: diff --git a/.github/workflows/reusable-wasi.yml b/.github/workflows/reusable-wasi.yml index 1b1a68c0badc763..4c8137c958a3128 100644 --- a/.github/workflows/reusable-wasi.yml +++ b/.github/workflows/reusable-wasi.yml @@ -1,3 +1,5 @@ +name: Reusable WASI + on: workflow_call: inputs: diff --git a/.github/workflows/reusable-windows-msi.yml b/.github/workflows/reusable-windows-msi.yml index fc34ab7c3eb1f25..abdb1a1982fef8d 100644 --- a/.github/workflows/reusable-windows-msi.yml +++ b/.github/workflows/reusable-windows-msi.yml @@ -1,4 +1,4 @@ -name: TestsMSI +name: Reusable Windows MSI on: workflow_call: diff --git a/.github/workflows/reusable-windows.yml b/.github/workflows/reusable-windows.yml index e9c3c8e05a801c4..dcfc62d7f5d1456 100644 --- a/.github/workflows/reusable-windows.yml +++ b/.github/workflows/reusable-windows.yml @@ -1,3 +1,5 @@ +name: Reusable Windows + on: workflow_call: inputs: From 87d7315ac57250046372b0d9ae4619ba619c8c87 Mon Sep 17 00:00:00 2001 From: Sergey B Kirpichev Date: Thu, 10 Oct 2024 15:42:03 +0300 Subject: [PATCH 16/80] gh-125118: don't copy arbitrary values to _Bool in the struct module (GH-125169) memcopy'ing arbitrary values to _Bool variable triggers undefined behaviour. Avoid this. We assume that `false` is represented by all zero bytes. Credits to Alex Gaynor. Co-authored-by: Sam Gross Co-authored-by: Victor Stinner Co-authored-by: Petr Viktorin --- Lib/test/test_struct.py | 3 +++ .../Library/2024-10-09-07-09-00.gh-issue-125118.J9rQ1S.rst | 1 + Modules/_struct.c | 5 ++--- 3 files changed, 6 insertions(+), 3 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-10-09-07-09-00.gh-issue-125118.J9rQ1S.rst diff --git a/Lib/test/test_struct.py b/Lib/test/test_struct.py index e3193c7863fbaef..04ec3ed0837c82b 100644 --- a/Lib/test/test_struct.py +++ b/Lib/test/test_struct.py @@ -540,6 +540,9 @@ def __bool__(self): for c in [b'\x01', b'\x7f', b'\xff', b'\x0f', b'\xf0']: self.assertTrue(struct.unpack('>?', c)[0]) + self.assertTrue(struct.unpack(' Date: Thu, 10 Oct 2024 21:57:13 +0900 Subject: [PATCH 17/80] gh-124153: Simplify PyType_GetBaseByToken (GH-124488) --- Objects/typeobject.c | 123 +++++++++++++++---------------------------- 1 file changed, 43 insertions(+), 80 deletions(-) diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 6484e8921f8122a..5380633fa1149ec 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -5269,11 +5269,10 @@ PyType_GetModuleByDef(PyTypeObject *type, PyModuleDef *def) static PyTypeObject * -get_base_by_token_recursive(PyTypeObject *type, void *token) +get_base_by_token_recursive(PyObject *bases, void *token) { - assert(PyType_GetSlot(type, Py_tp_token) != token); - PyObject *bases = lookup_tp_bases(type); assert(bases != NULL); + PyTypeObject *res = NULL; Py_ssize_t n = PyTuple_GET_SIZE(bases); for (Py_ssize_t i = 0; i < n; i++) { PyTypeObject *base = _PyType_CAST(PyTuple_GET_ITEM(bases, i)); @@ -5281,112 +5280,76 @@ get_base_by_token_recursive(PyTypeObject *type, void *token) continue; } if (((PyHeapTypeObject*)base)->ht_token == token) { - return base; + res = base; + break; } - base = get_base_by_token_recursive(base, token); + base = get_base_by_token_recursive(lookup_tp_bases(base), token); if (base != NULL) { - return base; + res = base; + break; } } - return NULL; + return res; // Prefer to return recursively from one place } -static inline PyTypeObject * -get_base_by_token_from_mro(PyTypeObject *type, void *token) +int +PyType_GetBaseByToken(PyTypeObject *type, void *token, PyTypeObject **result) { - // Bypass lookup_tp_mro() as PyType_IsSubtype() does - PyObject *mro = type->tp_mro; - assert(mro != NULL); - assert(PyTuple_Check(mro)); - // mro_invoke() ensures that the type MRO cannot be empty. - assert(PyTuple_GET_SIZE(mro) >= 1); - // Also, the first item in the MRO is the type itself, which is supposed - // to be already checked by the caller. We skip it in the loop. - assert(PyTuple_GET_ITEM(mro, 0) == (PyObject *)type); - assert(PyType_GetSlot(type, Py_tp_token) != token); - - Py_ssize_t n = PyTuple_GET_SIZE(mro); - for (Py_ssize_t i = 1; i < n; i++) { - PyTypeObject *base = _PyType_CAST(PyTuple_GET_ITEM(mro, i)); - if (!_PyType_HasFeature(base, Py_TPFLAGS_HEAPTYPE)) { - continue; - } - if (((PyHeapTypeObject*)base)->ht_token == token) { - return base; - } + if (result != NULL) { + *result = NULL; } - return NULL; -} -static int -check_base_by_token(PyTypeObject *type, void *token) { - // Chain the branches, which will be optimized exclusive here if (token == NULL) { PyErr_Format(PyExc_SystemError, "PyType_GetBaseByToken called with token=NULL"); return -1; } - else if (!PyType_Check(type)) { + if (!PyType_Check(type)) { PyErr_Format(PyExc_TypeError, "expected a type, got a '%T' object", type); return -1; } - else if (!_PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) { - return 0; - } - else if (((PyHeapTypeObject*)type)->ht_token == token) { - return 1; - } - else if (type->tp_mro != NULL) { - // This will not be inlined - return get_base_by_token_from_mro(type, token) ? 1 : 0; - } - else { - return get_base_by_token_recursive(type, token) ? 1 : 0; - } -} -int -PyType_GetBaseByToken(PyTypeObject *type, void *token, PyTypeObject **result) -{ - if (result == NULL) { - // If the `result` is checked only once here, the subsequent - // branches will become trivial to optimize. - return check_base_by_token(type, token); - } - if (token == NULL || !PyType_Check(type)) { - *result = NULL; - return check_base_by_token(type, token); - } - - // Chain the branches, which will be optimized exclusive here - PyTypeObject *base; if (!_PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) { // No static type has a heaptype superclass, // which is ensured by type_ready_mro(). - *result = NULL; return 0; } - else if (((PyHeapTypeObject*)type)->ht_token == token) { - *result = (PyTypeObject *)Py_NewRef(type); + if (((PyHeapTypeObject*)type)->ht_token == token) { +found: + if (result != NULL) { + *result = (PyTypeObject *)Py_NewRef(type); + } return 1; } - else if (type->tp_mro != NULL) { - // Expect this to be inlined - base = get_base_by_token_from_mro(type, token); - } - else { - base = get_base_by_token_recursive(type, token); - } - if (base != NULL) { - *result = (PyTypeObject *)Py_NewRef(base); - return 1; - } - else { - *result = NULL; + PyObject *mro = type->tp_mro; // No lookup, following PyType_IsSubtype() + if (mro == NULL) { + PyTypeObject *base; + base = get_base_by_token_recursive(lookup_tp_bases(type), token); + if (base != NULL) { + // Copying the given type can cause a slowdown, + // unlike the overwrite below. + type = base; + goto found; + } return 0; } + // mro_invoke() ensures that the type MRO cannot be empty. + assert(PyTuple_GET_SIZE(mro) >= 1); + // Also, the first item in the MRO is the type itself, which + // we already checked above. We skip it in the loop. + assert(PyTuple_GET_ITEM(mro, 0) == (PyObject *)type); + Py_ssize_t n = PyTuple_GET_SIZE(mro); + for (Py_ssize_t i = 1; i < n; i++) { + PyTypeObject *base = (PyTypeObject *)PyTuple_GET_ITEM(mro, i); + if (_PyType_HasFeature(base, Py_TPFLAGS_HEAPTYPE) + && ((PyHeapTypeObject*)base)->ht_token == token) { + type = base; + goto found; + } + } + return 0; } From c914212474792312bb125211bae5719650fe2f58 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 10 Oct 2024 15:33:00 +0200 Subject: [PATCH 18/80] gh-125196: Use PyUnicodeWriter for JSON encoder (#125249) Replace the private _PyUnicodeWriter with the public PyUnicodeWriter. --- Modules/_json.c | 87 ++++++++++++++++++++++++++++--------------------- 1 file changed, 49 insertions(+), 38 deletions(-) diff --git a/Modules/_json.c b/Modules/_json.c index 9e29de0f22465f0..ce0093ab431d05b 100644 --- a/Modules/_json.c +++ b/Modules/_json.c @@ -86,11 +86,11 @@ encoder_dealloc(PyObject *self); static int encoder_clear(PyEncoderObject *self); static int -encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *seq, PyObject *newline_indent); +encoder_listencode_list(PyEncoderObject *s, PyUnicodeWriter *writer, PyObject *seq, PyObject *newline_indent); static int -encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *obj, PyObject *newline_indent); +encoder_listencode_obj(PyEncoderObject *s, PyUnicodeWriter *writer, PyObject *obj, PyObject *newline_indent); static int -encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *dct, PyObject *newline_indent); +encoder_listencode_dict(PyEncoderObject *s, PyUnicodeWriter *writer, PyObject *dct, PyObject *newline_indent); static PyObject * _encoded_const(PyObject *obj); static void @@ -1268,38 +1268,39 @@ encoder_call(PyEncoderObject *self, PyObject *args, PyObject *kwds) { /* Python callable interface to encode_listencode_obj */ static char *kwlist[] = {"obj", "_current_indent_level", NULL}; - PyObject *obj, *result; + PyObject *obj; Py_ssize_t indent_level; - _PyUnicodeWriter writer; if (!PyArg_ParseTupleAndKeywords(args, kwds, "On:_iterencode", kwlist, - &obj, &indent_level)) + &obj, &indent_level)) return NULL; - _PyUnicodeWriter_Init(&writer); - writer.overallocate = 1; + PyUnicodeWriter *writer = PyUnicodeWriter_Create(0); + if (writer == NULL) { + return NULL; + } PyObject *newline_indent = NULL; if (self->indent != Py_None) { newline_indent = _create_newline_indent(self->indent, indent_level); if (newline_indent == NULL) { - _PyUnicodeWriter_Dealloc(&writer); + PyUnicodeWriter_Discard(writer); return NULL; } } - if (encoder_listencode_obj(self, &writer, obj, newline_indent)) { - _PyUnicodeWriter_Dealloc(&writer); + if (encoder_listencode_obj(self, writer, obj, newline_indent)) { + PyUnicodeWriter_Discard(writer); Py_XDECREF(newline_indent); return NULL; } Py_XDECREF(newline_indent); - result = PyTuple_New(1); - if (result == NULL || - PyTuple_SetItem(result, 0, _PyUnicodeWriter_Finish(&writer)) < 0) { - Py_XDECREF(result); + PyObject *str = PyUnicodeWriter_Finish(writer); + if (str == NULL) { return NULL; } + PyObject *result = PyTuple_Pack(1, str); + Py_DECREF(str); return result; } @@ -1370,16 +1371,16 @@ encoder_encode_string(PyEncoderObject *s, PyObject *obj) } static int -_steal_accumulate(_PyUnicodeWriter *writer, PyObject *stolen) +_steal_accumulate(PyUnicodeWriter *writer, PyObject *stolen) { /* Append stolen and then decrement its reference count */ - int rval = _PyUnicodeWriter_WriteStr(writer, stolen); + int rval = PyUnicodeWriter_WriteStr(writer, stolen); Py_DECREF(stolen); return rval; } static int -encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer, +encoder_listencode_obj(PyEncoderObject *s, PyUnicodeWriter *writer, PyObject *obj, PyObject *newline_indent) { /* Encode Python object obj to a JSON term */ @@ -1387,13 +1388,13 @@ encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer, int rv; if (obj == Py_None) { - return _PyUnicodeWriter_WriteASCIIString(writer, "null", 4); + return PyUnicodeWriter_WriteUTF8(writer, "null", 4); } else if (obj == Py_True) { - return _PyUnicodeWriter_WriteASCIIString(writer, "true", 4); + return PyUnicodeWriter_WriteUTF8(writer, "true", 4); } else if (obj == Py_False) { - return _PyUnicodeWriter_WriteASCIIString(writer, "false", 5); + return PyUnicodeWriter_WriteUTF8(writer, "false", 5); } else if (PyUnicode_Check(obj)) { PyObject *encoded = encoder_encode_string(s, obj); @@ -1402,6 +1403,10 @@ encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer, return _steal_accumulate(writer, encoded); } else if (PyLong_Check(obj)) { + if (PyLong_CheckExact(obj)) { + // Fast-path for exact integers + return PyUnicodeWriter_WriteRepr(writer, obj); + } PyObject *encoded = PyLong_Type.tp_repr(obj); if (encoded == NULL) return -1; @@ -1478,7 +1483,7 @@ encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer, } static int -encoder_encode_key_value(PyEncoderObject *s, _PyUnicodeWriter *writer, bool *first, +encoder_encode_key_value(PyEncoderObject *s, PyUnicodeWriter *writer, bool *first, PyObject *dct, PyObject *key, PyObject *value, PyObject *newline_indent, PyObject *item_separator) @@ -1518,7 +1523,7 @@ encoder_encode_key_value(PyEncoderObject *s, _PyUnicodeWriter *writer, bool *fir *first = false; } else { - if (_PyUnicodeWriter_WriteStr(writer, item_separator) < 0) { + if (PyUnicodeWriter_WriteStr(writer, item_separator) < 0) { Py_DECREF(keystr); return -1; } @@ -1533,7 +1538,7 @@ encoder_encode_key_value(PyEncoderObject *s, _PyUnicodeWriter *writer, bool *fir if (_steal_accumulate(writer, encoded) < 0) { return -1; } - if (_PyUnicodeWriter_WriteStr(writer, s->key_separator) < 0) { + if (PyUnicodeWriter_WriteStr(writer, s->key_separator) < 0) { return -1; } if (encoder_listencode_obj(s, writer, value, newline_indent) < 0) { @@ -1544,7 +1549,7 @@ encoder_encode_key_value(PyEncoderObject *s, _PyUnicodeWriter *writer, bool *fir } static int -encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, +encoder_listencode_dict(PyEncoderObject *s, PyUnicodeWriter *writer, PyObject *dct, PyObject *newline_indent) { /* Encode Python dict dct a JSON term */ @@ -1555,8 +1560,10 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *new_newline_indent = NULL; PyObject *separator_indent = NULL; - if (PyDict_GET_SIZE(dct) == 0) /* Fast path */ - return _PyUnicodeWriter_WriteASCIIString(writer, "{}", 2); + if (PyDict_GET_SIZE(dct) == 0) { + /* Fast path */ + return PyUnicodeWriter_WriteUTF8(writer, "{}", 2); + } if (s->markers != Py_None) { int has_key; @@ -1574,8 +1581,9 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, } } - if (_PyUnicodeWriter_WriteChar(writer, '{')) + if (PyUnicodeWriter_WriteChar(writer, '{')) { goto bail; + } PyObject *current_item_separator = s->item_separator; // borrowed reference if (s->indent != Py_None) { @@ -1589,7 +1597,7 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, } // update item separator with a borrowed reference current_item_separator = separator_indent; - if (_PyUnicodeWriter_WriteStr(writer, new_newline_indent) < 0) { + if (PyUnicodeWriter_WriteStr(writer, new_newline_indent) < 0) { goto bail; } } @@ -1635,13 +1643,14 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, Py_CLEAR(new_newline_indent); Py_CLEAR(separator_indent); - if (_PyUnicodeWriter_WriteStr(writer, newline_indent) < 0) { + if (PyUnicodeWriter_WriteStr(writer, newline_indent) < 0) { goto bail; } } - if (_PyUnicodeWriter_WriteChar(writer, '}')) + if (PyUnicodeWriter_WriteChar(writer, '}')) { goto bail; + } return 0; bail: @@ -1653,7 +1662,7 @@ encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, } static int -encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer, +encoder_listencode_list(PyEncoderObject *s, PyUnicodeWriter *writer, PyObject *seq, PyObject *newline_indent) { PyObject *ident = NULL; @@ -1668,7 +1677,7 @@ encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer, return -1; if (PySequence_Fast_GET_SIZE(s_fast) == 0) { Py_DECREF(s_fast); - return _PyUnicodeWriter_WriteASCIIString(writer, "[]", 2); + return PyUnicodeWriter_WriteUTF8(writer, "[]", 2); } if (s->markers != Py_None) { @@ -1687,8 +1696,9 @@ encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer, } } - if (_PyUnicodeWriter_WriteChar(writer, '[')) + if (PyUnicodeWriter_WriteChar(writer, '[')) { goto bail; + } PyObject *separator = s->item_separator; // borrowed reference if (s->indent != Py_None) { @@ -1697,7 +1707,7 @@ encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer, goto bail; } - if (_PyUnicodeWriter_WriteStr(writer, new_newline_indent) < 0) { + if (PyUnicodeWriter_WriteStr(writer, new_newline_indent) < 0) { goto bail; } @@ -1710,7 +1720,7 @@ encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer, for (i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) { PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i); if (i) { - if (_PyUnicodeWriter_WriteStr(writer, separator) < 0) + if (PyUnicodeWriter_WriteStr(writer, separator) < 0) goto bail; } if (encoder_listencode_obj(s, writer, obj, new_newline_indent)) { @@ -1727,13 +1737,14 @@ encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer, if (s->indent != Py_None) { Py_CLEAR(new_newline_indent); Py_CLEAR(separator_indent); - if (_PyUnicodeWriter_WriteStr(writer, newline_indent) < 0) { + if (PyUnicodeWriter_WriteStr(writer, newline_indent) < 0) { goto bail; } } - if (_PyUnicodeWriter_WriteChar(writer, ']')) + if (PyUnicodeWriter_WriteChar(writer, ']')) { goto bail; + } Py_DECREF(s_fast); return 0; From 01fc3b34cc6994bc83b6540da3a8573e79dfbb56 Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Thu, 10 Oct 2024 16:27:52 +0200 Subject: [PATCH 19/80] gh-124570: ctypes: Run some Structure tests on Union as well (GH-124976) - Move some Structure tests to test_structunion; use a common base test class + two subclasses to run them on Union too - Remove test_union for now as it's redundant Note: `test_simple_structs` & `test_simple_unions` are in the common file because they share `formats`. --- Lib/test/test_ctypes/test_structunion.py | 353 +++++++++++++++++++++++ Lib/test/test_ctypes/test_structures.py | 337 +--------------------- Lib/test/test_ctypes/test_unions.py | 35 --- 3 files changed, 368 insertions(+), 357 deletions(-) create mode 100644 Lib/test/test_ctypes/test_structunion.py delete mode 100644 Lib/test/test_ctypes/test_unions.py diff --git a/Lib/test/test_ctypes/test_structunion.py b/Lib/test/test_ctypes/test_structunion.py new file mode 100644 index 000000000000000..973ac3b2f1919da --- /dev/null +++ b/Lib/test/test_ctypes/test_structunion.py @@ -0,0 +1,353 @@ +"""Common tests for ctypes.Structure and ctypes.Union""" + +import unittest +from ctypes import (Structure, Union, POINTER, sizeof, alignment, + c_char, c_byte, c_ubyte, + c_short, c_ushort, c_int, c_uint, + c_long, c_ulong, c_longlong, c_ulonglong, c_float, c_double) +from ._support import (_CData, PyCStructType, UnionType, + Py_TPFLAGS_DISALLOW_INSTANTIATION, + Py_TPFLAGS_IMMUTABLETYPE) +from struct import calcsize + + +class StructUnionTestBase: + formats = {"c": c_char, + "b": c_byte, + "B": c_ubyte, + "h": c_short, + "H": c_ushort, + "i": c_int, + "I": c_uint, + "l": c_long, + "L": c_ulong, + "q": c_longlong, + "Q": c_ulonglong, + "f": c_float, + "d": c_double, + } + + def test_subclass(self): + class X(self.cls): + _fields_ = [("a", c_int)] + + class Y(X): + _fields_ = [("b", c_int)] + + class Z(X): + pass + + self.assertEqual(sizeof(X), sizeof(c_int)) + self.check_sizeof(Y, + struct_size=sizeof(c_int)*2, + union_size=sizeof(c_int)) + self.assertEqual(sizeof(Z), sizeof(c_int)) + self.assertEqual(X._fields_, [("a", c_int)]) + self.assertEqual(Y._fields_, [("b", c_int)]) + self.assertEqual(Z._fields_, [("a", c_int)]) + + def test_subclass_delayed(self): + class X(self.cls): + pass + self.assertEqual(sizeof(X), 0) + X._fields_ = [("a", c_int)] + + class Y(X): + pass + self.assertEqual(sizeof(Y), sizeof(X)) + Y._fields_ = [("b", c_int)] + + class Z(X): + pass + + self.assertEqual(sizeof(X), sizeof(c_int)) + self.check_sizeof(Y, + struct_size=sizeof(c_int)*2, + union_size=sizeof(c_int)) + self.assertEqual(sizeof(Z), sizeof(c_int)) + self.assertEqual(X._fields_, [("a", c_int)]) + self.assertEqual(Y._fields_, [("b", c_int)]) + self.assertEqual(Z._fields_, [("a", c_int)]) + + def test_inheritance_hierarchy(self): + self.assertEqual(self.cls.mro(), [self.cls, _CData, object]) + self.assertEqual(type(self.metacls), type) + + def test_type_flags(self): + for cls in self.cls, self.metacls: + with self.subTest(cls=cls): + self.assertTrue(cls.__flags__ & Py_TPFLAGS_IMMUTABLETYPE) + self.assertFalse(cls.__flags__ & Py_TPFLAGS_DISALLOW_INSTANTIATION) + + def test_metaclass_details(self): + # Abstract classes (whose metaclass __init__ was not called) can't be + # instantiated directly + NewClass = self.metacls.__new__(self.metacls, 'NewClass', + (self.cls,), {}) + for cls in self.cls, NewClass: + with self.subTest(cls=cls): + with self.assertRaisesRegex(TypeError, "abstract class"): + obj = cls() + + # Cannot call the metaclass __init__ more than once + class T(self.cls): + _fields_ = [("x", c_char), + ("y", c_char)] + with self.assertRaisesRegex(SystemError, "already initialized"): + self.metacls.__init__(T, 'ptr', (), {}) + + def test_alignment(self): + class X(self.cls): + _fields_ = [("x", c_char * 3)] + self.assertEqual(alignment(X), calcsize("s")) + self.assertEqual(sizeof(X), calcsize("3s")) + + class Y(self.cls): + _fields_ = [("x", c_char * 3), + ("y", c_int)] + self.assertEqual(alignment(Y), alignment(c_int)) + self.check_sizeof(Y, + struct_size=calcsize("3s i"), + union_size=max(calcsize("3s"), calcsize("i"))) + + class SI(self.cls): + _fields_ = [("a", X), + ("b", Y)] + self.assertEqual(alignment(SI), max(alignment(Y), alignment(X))) + self.check_sizeof(SI, + struct_size=calcsize("3s0i 3si 0i"), + union_size=max(calcsize("3s"), calcsize("i"))) + + class IS(self.cls): + _fields_ = [("b", Y), + ("a", X)] + + self.assertEqual(alignment(SI), max(alignment(X), alignment(Y))) + self.check_sizeof(IS, + struct_size=calcsize("3si 3s 0i"), + union_size=max(calcsize("3s"), calcsize("i"))) + + class XX(self.cls): + _fields_ = [("a", X), + ("b", X)] + self.assertEqual(alignment(XX), alignment(X)) + self.check_sizeof(XX, + struct_size=calcsize("3s 3s 0s"), + union_size=calcsize("3s")) + + def test_empty(self): + # I had problems with these + # + # Although these are pathological cases: Empty Structures! + class X(self.cls): + _fields_ = [] + + # Is this really the correct alignment, or should it be 0? + self.assertTrue(alignment(X) == 1) + self.assertTrue(sizeof(X) == 0) + + class XX(self.cls): + _fields_ = [("a", X), + ("b", X)] + + self.assertEqual(alignment(XX), 1) + self.assertEqual(sizeof(XX), 0) + + def test_fields(self): + # test the offset and size attributes of Structure/Union fields. + class X(self.cls): + _fields_ = [("x", c_int), + ("y", c_char)] + + self.assertEqual(X.x.offset, 0) + self.assertEqual(X.x.size, sizeof(c_int)) + + if self.cls == Structure: + self.assertEqual(X.y.offset, sizeof(c_int)) + else: + self.assertEqual(X.y.offset, 0) + self.assertEqual(X.y.size, sizeof(c_char)) + + # readonly + self.assertRaises((TypeError, AttributeError), setattr, X.x, "offset", 92) + self.assertRaises((TypeError, AttributeError), setattr, X.x, "size", 92) + + # XXX Should we check nested data types also? + # offset is always relative to the class... + + def test_invalid_field_types(self): + class POINT(self.cls): + pass + self.assertRaises(TypeError, setattr, POINT, "_fields_", [("x", 1), ("y", 2)]) + + def test_invalid_name(self): + # field name must be string + def declare_with_name(name): + class S(self.cls): + _fields_ = [(name, c_int)] + + self.assertRaises(TypeError, declare_with_name, b"x") + + def test_intarray_fields(self): + class SomeInts(self.cls): + _fields_ = [("a", c_int * 4)] + + # can use tuple to initialize array (but not list!) + self.assertEqual(SomeInts((1, 2)).a[:], [1, 2, 0, 0]) + self.assertEqual(SomeInts((1, 2)).a[::], [1, 2, 0, 0]) + self.assertEqual(SomeInts((1, 2)).a[::-1], [0, 0, 2, 1]) + self.assertEqual(SomeInts((1, 2)).a[::2], [1, 0]) + self.assertEqual(SomeInts((1, 2)).a[1:5:6], [2]) + self.assertEqual(SomeInts((1, 2)).a[6:4:-1], []) + self.assertEqual(SomeInts((1, 2, 3, 4)).a[:], [1, 2, 3, 4]) + self.assertEqual(SomeInts((1, 2, 3, 4)).a[::], [1, 2, 3, 4]) + # too long + # XXX Should raise ValueError?, not RuntimeError + self.assertRaises(RuntimeError, SomeInts, (1, 2, 3, 4, 5)) + + def test_huge_field_name(self): + # issue12881: segfault with large structure field names + def create_class(length): + class S(self.cls): + _fields_ = [('x' * length, c_int)] + + for length in [10 ** i for i in range(0, 8)]: + try: + create_class(length) + except MemoryError: + # MemoryErrors are OK, we just don't want to segfault + pass + + def test_abstract_class(self): + class X(self.cls): + _abstract_ = "something" + with self.assertRaisesRegex(TypeError, r"^abstract class$"): + X() + + def test_methods(self): + self.assertIn("in_dll", dir(type(self.cls))) + self.assertIn("from_address", dir(type(self.cls))) + self.assertIn("in_dll", dir(type(self.cls))) + + +class StructureTestCase(unittest.TestCase, StructUnionTestBase): + cls = Structure + metacls = PyCStructType + + def test_metaclass_name(self): + self.assertEqual(self.metacls.__name__, "PyCStructType") + + def check_sizeof(self, cls, *, struct_size, union_size): + self.assertEqual(sizeof(cls), struct_size) + + def test_simple_structs(self): + for code, tp in self.formats.items(): + class X(Structure): + _fields_ = [("x", c_char), + ("y", tp)] + self.assertEqual((sizeof(X), code), + (calcsize("c%c0%c" % (code, code)), code)) + + +class UnionTestCase(unittest.TestCase, StructUnionTestBase): + cls = Union + metacls = UnionType + + def test_metaclass_name(self): + self.assertEqual(self.metacls.__name__, "UnionType") + + def check_sizeof(self, cls, *, struct_size, union_size): + self.assertEqual(sizeof(cls), union_size) + + def test_simple_unions(self): + for code, tp in self.formats.items(): + class X(Union): + _fields_ = [("x", c_char), + ("y", tp)] + self.assertEqual((sizeof(X), code), + (calcsize("%c" % (code)), code)) + + +class PointerMemberTestBase: + def test(self): + # a Structure/Union with a POINTER field + class S(self.cls): + _fields_ = [("array", POINTER(c_int))] + + s = S() + # We can assign arrays of the correct type + s.array = (c_int * 3)(1, 2, 3) + items = [s.array[i] for i in range(3)] + self.assertEqual(items, [1, 2, 3]) + + s.array[0] = 42 + + items = [s.array[i] for i in range(3)] + self.assertEqual(items, [42, 2, 3]) + + s.array[0] = 1 + + items = [s.array[i] for i in range(3)] + self.assertEqual(items, [1, 2, 3]) + +class PointerMemberTestCase_Struct(unittest.TestCase, PointerMemberTestBase): + cls = Structure + + def test_none_to_pointer_fields(self): + class S(self.cls): + _fields_ = [("x", c_int), + ("p", POINTER(c_int))] + + s = S() + s.x = 12345678 + s.p = None + self.assertEqual(s.x, 12345678) + +class PointerMemberTestCase_Union(unittest.TestCase, PointerMemberTestBase): + cls = Union + + def test_none_to_pointer_fields(self): + class S(self.cls): + _fields_ = [("x", c_int), + ("p", POINTER(c_int))] + + s = S() + s.x = 12345678 + s.p = None + self.assertFalse(s.p) # NULL pointers are falsy + + +class TestRecursiveBase: + def test_contains_itself(self): + class Recursive(self.cls): + pass + + try: + Recursive._fields_ = [("next", Recursive)] + except AttributeError as details: + self.assertIn("Structure or union cannot contain itself", + str(details)) + else: + self.fail("Structure or union cannot contain itself") + + + def test_vice_versa(self): + class First(self.cls): + pass + class Second(self.cls): + pass + + First._fields_ = [("second", Second)] + + try: + Second._fields_ = [("first", First)] + except AttributeError as details: + self.assertIn("_fields_ is final", str(details)) + else: + self.fail("AttributeError not raised") + +class TestRecursiveStructure(unittest.TestCase, TestRecursiveBase): + cls = Structure + +class TestRecursiveUnion(unittest.TestCase, TestRecursiveBase): + cls = Union diff --git a/Lib/test/test_ctypes/test_structures.py b/Lib/test/test_ctypes/test_structures.py index 6cc09c8f2b5b59f..0ec238e04b74cd3 100644 --- a/Lib/test/test_ctypes/test_structures.py +++ b/Lib/test/test_ctypes/test_structures.py @@ -1,209 +1,24 @@ +"""Tests for ctypes.Structure + +Features common with Union should go in test_structunion.py instead. +""" + from platform import architecture as _architecture import struct import sys import unittest -from ctypes import (CDLL, Structure, Union, POINTER, sizeof, byref, alignment, +from ctypes import (CDLL, Structure, Union, POINTER, sizeof, byref, c_void_p, c_char, c_wchar, c_byte, c_ubyte, - c_uint8, c_uint16, c_uint32, - c_short, c_ushort, c_int, c_uint, - c_long, c_ulong, c_longlong, c_ulonglong, c_float, c_double) + c_uint8, c_uint16, c_uint32, c_int, c_uint, + c_long, c_ulong, c_longlong, c_float, c_double) from ctypes.util import find_library -from struct import calcsize from collections import namedtuple from test import support from test.support import import_helper _ctypes_test = import_helper.import_module("_ctypes_test") -from ._support import (_CData, PyCStructType, Py_TPFLAGS_DISALLOW_INSTANTIATION, - Py_TPFLAGS_IMMUTABLETYPE) - - -class SubclassesTest(unittest.TestCase): - def test_subclass(self): - class X(Structure): - _fields_ = [("a", c_int)] - - class Y(X): - _fields_ = [("b", c_int)] - - class Z(X): - pass - - self.assertEqual(sizeof(X), sizeof(c_int)) - self.assertEqual(sizeof(Y), sizeof(c_int)*2) - self.assertEqual(sizeof(Z), sizeof(c_int)) - self.assertEqual(X._fields_, [("a", c_int)]) - self.assertEqual(Y._fields_, [("b", c_int)]) - self.assertEqual(Z._fields_, [("a", c_int)]) - - def test_subclass_delayed(self): - class X(Structure): - pass - self.assertEqual(sizeof(X), 0) - X._fields_ = [("a", c_int)] - - class Y(X): - pass - self.assertEqual(sizeof(Y), sizeof(X)) - Y._fields_ = [("b", c_int)] - - class Z(X): - pass - - self.assertEqual(sizeof(X), sizeof(c_int)) - self.assertEqual(sizeof(Y), sizeof(c_int)*2) - self.assertEqual(sizeof(Z), sizeof(c_int)) - self.assertEqual(X._fields_, [("a", c_int)]) - self.assertEqual(Y._fields_, [("b", c_int)]) - self.assertEqual(Z._fields_, [("a", c_int)]) class StructureTestCase(unittest.TestCase): - formats = {"c": c_char, - "b": c_byte, - "B": c_ubyte, - "h": c_short, - "H": c_ushort, - "i": c_int, - "I": c_uint, - "l": c_long, - "L": c_ulong, - "q": c_longlong, - "Q": c_ulonglong, - "f": c_float, - "d": c_double, - } - - def test_inheritance_hierarchy(self): - self.assertEqual(Structure.mro(), [Structure, _CData, object]) - - self.assertEqual(PyCStructType.__name__, "PyCStructType") - self.assertEqual(type(PyCStructType), type) - - - def test_type_flags(self): - for cls in Structure, PyCStructType: - with self.subTest(cls=cls): - self.assertTrue(Structure.__flags__ & Py_TPFLAGS_IMMUTABLETYPE) - self.assertFalse(Structure.__flags__ & Py_TPFLAGS_DISALLOW_INSTANTIATION) - - def test_metaclass_details(self): - # Abstract classes (whose metaclass __init__ was not called) can't be - # instantiated directly - NewStructure = PyCStructType.__new__(PyCStructType, 'NewStructure', - (Structure,), {}) - for cls in Structure, NewStructure: - with self.subTest(cls=cls): - with self.assertRaisesRegex(TypeError, "abstract class"): - obj = cls() - - # Cannot call the metaclass __init__ more than once - class T(Structure): - _fields_ = [("x", c_char), - ("y", c_char)] - with self.assertRaisesRegex(SystemError, "already initialized"): - PyCStructType.__init__(T, 'ptr', (), {}) - - def test_simple_structs(self): - for code, tp in self.formats.items(): - class X(Structure): - _fields_ = [("x", c_char), - ("y", tp)] - self.assertEqual((sizeof(X), code), - (calcsize("c%c0%c" % (code, code)), code)) - - def test_unions(self): - for code, tp in self.formats.items(): - class X(Union): - _fields_ = [("x", c_char), - ("y", tp)] - self.assertEqual((sizeof(X), code), - (calcsize("%c" % (code)), code)) - - def test_struct_alignment(self): - class X(Structure): - _fields_ = [("x", c_char * 3)] - self.assertEqual(alignment(X), calcsize("s")) - self.assertEqual(sizeof(X), calcsize("3s")) - - class Y(Structure): - _fields_ = [("x", c_char * 3), - ("y", c_int)] - self.assertEqual(alignment(Y), alignment(c_int)) - self.assertEqual(sizeof(Y), calcsize("3si")) - - class SI(Structure): - _fields_ = [("a", X), - ("b", Y)] - self.assertEqual(alignment(SI), max(alignment(Y), alignment(X))) - self.assertEqual(sizeof(SI), calcsize("3s0i 3si 0i")) - - class IS(Structure): - _fields_ = [("b", Y), - ("a", X)] - - self.assertEqual(alignment(SI), max(alignment(X), alignment(Y))) - self.assertEqual(sizeof(IS), calcsize("3si 3s 0i")) - - class XX(Structure): - _fields_ = [("a", X), - ("b", X)] - self.assertEqual(alignment(XX), alignment(X)) - self.assertEqual(sizeof(XX), calcsize("3s 3s 0s")) - - def test_empty(self): - # I had problems with these - # - # Although these are pathological cases: Empty Structures! - class X(Structure): - _fields_ = [] - - class Y(Union): - _fields_ = [] - - # Is this really the correct alignment, or should it be 0? - self.assertTrue(alignment(X) == alignment(Y) == 1) - self.assertTrue(sizeof(X) == sizeof(Y) == 0) - - class XX(Structure): - _fields_ = [("a", X), - ("b", X)] - - self.assertEqual(alignment(XX), 1) - self.assertEqual(sizeof(XX), 0) - - def test_fields(self): - # test the offset and size attributes of Structure/Union fields. - class X(Structure): - _fields_ = [("x", c_int), - ("y", c_char)] - - self.assertEqual(X.x.offset, 0) - self.assertEqual(X.x.size, sizeof(c_int)) - - self.assertEqual(X.y.offset, sizeof(c_int)) - self.assertEqual(X.y.size, sizeof(c_char)) - - # readonly - self.assertRaises((TypeError, AttributeError), setattr, X.x, "offset", 92) - self.assertRaises((TypeError, AttributeError), setattr, X.x, "size", 92) - - class X(Union): - _fields_ = [("x", c_int), - ("y", c_char)] - - self.assertEqual(X.x.offset, 0) - self.assertEqual(X.x.size, sizeof(c_int)) - - self.assertEqual(X.y.offset, 0) - self.assertEqual(X.y.size, sizeof(c_char)) - - # readonly - self.assertRaises((TypeError, AttributeError), setattr, X.x, "offset", 92) - self.assertRaises((TypeError, AttributeError), setattr, X.x, "size", 92) - - # XXX Should we check nested data types also? - # offset is always relative to the class... - def test_packed(self): class X(Structure): _fields_ = [("a", c_byte), @@ -290,36 +105,6 @@ class POINT(Structure): pt = POINT(y=2, x=1) self.assertEqual((pt.x, pt.y), (1, 2)) - def test_invalid_field_types(self): - class POINT(Structure): - pass - self.assertRaises(TypeError, setattr, POINT, "_fields_", [("x", 1), ("y", 2)]) - - def test_invalid_name(self): - # field name must be string - def declare_with_name(name): - class S(Structure): - _fields_ = [(name, c_int)] - - self.assertRaises(TypeError, declare_with_name, b"x") - - def test_intarray_fields(self): - class SomeInts(Structure): - _fields_ = [("a", c_int * 4)] - - # can use tuple to initialize array (but not list!) - self.assertEqual(SomeInts((1, 2)).a[:], [1, 2, 0, 0]) - self.assertEqual(SomeInts((1, 2)).a[::], [1, 2, 0, 0]) - self.assertEqual(SomeInts((1, 2)).a[::-1], [0, 0, 2, 1]) - self.assertEqual(SomeInts((1, 2)).a[::2], [1, 0]) - self.assertEqual(SomeInts((1, 2)).a[1:5:6], [2]) - self.assertEqual(SomeInts((1, 2)).a[6:4:-1], []) - self.assertEqual(SomeInts((1, 2, 3, 4)).a[:], [1, 2, 3, 4]) - self.assertEqual(SomeInts((1, 2, 3, 4)).a[::], [1, 2, 3, 4]) - # too long - # XXX Should raise ValueError?, not RuntimeError - self.assertRaises(RuntimeError, SomeInts, (1, 2, 3, 4, 5)) - def test_nested_initializers(self): # test initializing nested structures class Phone(Structure): @@ -374,37 +159,12 @@ class Person(Structure): self.assertEqual(msg, "(Phone) TypeError: too many initializers") - def test_huge_field_name(self): - # issue12881: segfault with large structure field names - def create_class(length): - class S(Structure): - _fields_ = [('x' * length, c_int)] - - for length in [10 ** i for i in range(0, 8)]: - try: - create_class(length) - except MemoryError: - # MemoryErrors are OK, we just don't want to segfault - pass - def get_except(self, func, *args): try: func(*args) except Exception as detail: return detail.__class__, str(detail) - def test_abstract_class(self): - class X(Structure): - _abstract_ = "something" - # try 'X()' - cls, msg = self.get_except(eval, "X()", locals()) - self.assertEqual((cls, msg), (TypeError, "abstract class")) - - def test_methods(self): - self.assertIn("in_dll", dir(type(Structure))) - self.assertIn("from_address", dir(type(Structure))) - self.assertIn("in_dll", dir(type(Structure))) - def test_positional_args(self): # see also http://bugs.python.org/issue5042 class W(Structure): @@ -507,6 +267,8 @@ class X(Structure): self.assertEqual(s.second, got.second) def _test_issue18060(self, Vector): + # Regression tests for gh-62260 + # The call to atan2() should succeed if the # class fields were correctly cloned in the # subclasses. Otherwise, it will segfault. @@ -698,6 +460,7 @@ class Test3E(Structure): self.assertEqual(result.data[i], float(i+1)) def test_38368(self): + # Regression test for gh-82549 class U(Union): _fields_ = [ ('f1', c_uint8 * 16), @@ -719,9 +482,9 @@ class U(Union): self.assertEqual(f2, [0x4567, 0x0123, 0xcdef, 0x89ab, 0x3210, 0x7654, 0xba98, 0xfedc]) - @unittest.skipIf(True, 'Test disabled for now - see bpo-16575/bpo-16576') + @unittest.skipIf(True, 'Test disabled for now - see gh-60779/gh-60780') def test_union_by_value(self): - # See bpo-16575 + # See gh-60779 # These should mirror the structures in Modules/_ctypes/_ctypes_test.c @@ -800,9 +563,9 @@ class Test5(Structure): self.assertEqual(test5.nested.an_int, 0) self.assertEqual(test5.another_int, 0) - @unittest.skipIf(True, 'Test disabled for now - see bpo-16575/bpo-16576') + @unittest.skipIf(True, 'Test disabled for now - see gh-60779/gh-60780') def test_bitfield_by_value(self): - # See bpo-16576 + # See gh-60780 # These should mirror the structures in Modules/_ctypes/_ctypes_test.c @@ -882,75 +645,5 @@ class Test8(Union): 'a union by value, which is unsupported.') -class PointerMemberTestCase(unittest.TestCase): - - def test(self): - # a Structure with a POINTER field - class S(Structure): - _fields_ = [("array", POINTER(c_int))] - - s = S() - # We can assign arrays of the correct type - s.array = (c_int * 3)(1, 2, 3) - items = [s.array[i] for i in range(3)] - self.assertEqual(items, [1, 2, 3]) - - # The following are bugs, but are included here because the unittests - # also describe the current behaviour. - # - # This fails with SystemError: bad arg to internal function - # or with IndexError (with a patch I have) - - s.array[0] = 42 - - items = [s.array[i] for i in range(3)] - self.assertEqual(items, [42, 2, 3]) - - s.array[0] = 1 - - items = [s.array[i] for i in range(3)] - self.assertEqual(items, [1, 2, 3]) - - def test_none_to_pointer_fields(self): - class S(Structure): - _fields_ = [("x", c_int), - ("p", POINTER(c_int))] - - s = S() - s.x = 12345678 - s.p = None - self.assertEqual(s.x, 12345678) - - -class TestRecursiveStructure(unittest.TestCase): - def test_contains_itself(self): - class Recursive(Structure): - pass - - try: - Recursive._fields_ = [("next", Recursive)] - except AttributeError as details: - self.assertIn("Structure or union cannot contain itself", - str(details)) - else: - self.fail("Structure or union cannot contain itself") - - - def test_vice_versa(self): - class First(Structure): - pass - class Second(Structure): - pass - - First._fields_ = [("second", Second)] - - try: - Second._fields_ = [("first", First)] - except AttributeError as details: - self.assertIn("_fields_ is final", str(details)) - else: - self.fail("AttributeError not raised") - - if __name__ == '__main__': unittest.main() diff --git a/Lib/test/test_ctypes/test_unions.py b/Lib/test/test_ctypes/test_unions.py deleted file mode 100644 index e2dff0f22a9213f..000000000000000 --- a/Lib/test/test_ctypes/test_unions.py +++ /dev/null @@ -1,35 +0,0 @@ -import unittest -from ctypes import Union, c_char -from ._support import (_CData, UnionType, Py_TPFLAGS_DISALLOW_INSTANTIATION, - Py_TPFLAGS_IMMUTABLETYPE) - - -class ArrayTestCase(unittest.TestCase): - def test_inheritance_hierarchy(self): - self.assertEqual(Union.mro(), [Union, _CData, object]) - - self.assertEqual(UnionType.__name__, "UnionType") - self.assertEqual(type(UnionType), type) - - def test_type_flags(self): - for cls in Union, UnionType: - with self.subTest(cls=Union): - self.assertTrue(Union.__flags__ & Py_TPFLAGS_IMMUTABLETYPE) - self.assertFalse(Union.__flags__ & Py_TPFLAGS_DISALLOW_INSTANTIATION) - - def test_metaclass_details(self): - # Abstract classes (whose metaclass __init__ was not called) can't be - # instantiated directly - NewUnion = UnionType.__new__(UnionType, 'NewUnion', - (Union,), {}) - for cls in Union, NewUnion: - with self.subTest(cls=cls): - with self.assertRaisesRegex(TypeError, "abstract class"): - obj = cls() - - # Cannot call the metaclass __init__ more than once - class T(Union): - _fields_ = [("x", c_char), - ("y", c_char)] - with self.assertRaisesRegex(SystemError, "already initialized"): - UnionType.__init__(T, 'ptr', (), {}) From c9014374c50d6ef64786d3e7d9c7e99053d5c9e2 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Thu, 10 Oct 2024 18:19:08 +0100 Subject: [PATCH 20/80] GH-125174: Make immortal objects more robust, following design from PEP 683 (GH-125251) --- .../pycore_global_objects_fini_generated.h | 2 +- Include/internal/pycore_object.h | 34 ++++----------- Include/object.h | 2 +- Include/refcount.h | 42 ++++++++++--------- Lib/test/test_builtin.py | 4 +- ...-10-10-12-04-56.gh-issue-125174._8h6T7.rst | 4 ++ Modules/_asynciomodule.c | 2 +- Objects/bytesobject.c | 6 +-- Objects/dictobject.c | 10 +++-- Objects/object.c | 2 +- Objects/structseq.c | 2 +- Objects/typeobject.c | 8 ++-- Python/bytecodes.c | 4 +- Python/executor_cases.c.h | 4 +- Python/generated_cases.c.h | 4 +- Python/import.c | 2 +- Tools/cases_generator/analyzer.py | 1 - 17 files changed, 61 insertions(+), 72 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2024-10-10-12-04-56.gh-issue-125174._8h6T7.rst diff --git a/Include/internal/pycore_global_objects_fini_generated.h b/Include/internal/pycore_global_objects_fini_generated.h index 3140a75a47c5ee7..de68ef932572348 100644 --- a/Include/internal/pycore_global_objects_fini_generated.h +++ b/Include/internal/pycore_global_objects_fini_generated.h @@ -11,7 +11,7 @@ extern "C" { #ifdef Py_DEBUG static inline void _PyStaticObject_CheckRefcnt(PyObject *obj) { - if (Py_REFCNT(obj) < _Py_IMMORTAL_REFCNT) { + if (!_Py_IsImmortal(obj)) { fprintf(stderr, "Immortal Object has less refcnt than expected.\n"); _PyObject_Dump(obj); } diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h index 0af13b1bcda20b7..8832692d03c29e3 100644 --- a/Include/internal/pycore_object.h +++ b/Include/internal/pycore_object.h @@ -16,9 +16,6 @@ extern "C" { #include "pycore_pystate.h" // _PyInterpreterState_GET() #include "pycore_uniqueid.h" // _PyType_IncrefSlow - -#define _Py_IMMORTAL_REFCNT_LOOSE ((_Py_IMMORTAL_REFCNT >> 1) + 1) - // This value is added to `ob_ref_shared` for objects that use deferred // reference counting so that they are not immediately deallocated when the // non-deferred reference count drops to zero. @@ -27,25 +24,8 @@ extern "C" { // `ob_ref_shared` are used for flags. #define _Py_REF_DEFERRED (PY_SSIZE_T_MAX / 8) -// gh-121528, gh-118997: Similar to _Py_IsImmortal() but be more loose when -// comparing the reference count to stay compatible with C extensions built -// with the stable ABI 3.11 or older. Such extensions implement INCREF/DECREF -// as refcnt++ and refcnt-- without taking in account immortal objects. For -// example, the reference count of an immortal object can change from -// _Py_IMMORTAL_REFCNT to _Py_IMMORTAL_REFCNT+1 (INCREF) or -// _Py_IMMORTAL_REFCNT-1 (DECREF). -// -// This function should only be used in assertions. Otherwise, _Py_IsImmortal() -// must be used instead. -static inline int _Py_IsImmortalLoose(PyObject *op) -{ -#if defined(Py_GIL_DISABLED) - return _Py_IsImmortal(op); -#else - return (op->ob_refcnt >= _Py_IMMORTAL_REFCNT_LOOSE); -#endif -} -#define _Py_IsImmortalLoose(op) _Py_IsImmortalLoose(_PyObject_CAST(op)) +/* For backwards compatibility -- Do not use this */ +#define _Py_IsImmortalLoose(op) _Py_IsImmortal /* Check if an object is consistent. For example, ensure that the reference @@ -97,7 +77,7 @@ PyAPI_FUNC(int) _PyObject_IsFreed(PyObject *); #else #define _PyObject_HEAD_INIT(type) \ { \ - .ob_refcnt = _Py_IMMORTAL_REFCNT, \ + .ob_refcnt = _Py_IMMORTAL_INITIAL_REFCNT, \ .ob_type = (type) \ } #endif @@ -184,7 +164,7 @@ PyAPI_FUNC(void) _Py_SetImmortalUntracked(PyObject *op); static inline void _Py_SetMortal(PyObject *op, Py_ssize_t refcnt) { if (op) { - assert(_Py_IsImmortalLoose(op)); + assert(_Py_IsImmortal(op)); #ifdef Py_GIL_DISABLED op->ob_tid = _Py_UNOWNED_TID; op->ob_ref_local = 0; @@ -316,7 +296,7 @@ static inline void _Py_INCREF_TYPE(PyTypeObject *type) { if (!_PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) { - assert(_Py_IsImmortalLoose(type)); + assert(_Py_IsImmortal(type)); _Py_INCREF_IMMORTAL_STAT_INC(); return; } @@ -357,7 +337,7 @@ static inline void _Py_DECREF_TYPE(PyTypeObject *type) { if (!_PyType_HasFeature(type, Py_TPFLAGS_HEAPTYPE)) { - assert(_Py_IsImmortalLoose(type)); + assert(_Py_IsImmortal(type)); _Py_DECREF_IMMORTAL_STAT_INC(); return; } @@ -393,7 +373,7 @@ _PyObject_Init(PyObject *op, PyTypeObject *typeobj) { assert(op != NULL); Py_SET_TYPE(op, typeobj); - assert(_PyType_HasFeature(typeobj, Py_TPFLAGS_HEAPTYPE) || _Py_IsImmortalLoose(typeobj)); + assert(_PyType_HasFeature(typeobj, Py_TPFLAGS_HEAPTYPE) || _Py_IsImmortal(typeobj)); _Py_INCREF_TYPE(typeobj); _Py_NewReference(op); } diff --git a/Include/object.h b/Include/object.h index 418f2196062df7c..5be4dedadc20ebd 100644 --- a/Include/object.h +++ b/Include/object.h @@ -81,7 +81,7 @@ whose size is determined when the object is allocated. #else #define PyObject_HEAD_INIT(type) \ { \ - { _Py_IMMORTAL_REFCNT }, \ + { _Py_IMMORTAL_INITIAL_REFCNT }, \ (type) \ }, #endif diff --git a/Include/refcount.h b/Include/refcount.h index 9a4e15065ecab8c..141cbd34dd72e64 100644 --- a/Include/refcount.h +++ b/Include/refcount.h @@ -21,25 +21,30 @@ cleanup during runtime finalization. #if SIZEOF_VOID_P > 4 /* -In 64+ bit systems, an object will be marked as immortal by setting all of the -lower 32 bits of the reference count field, which is equal to: 0xFFFFFFFF +In 64+ bit systems, any object whose 32 bit reference count is >= 2**31 +will be treated as immortal. Using the lower 32 bits makes the value backwards compatible by allowing C-Extensions without the updated checks in Py_INCREF and Py_DECREF to safely -increase and decrease the objects reference count. The object would lose its -immortality, but the execution would still be correct. +increase and decrease the objects reference count. + +In order to offer sufficient resilience to C extensions using the stable ABI +compiled against 3.11 or earlier, we set the initial value near the +middle of the range (2**31, 2**32). That way the the refcount can be +off by ~1 billion without affecting immortality. Reference count increases will use saturated arithmetic, taking advantage of having all the lower 32 bits set, which will avoid the reference count to go beyond the refcount limit. Immortality checks for reference count decreases will be done by checking the bit sign flag in the lower 32 bits. + */ -#define _Py_IMMORTAL_REFCNT _Py_CAST(Py_ssize_t, UINT_MAX) +#define _Py_IMMORTAL_INITIAL_REFCNT ((Py_ssize_t)(3UL << 30)) #else /* -In 32 bit systems, an object will be marked as immortal by setting all of the -lower 30 bits of the reference count field, which is equal to: 0x3FFFFFFF +In 32 bit systems, an object will be treated as immortal if its reference +count equals or exceeds _Py_IMMORTAL_MINIMUM_REFCNT (2**30). Using the lower 30 bits makes the value backwards compatible by allowing C-Extensions without the updated checks in Py_INCREF and Py_DECREF to safely @@ -47,9 +52,10 @@ increase and decrease the objects reference count. The object would lose its immortality, but the execution would still be correct. Reference count increases and decreases will first go through an immortality -check by comparing the reference count field to the immortality reference count. +check by comparing the reference count field to the minimum immortality refcount. */ -#define _Py_IMMORTAL_REFCNT _Py_CAST(Py_ssize_t, UINT_MAX >> 2) +#define _Py_IMMORTAL_INITIAL_REFCNT ((Py_ssize_t)(3L << 29)) +#define _Py_IMMORTAL_MINIMUM_REFCNT ((Py_ssize_t)(1L << 30)) #endif // Py_GIL_DISABLED builds indicate immortal objects using `ob_ref_local`, which is @@ -90,7 +96,7 @@ PyAPI_FUNC(Py_ssize_t) Py_REFCNT(PyObject *ob); #else uint32_t local = _Py_atomic_load_uint32_relaxed(&ob->ob_ref_local); if (local == _Py_IMMORTAL_REFCNT_LOCAL) { - return _Py_IMMORTAL_REFCNT; + return _Py_IMMORTAL_INITIAL_REFCNT; } Py_ssize_t shared = _Py_atomic_load_ssize_relaxed(&ob->ob_ref_shared); return _Py_STATIC_CAST(Py_ssize_t, local) + @@ -109,9 +115,9 @@ static inline Py_ALWAYS_INLINE int _Py_IsImmortal(PyObject *op) return (_Py_atomic_load_uint32_relaxed(&op->ob_ref_local) == _Py_IMMORTAL_REFCNT_LOCAL); #elif SIZEOF_VOID_P > 4 - return (_Py_CAST(PY_INT32_T, op->ob_refcnt) < 0); + return _Py_CAST(PY_INT32_T, op->ob_refcnt) < 0; #else - return (op->ob_refcnt == _Py_IMMORTAL_REFCNT); + return op->ob_refcnt >= _Py_IMMORTAL_MINIMUM_REFCNT; #endif } #define _Py_IsImmortal(op) _Py_IsImmortal(_PyObject_CAST(op)) @@ -236,7 +242,7 @@ static inline Py_ALWAYS_INLINE void Py_INCREF(PyObject *op) uint32_t new_local = local + 1; if (new_local == 0) { _Py_INCREF_IMMORTAL_STAT_INC(); - // local is equal to _Py_IMMORTAL_REFCNT: do nothing + // local is equal to _Py_IMMORTAL_REFCNT_LOCAL: do nothing return; } if (_Py_IsOwnedByCurrentThread(op)) { @@ -246,18 +252,14 @@ static inline Py_ALWAYS_INLINE void Py_INCREF(PyObject *op) _Py_atomic_add_ssize(&op->ob_ref_shared, (1 << _Py_REF_SHARED_SHIFT)); } #elif SIZEOF_VOID_P > 4 - // Portable saturated add, branching on the carry flag and set low bits PY_UINT32_T cur_refcnt = op->ob_refcnt_split[PY_BIG_ENDIAN]; - PY_UINT32_T new_refcnt = cur_refcnt + 1; - if (new_refcnt == 0) { + if (((int32_t)cur_refcnt) < 0) { + // the object is immortal _Py_INCREF_IMMORTAL_STAT_INC(); - // cur_refcnt is equal to _Py_IMMORTAL_REFCNT: the object is immortal, - // do nothing return; } - op->ob_refcnt_split[PY_BIG_ENDIAN] = new_refcnt; + op->ob_refcnt_split[PY_BIG_ENDIAN] = cur_refcnt + 1; #else - // Explicitly check immortality against the immortal value if (_Py_IsImmortal(op)) { _Py_INCREF_IMMORTAL_STAT_INC(); return; diff --git a/Lib/test/test_builtin.py b/Lib/test/test_builtin.py index d884f54940b4718..eb5906f8944c8ef 100644 --- a/Lib/test/test_builtin.py +++ b/Lib/test/test_builtin.py @@ -2574,9 +2574,9 @@ def __del__(self): class ImmortalTests(unittest.TestCase): if sys.maxsize < (1 << 32): - IMMORTAL_REFCOUNT = (1 << 30) - 1 + IMMORTAL_REFCOUNT = 3 << 29 else: - IMMORTAL_REFCOUNT = (1 << 32) - 1 + IMMORTAL_REFCOUNT = 3 << 30 IMMORTALS = (None, True, False, Ellipsis, NotImplemented, *range(-5, 257)) diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-10-10-12-04-56.gh-issue-125174._8h6T7.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-10-10-12-04-56.gh-issue-125174._8h6T7.rst new file mode 100644 index 000000000000000..c7eaac32601bb3b --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-10-10-12-04-56.gh-issue-125174._8h6T7.rst @@ -0,0 +1,4 @@ +Make the handling of reference counts of immortal objects more robust. +Immortal objects with reference counts that deviate from their original +reference count by up to a billion (half a billion on 32 bit builds) are +still counted as immortal. diff --git a/Modules/_asynciomodule.c b/Modules/_asynciomodule.c index 870084100a1b857..0a769c46b87ac8b 100644 --- a/Modules/_asynciomodule.c +++ b/Modules/_asynciomodule.c @@ -1387,7 +1387,7 @@ FutureObj_get_state(FutureObj *fut, void *Py_UNUSED(ignored)) default: assert (0); } - assert(_Py_IsImmortalLoose(ret)); + assert(_Py_IsImmortal(ret)); return ret; } diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index bf58e55e100b3a5..dcc1aba76abbed4 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -46,7 +46,7 @@ Py_LOCAL_INLINE(Py_ssize_t) _PyBytesWriter_GetSize(_PyBytesWriter *writer, static inline PyObject* bytes_get_empty(void) { PyObject *empty = &EMPTY->ob_base.ob_base; - assert(_Py_IsImmortalLoose(empty)); + assert(_Py_IsImmortal(empty)); return empty; } @@ -119,7 +119,7 @@ PyBytes_FromStringAndSize(const char *str, Py_ssize_t size) } if (size == 1 && str != NULL) { op = CHARACTER(*str & 255); - assert(_Py_IsImmortalLoose(op)); + assert(_Py_IsImmortal(op)); return (PyObject *)op; } if (size == 0) { @@ -155,7 +155,7 @@ PyBytes_FromString(const char *str) } else if (size == 1) { op = CHARACTER(*str & 255); - assert(_Py_IsImmortalLoose(op)); + assert(_Py_IsImmortal(op)); return (PyObject *)op; } diff --git a/Objects/dictobject.c b/Objects/dictobject.c index adfd91d1e4d63b4..12722eca6be5e5b 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -416,6 +416,8 @@ _PyDict_DebugMallocStats(FILE *out) #define DK_MASK(dk) (DK_SIZE(dk)-1) +#define _Py_DICT_IMMORTAL_INITIAL_REFCNT PY_SSIZE_T_MIN + static void free_keys_object(PyDictKeysObject *keys, bool use_qsbr); /* PyDictKeysObject has refcounts like PyObject does, so we have the @@ -428,7 +430,8 @@ static void free_keys_object(PyDictKeysObject *keys, bool use_qsbr); static inline void dictkeys_incref(PyDictKeysObject *dk) { - if (FT_ATOMIC_LOAD_SSIZE_RELAXED(dk->dk_refcnt) == _Py_IMMORTAL_REFCNT) { + if (FT_ATOMIC_LOAD_SSIZE_RELAXED(dk->dk_refcnt) < 0) { + assert(FT_ATOMIC_LOAD_SSIZE_RELAXED(dk->dk_refcnt) == _Py_DICT_IMMORTAL_INITIAL_REFCNT); return; } #ifdef Py_REF_DEBUG @@ -440,7 +443,8 @@ dictkeys_incref(PyDictKeysObject *dk) static inline void dictkeys_decref(PyInterpreterState *interp, PyDictKeysObject *dk, bool use_qsbr) { - if (FT_ATOMIC_LOAD_SSIZE_RELAXED(dk->dk_refcnt) == _Py_IMMORTAL_REFCNT) { + if (FT_ATOMIC_LOAD_SSIZE_RELAXED(dk->dk_refcnt) < 0) { + assert(FT_ATOMIC_LOAD_SSIZE_RELAXED(dk->dk_refcnt) == _Py_DICT_IMMORTAL_INITIAL_REFCNT); return; } assert(FT_ATOMIC_LOAD_SSIZE(dk->dk_refcnt) > 0); @@ -586,7 +590,7 @@ estimate_log2_keysize(Py_ssize_t n) * (which cannot fail and thus can do no allocation). */ static PyDictKeysObject empty_keys_struct = { - _Py_IMMORTAL_REFCNT, /* dk_refcnt */ + _Py_DICT_IMMORTAL_INITIAL_REFCNT, /* dk_refcnt */ 0, /* dk_log2_size */ 0, /* dk_log2_index_bytes */ DICT_KEYS_UNICODE, /* dk_kind */ diff --git a/Objects/object.c b/Objects/object.c index a97a900890320d8..27d06cc081259d5 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -2453,7 +2453,7 @@ _Py_SetImmortalUntracked(PyObject *op) op->ob_ref_local = _Py_IMMORTAL_REFCNT_LOCAL; op->ob_ref_shared = 0; #else - op->ob_refcnt = _Py_IMMORTAL_REFCNT; + op->ob_refcnt = _Py_IMMORTAL_INITIAL_REFCNT; #endif } diff --git a/Objects/structseq.c b/Objects/structseq.c index 6092742835400bb..56a7851b98788d1 100644 --- a/Objects/structseq.c +++ b/Objects/structseq.c @@ -708,7 +708,7 @@ _PyStructSequence_FiniBuiltin(PyInterpreterState *interp, PyTypeObject *type) assert(type->tp_name != NULL); assert(type->tp_base == &PyTuple_Type); assert((type->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN)); - assert(_Py_IsImmortalLoose(type)); + assert(_Py_IsImmortal(type)); // Cannot delete a type if it still has subclasses if (_PyType_HasSubclasses(type)) { diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 5380633fa1149ec..d90bb5825fd437d 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -476,7 +476,7 @@ set_tp_bases(PyTypeObject *self, PyObject *bases, int initial) assert(PyTuple_GET_SIZE(bases) == 1); assert(PyTuple_GET_ITEM(bases, 0) == (PyObject *)self->tp_base); assert(self->tp_base->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN); - assert(_Py_IsImmortalLoose(self->tp_base)); + assert(_Py_IsImmortal(self->tp_base)); } _Py_SetImmortal(bases); } @@ -493,7 +493,7 @@ clear_tp_bases(PyTypeObject *self, int final) Py_CLEAR(self->tp_bases); } else { - assert(_Py_IsImmortalLoose(self->tp_bases)); + assert(_Py_IsImmortal(self->tp_bases)); _Py_ClearImmortal(self->tp_bases); } } @@ -558,7 +558,7 @@ clear_tp_mro(PyTypeObject *self, int final) Py_CLEAR(self->tp_mro); } else { - assert(_Py_IsImmortalLoose(self->tp_mro)); + assert(_Py_IsImmortal(self->tp_mro)); _Py_ClearImmortal(self->tp_mro); } } @@ -5966,7 +5966,7 @@ fini_static_type(PyInterpreterState *interp, PyTypeObject *type, int isbuiltin, int final) { assert(type->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN); - assert(_Py_IsImmortalLoose((PyObject *)type)); + assert(_Py_IsImmortal((PyObject *)type)); type_dealloc_common(type); diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 87cca3fc1d373c3..34fdfcb05e3c187 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -381,7 +381,7 @@ dummy_func( EXIT_IF(!PyLong_CheckExact(value_o)); STAT_INC(TO_BOOL, hit); if (_PyLong_IsZero((PyLongObject *)value_o)) { - assert(_Py_IsImmortalLoose(value_o)); + assert(_Py_IsImmortal(value_o)); DEAD(value); res = PyStackRef_False; } @@ -412,7 +412,7 @@ dummy_func( EXIT_IF(!PyUnicode_CheckExact(value_o)); STAT_INC(TO_BOOL, hit); if (value_o == &_Py_STR(empty)) { - assert(_Py_IsImmortalLoose(value_o)); + assert(_Py_IsImmortal(value_o)); DEAD(value); res = PyStackRef_False; } diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 57e15f33ca77034..ef110e2e2a794a0 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -402,7 +402,7 @@ } STAT_INC(TO_BOOL, hit); if (_PyLong_IsZero((PyLongObject *)value_o)) { - assert(_Py_IsImmortalLoose(value_o)); + assert(_Py_IsImmortal(value_o)); res = PyStackRef_False; } else { @@ -455,7 +455,7 @@ } STAT_INC(TO_BOOL, hit); if (value_o == &_Py_STR(empty)) { - assert(_Py_IsImmortalLoose(value_o)); + assert(_Py_IsImmortal(value_o)); res = PyStackRef_False; } else { diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 7656ce6bb7e313e..7023aea369db497 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -7840,7 +7840,7 @@ DEOPT_IF(!PyLong_CheckExact(value_o), TO_BOOL); STAT_INC(TO_BOOL, hit); if (_PyLong_IsZero((PyLongObject *)value_o)) { - assert(_Py_IsImmortalLoose(value_o)); + assert(_Py_IsImmortal(value_o)); res = PyStackRef_False; } else { @@ -7902,7 +7902,7 @@ DEOPT_IF(!PyUnicode_CheckExact(value_o), TO_BOOL); STAT_INC(TO_BOOL, hit); if (value_o == &_Py_STR(empty)) { - assert(_Py_IsImmortalLoose(value_o)); + assert(_Py_IsImmortal(value_o)); res = PyStackRef_False; } else { diff --git a/Python/import.c b/Python/import.c index 460b1fe225c72e5..acf849f14562b9f 100644 --- a/Python/import.c +++ b/Python/import.c @@ -1051,7 +1051,7 @@ del_cached_def(struct extensions_cache_value *value) However, this decref would be problematic if the module def were dynamically allocated, it were the last ref, and this function were called with an interpreter other than the def's owner. */ - assert(value->def == NULL || _Py_IsImmortalLoose(value->def)); + assert(value->def == NULL || _Py_IsImmortal(value->def)); Py_XDECREF(value->def->m_base.m_copy); value->def->m_base.m_copy = NULL; diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index 9c2981a68ac9099..60f5d010a7a083b 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -614,7 +614,6 @@ def has_error_without_pop(op: parser.InstDef) -> bool: "_Py_EnterRecursiveCallTstateUnchecked", "_Py_ID", "_Py_IsImmortal", - "_Py_IsImmortalLoose", "_Py_LeaveRecursiveCallPy", "_Py_LeaveRecursiveCallTstate", "_Py_NewRef", From 3b87fb74c907510402678bf1b7c4a94df0e5e65a Mon Sep 17 00:00:00 2001 From: Justin Kunimune Date: Thu, 10 Oct 2024 13:56:05 -0400 Subject: [PATCH 21/80] Note argparse exit code in documentation (GH-119568) Co-authored-by: Savannah Ostrowski --- Doc/library/argparse.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Doc/library/argparse.rst b/Doc/library/argparse.rst index e9a08984f77c3ae..4eb6fad41f11ef9 100644 --- a/Doc/library/argparse.rst +++ b/Doc/library/argparse.rst @@ -541,7 +541,8 @@ exit_on_error ^^^^^^^^^^^^^ Normally, when you pass an invalid argument list to the :meth:`~ArgumentParser.parse_args` -method of an :class:`ArgumentParser`, it will exit with error info. +method of an :class:`ArgumentParser`, it will print a *message* to :data:`sys.stderr` and exit with a status +code of 2. If the user would like to catch errors manually, the feature can be enabled by setting ``exit_on_error`` to ``False``:: From bb594e801b6a84823badbb85b88f0fc8b221d7bf Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 10 Oct 2024 20:41:14 +0200 Subject: [PATCH 22/80] gh-125196: Use PyUnicodeWriter for repr(dict) (#125270) --- Objects/dictobject.c | 72 ++++++++++++++++++++++---------------------- 1 file changed, 36 insertions(+), 36 deletions(-) diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 12722eca6be5e5b..b27599d2815c82e 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -3200,16 +3200,12 @@ static PyObject * dict_repr_lock_held(PyObject *self) { PyDictObject *mp = (PyDictObject *)self; - Py_ssize_t i; PyObject *key = NULL, *value = NULL; - _PyUnicodeWriter writer; - int first; - ASSERT_DICT_LOCKED(mp); - i = Py_ReprEnter((PyObject *)mp); - if (i != 0) { - return i > 0 ? PyUnicode_FromString("{...}") : NULL; + int res = Py_ReprEnter((PyObject *)mp); + if (res != 0) { + return (res > 0 ? PyUnicode_FromString("{...}") : NULL); } if (mp->ma_used == 0) { @@ -3217,66 +3213,70 @@ dict_repr_lock_held(PyObject *self) return PyUnicode_FromString("{}"); } - _PyUnicodeWriter_Init(&writer); - writer.overallocate = 1; - /* "{" + "1: 2" + ", 3: 4" * (len - 1) + "}" */ - writer.min_length = 1 + 4 + (2 + 4) * (mp->ma_used - 1) + 1; + // "{" + "1: 2" + ", 3: 4" * (len - 1) + "}" + Py_ssize_t prealloc = 1 + 4 + 6 * (mp->ma_used - 1) + 1; + PyUnicodeWriter *writer = PyUnicodeWriter_Create(prealloc); + if (writer == NULL) { + goto error; + } - if (_PyUnicodeWriter_WriteChar(&writer, '{') < 0) + if (PyUnicodeWriter_WriteChar(writer, '{') < 0) { goto error; + } /* Do repr() on each key+value pair, and insert ": " between them. Note that repr may mutate the dict. */ - i = 0; - first = 1; + Py_ssize_t i = 0; + int first = 1; while (_PyDict_Next((PyObject *)mp, &i, &key, &value, NULL)) { - PyObject *s; - int res; - - /* Prevent repr from deleting key or value during key format. */ + // Prevent repr from deleting key or value during key format. Py_INCREF(key); Py_INCREF(value); if (!first) { - if (_PyUnicodeWriter_WriteASCIIString(&writer, ", ", 2) < 0) + // Write ", " + if (PyUnicodeWriter_WriteChar(writer, ',') < 0) { + goto error; + } + if (PyUnicodeWriter_WriteChar(writer, ' ') < 0) { goto error; + } } first = 0; - s = PyObject_Repr(key); - if (s == NULL) - goto error; - res = _PyUnicodeWriter_WriteStr(&writer, s); - Py_DECREF(s); - if (res < 0) + // Write repr(key) + if (PyUnicodeWriter_WriteRepr(writer, key) < 0) { goto error; + } - if (_PyUnicodeWriter_WriteASCIIString(&writer, ": ", 2) < 0) + // Write ": " + if (PyUnicodeWriter_WriteChar(writer, ':') < 0) { goto error; - - s = PyObject_Repr(value); - if (s == NULL) + } + if (PyUnicodeWriter_WriteChar(writer, ' ') < 0) { goto error; - res = _PyUnicodeWriter_WriteStr(&writer, s); - Py_DECREF(s); - if (res < 0) + } + + // Write repr(value) + if (PyUnicodeWriter_WriteRepr(writer, value) < 0) { goto error; + } Py_CLEAR(key); Py_CLEAR(value); } - writer.overallocate = 0; - if (_PyUnicodeWriter_WriteChar(&writer, '}') < 0) + if (PyUnicodeWriter_WriteChar(writer, '}') < 0) { goto error; + } Py_ReprLeave((PyObject *)mp); - return _PyUnicodeWriter_Finish(&writer); + return PyUnicodeWriter_Finish(writer); error: Py_ReprLeave((PyObject *)mp); - _PyUnicodeWriter_Dealloc(&writer); + PyUnicodeWriter_Discard(writer); Py_XDECREF(key); Py_XDECREF(value); return NULL; From 427dcf24de4e06d239745d74d08c4b2e541dca5a Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Thu, 10 Oct 2024 16:21:29 -0400 Subject: [PATCH 23/80] gh-125268: Use static string for "1e309" in AST (#125272) When formatting the AST as a string, infinite values are replaced by 1e309, which evaluates to infinity. The initialization of this string replacement was not thread-safe in the free threading build. --- Include/internal/pycore_global_objects.h | 3 -- .../pycore_global_objects_fini_generated.h | 1 + Include/internal/pycore_global_strings.h | 1 + .../internal/pycore_runtime_init_generated.h | 1 + .../internal/pycore_unicodeobject_generated.h | 4 +++ Parser/asdl_c.py | 2 -- Python/Python-ast.c | 2 -- Python/ast_unparse.c | 29 +++---------------- 8 files changed, 11 insertions(+), 32 deletions(-) diff --git a/Include/internal/pycore_global_objects.h b/Include/internal/pycore_global_objects.h index 913dce6f1ec0fe3..e3f7ac707f0c37e 100644 --- a/Include/internal/pycore_global_objects.h +++ b/Include/internal/pycore_global_objects.h @@ -66,9 +66,6 @@ struct _Py_static_objects { struct _Py_interp_cached_objects { PyObject *interned_strings; - /* AST */ - PyObject *str_replace_inf; - /* object.__reduce__ */ PyObject *objreduce; PyObject *type_slots_pname; diff --git a/Include/internal/pycore_global_objects_fini_generated.h b/Include/internal/pycore_global_objects_fini_generated.h index de68ef932572348..2fd7d5d13a98b29 100644 --- a/Include/internal/pycore_global_objects_fini_generated.h +++ b/Include/internal/pycore_global_objects_fini_generated.h @@ -562,6 +562,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(json_decoder)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(kwdefaults)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(list_err)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(str_replace_inf)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(type_params)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(utf_8)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(CANCELLED)); diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h index 1591cb0a3f114f1..fc3871570cc49d9 100644 --- a/Include/internal/pycore_global_strings.h +++ b/Include/internal/pycore_global_strings.h @@ -48,6 +48,7 @@ struct _Py_global_strings { STRUCT_FOR_STR(json_decoder, "json.decoder") STRUCT_FOR_STR(kwdefaults, ".kwdefaults") STRUCT_FOR_STR(list_err, "list index out of range") + STRUCT_FOR_STR(str_replace_inf, "1e309") STRUCT_FOR_STR(type_params, ".type_params") STRUCT_FOR_STR(utf_8, "utf-8") } literals; diff --git a/Include/internal/pycore_runtime_init_generated.h b/Include/internal/pycore_runtime_init_generated.h index c9d20d0b5aacdbf..3b80e265b0ca50a 100644 --- a/Include/internal/pycore_runtime_init_generated.h +++ b/Include/internal/pycore_runtime_init_generated.h @@ -557,6 +557,7 @@ extern "C" { INIT_STR(json_decoder, "json.decoder"), \ INIT_STR(kwdefaults, ".kwdefaults"), \ INIT_STR(list_err, "list index out of range"), \ + INIT_STR(str_replace_inf, "1e309"), \ INIT_STR(type_params, ".type_params"), \ INIT_STR(utf_8, "utf-8"), \ } diff --git a/Include/internal/pycore_unicodeobject_generated.h b/Include/internal/pycore_unicodeobject_generated.h index d335373e88ee74d..eb2eca06ec4d4f6 100644 --- a/Include/internal/pycore_unicodeobject_generated.h +++ b/Include/internal/pycore_unicodeobject_generated.h @@ -2936,6 +2936,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_STR(str_replace_inf); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_STR(anon_null); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); diff --git a/Parser/asdl_c.py b/Parser/asdl_c.py index f50c28afcfe2055..32eac3afafa5d5c 100755 --- a/Parser/asdl_c.py +++ b/Parser/asdl_c.py @@ -2242,8 +2242,6 @@ def generate_ast_fini(module_state, f): for s in module_state: f.write(" Py_CLEAR(state->" + s + ');\n') f.write(textwrap.dedent(""" - Py_CLEAR(_Py_INTERP_CACHED_OBJECT(interp, str_replace_inf)); - state->finalized = 1; state->once = (_PyOnceFlag){0}; } diff --git a/Python/Python-ast.c b/Python/Python-ast.c index 89c52b9dc73cacb..38d74b48d232f8b 100644 --- a/Python/Python-ast.c +++ b/Python/Python-ast.c @@ -281,8 +281,6 @@ void _PyAST_Fini(PyInterpreterState *interp) Py_CLEAR(state->vararg); Py_CLEAR(state->withitem_type); - Py_CLEAR(_Py_INTERP_CACHED_OBJECT(interp, str_replace_inf)); - state->finalized = 1; state->once = (_PyOnceFlag){0}; } diff --git a/Python/ast_unparse.c b/Python/ast_unparse.c index 86f7a582b981a30..8017cfc7fcf2689 100644 --- a/Python/ast_unparse.c +++ b/Python/ast_unparse.c @@ -2,7 +2,6 @@ #include "pycore_ast.h" // expr_ty #include "pycore_pystate.h" // _PyInterpreterState_GET() #include "pycore_runtime.h" // _Py_ID() -#include // DBL_MAX_10_EXP #include /* This limited unparser is used to convert annotations back to strings @@ -13,10 +12,6 @@ _Py_DECLARE_STR(dbl_open_br, "{{"); _Py_DECLARE_STR(dbl_close_br, "}}"); -/* We would statically initialize this if doing so were simple enough. */ -#define _str_replace_inf(interp) \ - _Py_INTERP_CACHED_OBJECT(interp, str_replace_inf) - /* Forward declarations for recursion via helper functions. */ static PyObject * expr_as_unicode(expr_ty e, int level); @@ -78,13 +73,13 @@ append_repr(_PyUnicodeWriter *writer, PyObject *obj) } if ((PyFloat_CheckExact(obj) && isinf(PyFloat_AS_DOUBLE(obj))) || - PyComplex_CheckExact(obj)) + PyComplex_CheckExact(obj)) { - PyInterpreterState *interp = _PyInterpreterState_GET(); + _Py_DECLARE_STR(str_replace_inf, "1e309"); // evaluates to inf PyObject *new_repr = PyUnicode_Replace( repr, &_Py_ID(inf), - _str_replace_inf(interp), + &_Py_STR(str_replace_inf), -1 ); Py_DECREF(repr); @@ -918,20 +913,6 @@ append_ast_expr(_PyUnicodeWriter *writer, expr_ty e, int level) return -1; } -static int -maybe_init_static_strings(void) -{ - PyInterpreterState *interp = _PyInterpreterState_GET(); - if (_str_replace_inf(interp) == NULL) { - PyObject *tmp = PyUnicode_FromFormat("1e%d", 1 + DBL_MAX_10_EXP); - if (tmp == NULL) { - return -1; - } - _str_replace_inf(interp) = tmp; - } - return 0; -} - static PyObject * expr_as_unicode(expr_ty e, int level) { @@ -939,9 +920,7 @@ expr_as_unicode(expr_ty e, int level) _PyUnicodeWriter_Init(&writer); writer.min_length = 256; writer.overallocate = 1; - if (-1 == maybe_init_static_strings() || - -1 == append_ast_expr(&writer, e, level)) - { + if (-1 == append_ast_expr(&writer, e, level)) { _PyUnicodeWriter_Dealloc(&writer); return NULL; } From dd0ee201da34d1d4a631d77b420728f9233f53f9 Mon Sep 17 00:00:00 2001 From: Adam Turner <9087854+AA-Turner@users.noreply.github.com> Date: Thu, 10 Oct 2024 21:26:01 +0100 Subject: [PATCH 24/80] Doc: Upgrade Sphinx to 8.1 (#125276) --- Doc/conf.py | 31 ++++++++++++++++++++++++------- Doc/requirements.txt | 2 +- 2 files changed, 25 insertions(+), 8 deletions(-) diff --git a/Doc/conf.py b/Doc/conf.py index 287e0da46eb11cb..d7197b17865854f 100644 --- a/Doc/conf.py +++ b/Doc/conf.py @@ -11,6 +11,8 @@ import sys import time +import sphinx + sys.path.append(os.path.abspath('tools/extensions')) sys.path.append(os.path.abspath('includes')) @@ -62,7 +64,10 @@ # General substitutions. project = 'Python' -copyright = f"2001-{time.strftime('%Y')}, Python Software Foundation" +if sphinx.version_info[:2] >= (8, 1): + copyright = "2001-%Y, Python Software Foundation" +else: + copyright = f"2001-{time.strftime('%Y')}, Python Software Foundation" # We look for the Include/patchlevel.h file in the current Python source tree # and replace the values accordingly. @@ -361,10 +366,14 @@ } # This 'Last updated on:' timestamp is inserted at the bottom of every page. -html_time = int(os.environ.get('SOURCE_DATE_EPOCH', time.time())) -html_last_updated_fmt = time.strftime( - '%b %d, %Y (%H:%M UTC)', time.gmtime(html_time) -) +html_last_updated_fmt = '%b %d, %Y (%H:%M UTC)' +if sphinx.version_info[:2] >= (8, 1): + html_last_updated_use_utc = True +else: + html_time = int(os.environ.get('SOURCE_DATE_EPOCH', time.time())) + html_last_updated_fmt = time.strftime( + html_last_updated_fmt, time.gmtime(html_time) + ) # Path to find HTML templates. templates_path = ['tools/templates'] @@ -596,13 +605,21 @@ # mapping unique short aliases to a base URL and a prefix. # https://www.sphinx-doc.org/en/master/usage/extensions/extlinks.html extlinks = { - "cve": ("https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-%s", "CVE-%s"), - "cwe": ("https://cwe.mitre.org/data/definitions/%s.html", "CWE-%s"), "pypi": ("https://pypi.org/project/%s/", "%s"), "source": (SOURCE_URI, "%s"), } extlinks_detect_hardcoded_links = True +if sphinx.version_info[:2] < (8, 1): + # Sphinx 8.1 has in-built CVE and CWE roles. + extlinks |= { + "cve": ( + "https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-%s", + "CVE-%s", + ), + "cwe": ("https://cwe.mitre.org/data/definitions/%s.html", "CWE-%s"), + } + # Options for c_annotations # ------------------------- diff --git a/Doc/requirements.txt b/Doc/requirements.txt index bf1028020b7af76..5105786ccf283c4 100644 --- a/Doc/requirements.txt +++ b/Doc/requirements.txt @@ -6,7 +6,7 @@ # Sphinx version is pinned so that new versions that introduce new warnings # won't suddenly cause build failures. Updating the version is fine as long # as no warnings are raised by doing so. -sphinx~=8.0.0 +sphinx~=8.1.0 blurb From a726ce73ca69b3a5ccc2cbe23061070e686b1150 Mon Sep 17 00:00:00 2001 From: Alex Waygood Date: Fri, 11 Oct 2024 00:53:45 +0100 Subject: [PATCH 25/80] Add some doctest cleanups for `turtle` and `configparser` (#125288) Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com> --- Doc/library/configparser.rst | 1 + Doc/library/turtle.rst | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/Doc/library/configparser.rst b/Doc/library/configparser.rst index b5c18bbccffb785..3aad6f7b5d2d20d 100644 --- a/Doc/library/configparser.rst +++ b/Doc/library/configparser.rst @@ -54,6 +54,7 @@ can be customized by end users easily. import os os.remove("example.ini") + os.remove("override.ini") Quick Start diff --git a/Doc/library/turtle.rst b/Doc/library/turtle.rst index da801d4dc1f5b36..efa4b6f8f1d3f9b 100644 --- a/Doc/library/turtle.rst +++ b/Doc/library/turtle.rst @@ -14,6 +14,11 @@ from turtle import * turtle = Turtle() +.. testcleanup:: + + import os + os.remove("my_drawing.ps") + -------------- Introduction From 2f8301cbfbdd2976d254a4a772b4879069dd4298 Mon Sep 17 00:00:00 2001 From: Richard Hansen Date: Thu, 10 Oct 2024 22:39:17 -0400 Subject: [PATCH 26/80] gh-124872: Rename blurb file to reference the correct issue (#125285) --- ....7tinr0.rst => 2024-10-10-02-56-24.gh-issue-124872.0mDDOq.rst} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename Misc/NEWS.d/next/Documentation/{2024-09-29-18-14-52.gh-issue-119333.7tinr0.rst => 2024-10-10-02-56-24.gh-issue-124872.0mDDOq.rst} (100%) diff --git a/Misc/NEWS.d/next/Documentation/2024-09-29-18-14-52.gh-issue-119333.7tinr0.rst b/Misc/NEWS.d/next/Documentation/2024-10-10-02-56-24.gh-issue-124872.0mDDOq.rst similarity index 100% rename from Misc/NEWS.d/next/Documentation/2024-09-29-18-14-52.gh-issue-119333.7tinr0.rst rename to Misc/NEWS.d/next/Documentation/2024-10-10-02-56-24.gh-issue-124872.0mDDOq.rst From c1913effeed4e4da4d5310a40ab518945001ffba Mon Sep 17 00:00:00 2001 From: Savannah Ostrowski Date: Thu, 10 Oct 2024 23:30:27 -0700 Subject: [PATCH 27/80] gh-125296: Fix strange fragment identifier for `name or flags` in argparse docs (#125297) --- Doc/library/argparse.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/library/argparse.rst b/Doc/library/argparse.rst index 4eb6fad41f11ef9..d337de87ca8f396 100644 --- a/Doc/library/argparse.rst +++ b/Doc/library/argparse.rst @@ -602,7 +602,7 @@ The add_argument() method The following sections describe how each of these are used. -.. _name_or_flags: +.. _`name or flags`: name or flags ^^^^^^^^^^^^^ From b12e99261e656585ffbaa395af7c5dbaee5ad1ad Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Fri, 11 Oct 2024 03:56:01 -0400 Subject: [PATCH 28/80] gh-125221: Fix free-threading data race in `object.__reduce_ex__` (#125267) --- ...-10-10-14-47-13.gh-issue-125221.nfSQzT.rst | 2 ++ Objects/object.c | 8 ++++++++ Objects/typeobject.c | 20 +++++-------------- 3 files changed, 15 insertions(+), 15 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2024-10-10-14-47-13.gh-issue-125221.nfSQzT.rst diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-10-10-14-47-13.gh-issue-125221.nfSQzT.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-10-10-14-47-13.gh-issue-125221.nfSQzT.rst new file mode 100644 index 000000000000000..c79650c3a64febc --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-10-10-14-47-13.gh-issue-125221.nfSQzT.rst @@ -0,0 +1,2 @@ +Fix possible race condition when calling :meth:`~object.__reduce_ex__` for the +first time in the free threading build. diff --git a/Objects/object.c b/Objects/object.c index 27d06cc081259d5..4a4c5bf7d7f08a4 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -2372,6 +2372,14 @@ _PyTypes_InitTypes(PyInterpreterState *interp) } } + // Cache __reduce__ from PyBaseObject_Type object + PyObject *baseobj_dict = _PyType_GetDict(&PyBaseObject_Type); + PyObject *baseobj_reduce = PyDict_GetItemWithError(baseobj_dict, &_Py_ID(__reduce__)); + if (baseobj_reduce == NULL && PyErr_Occurred()) { + return _PyStatus_ERR("Can't get __reduce__ from base object"); + } + _Py_INTERP_CACHED_OBJECT(interp, objreduce) = baseobj_reduce; + // Must be after static types are initialized if (_Py_initialize_generic(interp) < 0) { return _PyStatus_ERR("Can't initialize generic types"); diff --git a/Objects/typeobject.c b/Objects/typeobject.c index d90bb5825fd437d..6ca4406ec0ea2d7 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -7359,18 +7359,7 @@ static PyObject * object___reduce_ex___impl(PyObject *self, int protocol) /*[clinic end generated code: output=2e157766f6b50094 input=f326b43fb8a4c5ff]*/ { -#define objreduce \ - (_Py_INTERP_CACHED_OBJECT(_PyInterpreterState_GET(), objreduce)) - PyObject *reduce, *res; - - if (objreduce == NULL) { - PyObject *dict = lookup_tp_dict(&PyBaseObject_Type); - objreduce = PyDict_GetItemWithError(dict, &_Py_ID(__reduce__)); - if (objreduce == NULL && PyErr_Occurred()) { - return NULL; - } - } - + PyObject *reduce; if (PyObject_GetOptionalAttr(self, &_Py_ID(__reduce__), &reduce) < 0) { return NULL; } @@ -7384,10 +7373,12 @@ object___reduce_ex___impl(PyObject *self, int protocol) Py_DECREF(reduce); return NULL; } - override = (clsreduce != objreduce); + + PyInterpreterState *interp = _PyInterpreterState_GET(); + override = (clsreduce != _Py_INTERP_CACHED_OBJECT(interp, objreduce)); Py_DECREF(clsreduce); if (override) { - res = _PyObject_CallNoArgs(reduce); + PyObject *res = _PyObject_CallNoArgs(reduce); Py_DECREF(reduce); return res; } @@ -7396,7 +7387,6 @@ object___reduce_ex___impl(PyObject *self, int protocol) } return _common_reduce(self, protocol); -#undef objreduce } static PyObject * From 0135848059162ad81478a7776fec622d68a36524 Mon Sep 17 00:00:00 2001 From: Jan Kaliszewski Date: Fri, 11 Oct 2024 10:15:46 +0200 Subject: [PATCH 29/80] gh-125058: update `_thread` docs regarding interruptibility of `lock.acquire()` (#125141) --- Doc/library/_thread.rst | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/Doc/library/_thread.rst b/Doc/library/_thread.rst index 5fd604c05380ac5..6a66fc4c64bc450 100644 --- a/Doc/library/_thread.rst +++ b/Doc/library/_thread.rst @@ -219,9 +219,11 @@ In addition to these methods, lock objects can also be used via the * Calling :func:`sys.exit` or raising the :exc:`SystemExit` exception is equivalent to calling :func:`_thread.exit`. -* It is not possible to interrupt the :meth:`~threading.Lock.acquire` method on - a lock --- the :exc:`KeyboardInterrupt` exception will happen after the lock - has been acquired. +* It is platform-dependent whether the :meth:`~threading.Lock.acquire` method + on a lock can be interrupted (so that the :exc:`KeyboardInterrupt` exception + will happen immediately, rather than only after the lock has been acquired or + the operation has timed out). It can be interrupted on POSIX, but not on + Windows. * When the main thread exits, it is system defined whether the other threads survive. On most systems, they are killed without executing From 18c74497681e0107d7cde53e63ea42feb38f2176 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 11 Oct 2024 11:43:29 +0300 Subject: [PATCH 30/80] gh-61011: Fix inheritance of nested mutually exclusive groups in argparse (GH-125210) Previously, all nested mutually exclusive groups lost their connection to the group containing them and were displayed as belonging directly to the parser. Co-authored-by: Danica J. Sutherland --- Lib/argparse.py | 6 +++- Lib/test/test_argparse.py | 29 +++++++++++++++++++ Misc/ACKS | 1 + ...4-10-09-21-42-43.gh-issue-61011.pQXZb1.rst | 4 +++ 4 files changed, 39 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Library/2024-10-09-21-42-43.gh-issue-61011.pQXZb1.rst diff --git a/Lib/argparse.py b/Lib/argparse.py index d1f8fa2ace8611a..2d8a7ef343a4ef8 100644 --- a/Lib/argparse.py +++ b/Lib/argparse.py @@ -1521,7 +1521,11 @@ def _add_container_actions(self, container): # NOTE: if add_mutually_exclusive_group ever gains title= and # description= then this code will need to be expanded as above for group in container._mutually_exclusive_groups: - mutex_group = self.add_mutually_exclusive_group( + if group._container is container: + cont = self + else: + cont = title_group_map[group._container.title] + mutex_group = cont.add_mutually_exclusive_group( required=group.required) # map the actions to their new mutex group diff --git a/Lib/test/test_argparse.py b/Lib/test/test_argparse.py index c9e79eb18a08fb9..1ebbc21bc1755b5 100644 --- a/Lib/test/test_argparse.py +++ b/Lib/test/test_argparse.py @@ -2942,6 +2942,35 @@ def test_groups_parents(self): def test_wrong_type_parents(self): self.assertRaises(TypeError, ErrorRaisingArgumentParser, parents=[1]) + def test_mutex_groups_parents(self): + parent = ErrorRaisingArgumentParser(add_help=False) + g = parent.add_argument_group(title='g', description='gd') + g.add_argument('-w') + g.add_argument('-x') + m = g.add_mutually_exclusive_group() + m.add_argument('-y') + m.add_argument('-z') + parser = ErrorRaisingArgumentParser(prog='PROG', parents=[parent]) + + self.assertRaises(ArgumentParserError, parser.parse_args, + ['-y', 'Y', '-z', 'Z']) + + parser_help = parser.format_help() + self.assertEqual(parser_help, textwrap.dedent('''\ + usage: PROG [-h] [-w W] [-x X] [-y Y | -z Z] + + options: + -h, --help show this help message and exit + + g: + gd + + -w W + -x X + -y Y + -z Z + ''')) + # ============================== # Mutually exclusive group tests # ============================== diff --git a/Misc/ACKS b/Misc/ACKS index d94cbacf8884680..a1769d9601a2ea4 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -1814,6 +1814,7 @@ Reuben Sumner Eryk Sun Sanjay Sundaresan Marek Šuppa +Danica J. Sutherland Hisao Suzuki Kalle Svensson Andrew Svetlov diff --git a/Misc/NEWS.d/next/Library/2024-10-09-21-42-43.gh-issue-61011.pQXZb1.rst b/Misc/NEWS.d/next/Library/2024-10-09-21-42-43.gh-issue-61011.pQXZb1.rst new file mode 100644 index 000000000000000..20f9c0b9c78b121 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-09-21-42-43.gh-issue-61011.pQXZb1.rst @@ -0,0 +1,4 @@ +Fix inheritance of nested mutually exclusive groups from parent parser in +:class:`argparse.ArgumentParser`. Previously, all nested mutually exclusive +groups lost their connection to the group containing them and were displayed +as belonging directly to the parser. From b3aa1b5fe260382788a2df416599325ad680a5ee Mon Sep 17 00:00:00 2001 From: Y5 <124019959+y5c4l3@users.noreply.github.com> Date: Fri, 11 Oct 2024 09:08:03 +0000 Subject: [PATCH 31/80] gh-125235: Keep `_tkinter` TCL paths pointing to base installation on Windows (#125250) Signed-off-by: y5c4l3 --- .../next/Library/2024-10-10-18-33-31.gh-issue-125235.0kOB5I.rst | 2 ++ Modules/_tkinter.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Library/2024-10-10-18-33-31.gh-issue-125235.0kOB5I.rst diff --git a/Misc/NEWS.d/next/Library/2024-10-10-18-33-31.gh-issue-125235.0kOB5I.rst b/Misc/NEWS.d/next/Library/2024-10-10-18-33-31.gh-issue-125235.0kOB5I.rst new file mode 100644 index 000000000000000..f64d15917da1fc5 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-10-18-33-31.gh-issue-125235.0kOB5I.rst @@ -0,0 +1,2 @@ +Keep :mod:`tkinter` TCL paths in venv pointing to base installation on +Windows. diff --git a/Modules/_tkinter.c b/Modules/_tkinter.c index 4f05cab375ed6b7..b0b70ccb8cc3d3b 100644 --- a/Modules/_tkinter.c +++ b/Modules/_tkinter.c @@ -143,7 +143,7 @@ _get_tcl_lib_path(void) struct stat stat_buf; int stat_return_value; - PyObject *prefix = PySys_GetObject("prefix"); // borrowed reference + PyObject *prefix = PySys_GetObject("base_prefix"); // borrowed reference if (prefix == NULL) { return NULL; } From 2115d76acc14effb3dbb9fedcf21048b2ad62c5e Mon Sep 17 00:00:00 2001 From: sobolevn Date: Fri, 11 Oct 2024 17:39:18 +0300 Subject: [PATCH 32/80] gh-124787: Fix `TypeAliasType` and incorrect `type_params` (#124795) Co-authored-by: Jelle Zijlstra --- Lib/test/test_type_aliases.py | 44 ++++++++- ...-09-30-20-46-32.gh-issue-124787.3FnJnP.rst | 4 + Objects/typevarobject.c | 97 ++++++++++++++++--- 3 files changed, 133 insertions(+), 12 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-09-30-20-46-32.gh-issue-124787.3FnJnP.rst diff --git a/Lib/test/test_type_aliases.py b/Lib/test/test_type_aliases.py index ebb65d8c6cf81b3..230bbe646baf281 100644 --- a/Lib/test/test_type_aliases.py +++ b/Lib/test/test_type_aliases.py @@ -4,7 +4,9 @@ from test.support import check_syntax_error, run_code from test.typinganndata import mod_generics_cache -from typing import Callable, TypeAliasType, TypeVar, get_args +from typing import ( + Callable, TypeAliasType, TypeVar, TypeVarTuple, ParamSpec, get_args, +) class TypeParamsInvalidTest(unittest.TestCase): @@ -225,6 +227,46 @@ def test_not_generic(self): ): TA[int] + def test_type_params_order_with_defaults(self): + HasNoDefaultT = TypeVar("HasNoDefaultT") + WithDefaultT = TypeVar("WithDefaultT", default=int) + + HasNoDefaultP = ParamSpec("HasNoDefaultP") + WithDefaultP = ParamSpec("WithDefaultP", default=HasNoDefaultP) + + HasNoDefaultTT = TypeVarTuple("HasNoDefaultTT") + WithDefaultTT = TypeVarTuple("WithDefaultTT", default=HasNoDefaultTT) + + for type_params in [ + (HasNoDefaultT, WithDefaultT), + (HasNoDefaultP, WithDefaultP), + (HasNoDefaultTT, WithDefaultTT), + ]: + with self.subTest(type_params=type_params): + TypeAliasType("A", int, type_params=type_params) # ok + + msg = "follows default type parameter" + for type_params in [ + (WithDefaultT, HasNoDefaultT), + (WithDefaultP, HasNoDefaultP), + (WithDefaultTT, HasNoDefaultTT), + (WithDefaultT, HasNoDefaultP), # different types + ]: + with self.subTest(type_params=type_params): + with self.assertRaisesRegex(TypeError, msg): + TypeAliasType("A", int, type_params=type_params) + + def test_expects_type_like(self): + T = TypeVar("T") + + msg = "Expected a type param" + with self.assertRaisesRegex(TypeError, msg): + TypeAliasType("A", int, type_params=(1,)) + with self.assertRaisesRegex(TypeError, msg): + TypeAliasType("A", int, type_params=(1, 2)) + with self.assertRaisesRegex(TypeError, msg): + TypeAliasType("A", int, type_params=(T, 2)) + def test_keywords(self): TA = TypeAliasType(name="TA", value=int) self.assertEqual(TA.__name__, "TA") diff --git a/Misc/NEWS.d/next/Library/2024-09-30-20-46-32.gh-issue-124787.3FnJnP.rst b/Misc/NEWS.d/next/Library/2024-09-30-20-46-32.gh-issue-124787.3FnJnP.rst new file mode 100644 index 000000000000000..d9d1bbcf5a2fe4b --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-09-30-20-46-32.gh-issue-124787.3FnJnP.rst @@ -0,0 +1,4 @@ +Fix :class:`typing.TypeAliasType` with incorrect ``type_params`` argument. +Now it raises a :exc:`TypeError` when a type parameter without a default +follows one with a default, and when an entry in the ``type_params`` tuple +is not a type parameter object. diff --git a/Objects/typevarobject.c b/Objects/typevarobject.c index 51d93ed8b5ba8c8..91cc37c9a726365 100644 --- a/Objects/typevarobject.c +++ b/Objects/typevarobject.c @@ -1799,6 +1799,24 @@ _Py_make_typevartuple(PyThreadState *Py_UNUSED(ignored), PyObject *v) return (PyObject *)typevartuple_alloc(v, NULL, NULL); } +static PyObject * +get_type_param_default(PyThreadState *ts, PyObject *typeparam) { + // Does not modify refcount of existing objects. + if (Py_IS_TYPE(typeparam, ts->interp->cached_objects.typevar_type)) { + return typevar_default((typevarobject *)typeparam, NULL); + } + else if (Py_IS_TYPE(typeparam, ts->interp->cached_objects.paramspec_type)) { + return paramspec_default((paramspecobject *)typeparam, NULL); + } + else if (Py_IS_TYPE(typeparam, ts->interp->cached_objects.typevartuple_type)) { + return typevartuple_default((typevartupleobject *)typeparam, NULL); + } + else { + PyErr_Format(PyExc_TypeError, "Expected a type param, got %R", typeparam); + return NULL; + } +} + static void typealias_dealloc(PyObject *self) { @@ -1906,25 +1924,75 @@ static PyGetSetDef typealias_getset[] = { {0} }; -static typealiasobject * -typealias_alloc(PyObject *name, PyObject *type_params, PyObject *compute_value, - PyObject *value, PyObject *module) -{ - typealiasobject *ta = PyObject_GC_New(typealiasobject, &_PyTypeAlias_Type); - if (ta == NULL) { +static PyObject * +typealias_check_type_params(PyObject *type_params, int *err) { + // Can return type_params or NULL without exception set. + // Does not change the reference count of type_params, + // sets `*err` to 1 when error happens and sets an exception, + // otherwise `*err` is set to 0. + *err = 0; + if (type_params == NULL) { return NULL; } - ta->name = Py_NewRef(name); + + assert(PyTuple_Check(type_params)); + Py_ssize_t length = PyTuple_GET_SIZE(type_params); + if (!length) { // 0-length tuples are the same as `NULL`. + return NULL; + } + + PyThreadState *ts = _PyThreadState_GET(); + int default_seen = 0; + for (Py_ssize_t index = 0; index < length; index++) { + PyObject *type_param = PyTuple_GET_ITEM(type_params, index); + PyObject *dflt = get_type_param_default(ts, type_param); + if (dflt == NULL) { + *err = 1; + return NULL; + } + if (dflt == &_Py_NoDefaultStruct) { + if (default_seen) { + *err = 1; + PyErr_Format(PyExc_TypeError, + "non-default type parameter '%R' " + "follows default type parameter", + type_param); + return NULL; + } + } else { + default_seen = 1; + Py_DECREF(dflt); + } + } + + return type_params; +} + +static PyObject * +typelias_convert_type_params(PyObject *type_params) +{ if ( type_params == NULL || Py_IsNone(type_params) || (PyTuple_Check(type_params) && PyTuple_GET_SIZE(type_params) == 0) ) { - ta->type_params = NULL; + return NULL; } else { - ta->type_params = Py_NewRef(type_params); + return type_params; } +} + +static typealiasobject * +typealias_alloc(PyObject *name, PyObject *type_params, PyObject *compute_value, + PyObject *value, PyObject *module) +{ + typealiasobject *ta = PyObject_GC_New(typealiasobject, &_PyTypeAlias_Type); + if (ta == NULL) { + return NULL; + } + ta->name = Py_NewRef(name); + ta->type_params = Py_XNewRef(type_params); ta->compute_value = Py_XNewRef(compute_value); ta->value = Py_XNewRef(value); ta->module = Py_XNewRef(module); @@ -2002,11 +2070,18 @@ typealias_new_impl(PyTypeObject *type, PyObject *name, PyObject *value, PyErr_SetString(PyExc_TypeError, "type_params must be a tuple"); return NULL; } + + int err = 0; + PyObject *checked_params = typealias_check_type_params(type_params, &err); + if (err) { + return NULL; + } + PyObject *module = caller(); if (module == NULL) { return NULL; } - PyObject *ta = (PyObject *)typealias_alloc(name, type_params, NULL, value, + PyObject *ta = (PyObject *)typealias_alloc(name, checked_params, NULL, value, module); Py_DECREF(module); return ta; @@ -2072,7 +2147,7 @@ _Py_make_typealias(PyThreadState* unused, PyObject *args) assert(PyTuple_GET_SIZE(args) == 3); PyObject *name = PyTuple_GET_ITEM(args, 0); assert(PyUnicode_Check(name)); - PyObject *type_params = PyTuple_GET_ITEM(args, 1); + PyObject *type_params = typelias_convert_type_params(PyTuple_GET_ITEM(args, 1)); PyObject *compute_value = PyTuple_GET_ITEM(args, 2); assert(PyFunction_Check(compute_value)); return (PyObject *)typealias_alloc(name, type_params, compute_value, NULL, NULL); From 08f6bf717118963815d9a3e60578104470fdf3e1 Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Sat, 12 Oct 2024 00:27:26 +0900 Subject: [PATCH 33/80] gh-124612: Update autoconf container image (#125320) --- Tools/build/regen-configure.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Tools/build/regen-configure.sh b/Tools/build/regen-configure.sh index efc80c8527885c5..1a24b07c3ff7073 100755 --- a/Tools/build/regen-configure.sh +++ b/Tools/build/regen-configure.sh @@ -5,7 +5,7 @@ set -e -x # The check_generated_files job of .github/workflows/build.yml must kept in # sync with this script. Use the same container image than the job so the job # doesn't need to run autoreconf in a container. -IMAGE="ghcr.io/python/autoconf:2024.10.06.11200919239" +IMAGE="ghcr.io/python/autoconf:2024.10.11.11293396815" AUTORECONF="autoreconf -ivf -Werror" WORK_DIR="/src" From a00221e5a70e54a281ba0e2cff8d85cd37ae305f Mon Sep 17 00:00:00 2001 From: AN Long Date: Sat, 12 Oct 2024 01:55:36 +0800 Subject: [PATCH 34/80] gh-116738: Make `_csv` module thread-safe (#118344) --- Modules/_csv.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/Modules/_csv.c b/Modules/_csv.c index 913560ce4a0ee30..1a4dc3f1f55acee 100644 --- a/Modules/_csv.c +++ b/Modules/_csv.c @@ -14,6 +14,7 @@ module instead. #endif #include "Python.h" +#include "pycore_pyatomic_ft_wrappers.h" #include // offsetof() #include @@ -34,7 +35,7 @@ typedef struct { PyTypeObject *dialect_type; PyTypeObject *reader_type; PyTypeObject *writer_type; - long field_limit; /* max parsed field size */ + Py_ssize_t field_limit; /* max parsed field size */ PyObject *str_write; } _csvstate; @@ -706,10 +707,11 @@ parse_grow_buff(ReaderObj *self) static int parse_add_char(ReaderObj *self, _csvstate *module_state, Py_UCS4 c) { - if (self->field_len >= module_state->field_limit) { + Py_ssize_t field_limit = FT_ATOMIC_LOAD_SSIZE_RELAXED(module_state->field_limit); + if (self->field_len >= field_limit) { PyErr_Format(module_state->error_obj, - "field larger than field limit (%ld)", - module_state->field_limit); + "field larger than field limit (%zd)", + field_limit); return -1; } if (self->field_len == self->field_size && !parse_grow_buff(self)) @@ -1659,20 +1661,20 @@ _csv_field_size_limit_impl(PyObject *module, PyObject *new_limit) /*[clinic end generated code: output=f2799ecd908e250b input=cec70e9226406435]*/ { _csvstate *module_state = get_csv_state(module); - long old_limit = module_state->field_limit; + Py_ssize_t old_limit = FT_ATOMIC_LOAD_SSIZE_RELAXED(module_state->field_limit); if (new_limit != NULL) { if (!PyLong_CheckExact(new_limit)) { PyErr_Format(PyExc_TypeError, "limit must be an integer"); return NULL; } - module_state->field_limit = PyLong_AsLong(new_limit); - if (module_state->field_limit == -1 && PyErr_Occurred()) { - module_state->field_limit = old_limit; + Py_ssize_t new_limit_value = PyLong_AsSsize_t(new_limit); + if (new_limit_value == -1 && PyErr_Occurred()) { return NULL; } + FT_ATOMIC_STORE_SSIZE_RELAXED(module_state->field_limit, new_limit_value); } - return PyLong_FromLong(old_limit); + return PyLong_FromSsize_t(old_limit); } static PyType_Slot error_slots[] = { From cc2938a18967c9d462ebb18bc09f73e4364aa7d2 Mon Sep 17 00:00:00 2001 From: Jelle Zijlstra Date: Fri, 11 Oct 2024 12:41:59 -0700 Subject: [PATCH 35/80] gh-124917: Allow keyword args to os.path.exists/lexists on Windows (#124918) --- Lib/test/test_genericpath.py | 4 + ...-10-02-21-11-18.gh-issue-124917.Lnwh5b.rst | 2 + Modules/clinic/posixmodule.c.h | 78 ++++++++++++++++--- Modules/posixmodule.c | 6 +- 4 files changed, 77 insertions(+), 13 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-10-02-21-11-18.gh-issue-124917.Lnwh5b.rst diff --git a/Lib/test/test_genericpath.py b/Lib/test/test_genericpath.py index bf04b3fecf7057a..6d2593cb4cf2288 100644 --- a/Lib/test/test_genericpath.py +++ b/Lib/test/test_genericpath.py @@ -156,6 +156,10 @@ def test_exists(self): self.assertIs(self.pathmodule.lexists(filename + '\x00'), False) self.assertIs(self.pathmodule.lexists(bfilename + b'\x00'), False) + # Keyword arguments are accepted + self.assertIs(self.pathmodule.exists(path=filename), True) + self.assertIs(self.pathmodule.lexists(path=filename), True) + @unittest.skipUnless(hasattr(os, "pipe"), "requires os.pipe()") @unittest.skipIf(is_emscripten, "Emscripten pipe fds have no stat") def test_exists_fd(self): diff --git a/Misc/NEWS.d/next/Library/2024-10-02-21-11-18.gh-issue-124917.Lnwh5b.rst b/Misc/NEWS.d/next/Library/2024-10-02-21-11-18.gh-issue-124917.Lnwh5b.rst new file mode 100644 index 000000000000000..f208793859bbf86 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-02-21-11-18.gh-issue-124917.Lnwh5b.rst @@ -0,0 +1,2 @@ +Allow calling :func:`os.path.exists` and :func:`os.path.lexists` with +keyword arguments on Windows. Fixes a regression in 3.13.0. diff --git a/Modules/clinic/posixmodule.c.h b/Modules/clinic/posixmodule.c.h index 749fe54598cc391..d9d919ea75d853a 100644 --- a/Modules/clinic/posixmodule.c.h +++ b/Modules/clinic/posixmodule.c.h @@ -2015,25 +2015,55 @@ os__path_splitroot(PyObject *module, PyObject *const *args, Py_ssize_t nargs, Py #if defined(MS_WINDOWS) PyDoc_STRVAR(os__path_exists__doc__, -"_path_exists($module, path, /)\n" +"_path_exists($module, /, path)\n" "--\n" "\n" "Test whether a path exists. Returns False for broken symbolic links."); #define OS__PATH_EXISTS_METHODDEF \ - {"_path_exists", (PyCFunction)os__path_exists, METH_O, os__path_exists__doc__}, + {"_path_exists", _PyCFunction_CAST(os__path_exists), METH_FASTCALL|METH_KEYWORDS, os__path_exists__doc__}, static int os__path_exists_impl(PyObject *module, path_t *path); static PyObject * -os__path_exists(PyObject *module, PyObject *arg) +os__path_exists(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) { PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 1 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_item = { &_Py_ID(path), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"path", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "_path_exists", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[1]; path_t path = PATH_T_INITIALIZE_P("_path_exists", "path", 0, 0, 1, 1); int _return_value; - if (!path_converter(arg, &path)) { + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 1, 0, argsbuf); + if (!args) { + goto exit; + } + if (!path_converter(args[0], &path)) { goto exit; } _return_value = os__path_exists_impl(module, &path); @@ -2054,25 +2084,55 @@ os__path_exists(PyObject *module, PyObject *arg) #if defined(MS_WINDOWS) PyDoc_STRVAR(os__path_lexists__doc__, -"_path_lexists($module, path, /)\n" +"_path_lexists($module, /, path)\n" "--\n" "\n" "Test whether a path exists. Returns True for broken symbolic links."); #define OS__PATH_LEXISTS_METHODDEF \ - {"_path_lexists", (PyCFunction)os__path_lexists, METH_O, os__path_lexists__doc__}, + {"_path_lexists", _PyCFunction_CAST(os__path_lexists), METH_FASTCALL|METH_KEYWORDS, os__path_lexists__doc__}, static int os__path_lexists_impl(PyObject *module, path_t *path); static PyObject * -os__path_lexists(PyObject *module, PyObject *arg) +os__path_lexists(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) { PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 1 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_item = { &_Py_ID(path), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"path", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "_path_lexists", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[1]; path_t path = PATH_T_INITIALIZE_P("_path_lexists", "path", 0, 0, 1, 1); int _return_value; - if (!path_converter(arg, &path)) { + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 1, 0, argsbuf); + if (!args) { + goto exit; + } + if (!path_converter(args[0], &path)) { goto exit; } _return_value = os__path_lexists_impl(module, &path); @@ -12837,4 +12897,4 @@ os__create_environ(PyObject *module, PyObject *Py_UNUSED(ignored)) #ifndef OS__SUPPORTS_VIRTUAL_TERMINAL_METHODDEF #define OS__SUPPORTS_VIRTUAL_TERMINAL_METHODDEF #endif /* !defined(OS__SUPPORTS_VIRTUAL_TERMINAL_METHODDEF) */ -/*[clinic end generated code: output=b93bbaaa8eb5b0ce input=a9049054013a1b77]*/ +/*[clinic end generated code: output=18d75b737513dae6 input=a9049054013a1b77]*/ diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index 334350285f3b6f7..c0af78ba075e85d 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -5391,7 +5391,6 @@ _testFileType(path_t *path, int testedType) os._path_exists -> bool path: path_t(allow_fd=True, suppress_value_error=True) - / Test whether a path exists. Returns False for broken symbolic links. @@ -5399,7 +5398,7 @@ Test whether a path exists. Returns False for broken symbolic links. static int os__path_exists_impl(PyObject *module, path_t *path) -/*[clinic end generated code: output=8da13acf666e16ba input=29198507a6082a57]*/ +/*[clinic end generated code: output=8da13acf666e16ba input=142beabfc66783eb]*/ { return _testFileExists(path, TRUE); } @@ -5409,7 +5408,6 @@ os__path_exists_impl(PyObject *module, path_t *path) os._path_lexists -> bool path: path_t(allow_fd=True, suppress_value_error=True) - / Test whether a path exists. Returns True for broken symbolic links. @@ -5417,7 +5415,7 @@ Test whether a path exists. Returns True for broken symbolic links. static int os__path_lexists_impl(PyObject *module, path_t *path) -/*[clinic end generated code: output=e7240ed5fc45bff3 input=03d9fed8bc6ce96f]*/ +/*[clinic end generated code: output=e7240ed5fc45bff3 input=208205112a3cc1ed]*/ { return _testFileExists(path, FALSE); } From 76b29d271b3132bf8e13bc724f10be8c630057ba Mon Sep 17 00:00:00 2001 From: Rafael Fontenelle Date: Fri, 11 Oct 2024 17:00:31 -0300 Subject: [PATCH 36/80] Fix typo in ``Doclibrary/functions.rst`` (#125327) --- Doc/library/functions.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/library/functions.rst b/Doc/library/functions.rst index 7f8df704a333276..0638df04c6ff407 100644 --- a/Doc/library/functions.rst +++ b/Doc/library/functions.rst @@ -686,7 +686,7 @@ are always available. They are listed here in alphabetical order. The *closure* argument specifies a closure--a tuple of cellvars. It's only valid when the *object* is a code object containing :term:`free (closure) variables `. - The length of the tuple must exactly match the length of the code object'S + The length of the tuple must exactly match the length of the code object's :attr:`~codeobject.co_freevars` attribute. .. audit-event:: exec code_object exec From 89515be596a0ca05fd9ab4ddf76c8013dd093545 Mon Sep 17 00:00:00 2001 From: Irit Katriel <1055913+iritkatriel@users.noreply.github.com> Date: Fri, 11 Oct 2024 21:18:37 +0100 Subject: [PATCH 37/80] gh-119786: Move garbage collection doc from devguide to InternalDocs (#125282) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Carol Willing carolcode@willingconsulting.com Co-Authored-By: Ezio Melotti ezio.melotti@gmail.com Co-Authored-By: Hugo van Kemenade hugovk@users.noreply.github.com Co-Authored-By: Itamar Ostricher itamarost@gmail.com Co-Authored-By: Jesús Cea jcea@jcea.es Co-Authored-By: Joannah Nanjekye 33177550+nanjekyejoannah@users.noreply.github.com Co-Authored-By: Ned Batchelder ned@nedbatchelder.com Co-Authored-By: Pablo Galindo Salgado Pablogsal@gmail.com Co-Authored-By: Pamela Fox pamela.fox@gmail.com Co-Authored-By: Sam Gross colesbury@gmail.com Co-Authored-By: Stefan Pochmann 609905+pochmann@users.noreply.github.com Co-Authored-By: T. Wouters thomas@python.org Co-Authored-By: q-ata 24601033+q-ata@users.noreply.github.com Co-Authored-By: slateny 46876382+slateny@users.noreply.github.com Co-Authored-By: Борис Верховский boris.verk@gmail.com Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com> Co-authored-by: Jacob Coffee --- InternalDocs/README.md | 2 + InternalDocs/garbage_collector.md | 596 ++++++++++++++++++ .../images/python-cyclic-gc-1-new-page.png | Bin 0 -> 4415 bytes .../images/python-cyclic-gc-2-new-page.png | Bin 0 -> 4337 bytes .../images/python-cyclic-gc-3-new-page.png | Bin 0 -> 4876 bytes .../images/python-cyclic-gc-4-new-page.png | Bin 0 -> 4863 bytes .../images/python-cyclic-gc-5-new-page.png | Bin 0 -> 5712 bytes 7 files changed, 598 insertions(+) create mode 100644 InternalDocs/garbage_collector.md create mode 100644 InternalDocs/images/python-cyclic-gc-1-new-page.png create mode 100644 InternalDocs/images/python-cyclic-gc-2-new-page.png create mode 100644 InternalDocs/images/python-cyclic-gc-3-new-page.png create mode 100644 InternalDocs/images/python-cyclic-gc-4-new-page.png create mode 100644 InternalDocs/images/python-cyclic-gc-5-new-page.png diff --git a/InternalDocs/README.md b/InternalDocs/README.md index 8956ecafed2039b..805e2f97937e1eb 100644 --- a/InternalDocs/README.md +++ b/InternalDocs/README.md @@ -22,4 +22,6 @@ it is not, please report that through the [The Source Code Locations Table](locations.md) +[Garbage collector design](garbage_collector.md) + [Exception Handling](exception_handling.md) diff --git a/InternalDocs/garbage_collector.md b/InternalDocs/garbage_collector.md new file mode 100644 index 000000000000000..fd0246fa1a60e29 --- /dev/null +++ b/InternalDocs/garbage_collector.md @@ -0,0 +1,596 @@ + +Garbage collector design +======================== + +Abstract +======== + +The main garbage collection algorithm used by CPython is reference counting. The basic idea is +that CPython counts how many different places there are that have a reference to an +object. Such a place could be another object, or a global (or static) C variable, or +a local variable in some C function. When an object’s reference count becomes zero, +the object is deallocated. If it contains references to other objects, their +reference counts are decremented. Those other objects may be deallocated in turn, if +this decrement makes their reference count become zero, and so on. The reference +count field can be examined using the ``sys.getrefcount()`` function (notice that the +value returned by this function is always 1 more as the function also has a reference +to the object when called): + +```pycon + >>> x = object() + >>> sys.getrefcount(x) + 2 + >>> y = x + >>> sys.getrefcount(x) + 3 + >>> del y + >>> sys.getrefcount(x) + 2 +``` + +The main problem with the reference counting scheme is that it does not handle reference +cycles. For instance, consider this code: + +```pycon + >>> container = [] + >>> container.append(container) + >>> sys.getrefcount(container) + 3 + >>> del container +``` + +In this example, ``container`` holds a reference to itself, so even when we remove +our reference to it (the variable "container") the reference count never falls to 0 +because it still has its own internal reference. Therefore it would never be +cleaned just by simple reference counting. For this reason some additional machinery +is needed to clean these reference cycles between objects once they become +unreachable. This is the cyclic garbage collector, usually called just Garbage +Collector (GC), even though reference counting is also a form of garbage collection. + +Starting in version 3.13, CPython contains two GC implementations: + +- The default build implementation relies on the + [global interpreter lock](https://docs.python.org/3/glossary.html#term-global-interpreter-lock) + for thread safety. +- The free-threaded build implementation pauses other executing threads when + performing a collection for thread safety. + +Both implementations use the same basic algorithms, but operate on different +data structures. The the section on +[Differences between GC implementations](#Differences-between-GC-implementations) +for the details. + + +Memory layout and object structure +================================== + +The garbage collector requires additional fields in Python objects to support +garbage collection. These extra fields are different in the default and the +free-threaded builds. + + +GC for the default build +------------------------ + +Normally the C structure supporting a regular Python object looks as follows: + +``` + object -----> +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ \ + | ob_refcnt | | + +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ | PyObject_HEAD + | *ob_type | | + +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ / + | ... | +``` + +In order to support the garbage collector, the memory layout of objects is altered +to accommodate extra information **before** the normal layout: + +``` + +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ \ + | *_gc_next | | + +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ | PyGC_Head + | *_gc_prev | | + object -----> +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ / + | ob_refcnt | \ + +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ | PyObject_HEAD + | *ob_type | | + +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ / + | ... | +``` + + +In this way the object can be treated as a normal python object and when the extra +information associated to the GC is needed the previous fields can be accessed by a +simple type cast from the original object: `((PyGC_Head *)(the_object)-1)`. + +As is explained later in the +[Optimization: reusing fields to save memory](#optimization-reusing-fields-to-save-memory) +section, these two extra fields are normally used to keep doubly linked lists of all the +objects tracked by the garbage collector (these lists are the GC generations, more on +that in the [Optimization: generations](#Optimization-generations) section), but +they are also reused to fulfill other purposes when the full doubly linked list +structure is not needed as a memory optimization. + +Doubly linked lists are used because they efficiently support the most frequently required operations. In +general, the collection of all objects tracked by GC is partitioned into disjoint sets, each in its own +doubly linked list. Between collections, objects are partitioned into "generations", reflecting how +often they've survived collection attempts. During collections, the generation(s) being collected +are further partitioned into, for example, sets of reachable and unreachable objects. Doubly linked lists +support moving an object from one partition to another, adding a new object, removing an object +entirely (objects tracked by GC are most often reclaimed by the refcounting system when GC +isn't running at all!), and merging partitions, all with a small constant number of pointer updates. +With care, they also support iterating over a partition while objects are being added to - and +removed from - it, which is frequently required while GC is running. + +GC for the free-threaded build +------------------------------ + +In the free-threaded build, Python objects contain a 1-byte field +``ob_gc_bits`` that is used to track garbage collection related state. The +field exists in all objects, including ones that do not support cyclic +garbage collection. The field is used to identify objects that are tracked +by the collector, ensure that finalizers are called only once per object, +and, during garbage collection, differentiate reachable vs. unreachable objects. + +``` + object -----> +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ \ + | ob_tid | | + +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ | + | pad | ob_mutex | ob_gc_bits | ob_ref_local | | + +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ | PyObject_HEAD + | ob_ref_shared | | + +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ | + | *ob_type | | + +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ / + | ... | +``` + +Note that not all fields are to scale. ``pad`` is two bytes, ``ob_mutex`` and +``ob_gc_bits`` are each one byte, and ``ob_ref_local`` is four bytes. The +other fields, ``ob_tid``, ``ob_ref_shared``, and ``ob_type``, are all +pointer-sized (that is, eight bytes on a 64-bit platform). + + +The garbage collector also temporarily repurposes the ``ob_tid`` (thread ID) +and ``ob_ref_local`` (local reference count) fields for other purposes during +collections. + + +C APIs +------ + +Specific APIs are offered to allocate, deallocate, initialize, track, and untrack +objects with GC support. These APIs can be found in the +[Garbage Collector C API documentation](https://docs.python.org/3/c-api/gcsupport.html). + +Apart from this object structure, the type object for objects supporting garbage +collection must include the ``Py_TPFLAGS_HAVE_GC`` in its ``tp_flags`` slot and +provide an implementation of the ``tp_traverse`` handler. Unless it can be proven +that the objects cannot form reference cycles with only objects of its type or unless +the type is immutable, a ``tp_clear`` implementation must also be provided. + + +Identifying reference cycles +============================ + +The algorithm that CPython uses to detect those reference cycles is +implemented in the ``gc`` module. The garbage collector **only focuses** +on cleaning container objects (that is, objects that can contain a reference +to one or more objects). These can be arrays, dictionaries, lists, custom +class instances, classes in extension modules, etc. One could think that +cycles are uncommon but the truth is that many internal references needed by +the interpreter create cycles everywhere. Some notable examples: + +- Exceptions contain traceback objects that contain a list of frames that + contain the exception itself. +- Module-level functions reference the module's dict (which is needed to resolve globals), + which in turn contains entries for the module-level functions. +- Instances have references to their class which itself references its module, and the module + contains references to everything that is inside (and maybe other modules) + and this can lead back to the original instance. +- When representing data structures like graphs, it is very typical for them to + have internal links to themselves. + +To correctly dispose of these objects once they become unreachable, they need +to be identified first. To understand how the algorithm works, let’s take +the case of a circular linked list which has one link referenced by a +variable ``A``, and one self-referencing object which is completely +unreachable: + +```pycon + >>> import gc + + >>> class Link: + ... def __init__(self, next_link=None): + ... self.next_link = next_link + + >>> link_3 = Link() + >>> link_2 = Link(link_3) + >>> link_1 = Link(link_2) + >>> link_3.next_link = link_1 + >>> A = link_1 + >>> del link_1, link_2, link_3 + + >>> link_4 = Link() + >>> link_4.next_link = link_4 + >>> del link_4 + + # Collect the unreachable Link object (and its .__dict__ dict). + >>> gc.collect() + 2 +``` + +The GC starts with a set of candidate objects it wants to scan. In the +default build, these "objects to scan" might be all container objects or a +smaller subset (or "generation"). In the free-threaded build, the collector +always scans all container objects. + +The objective is to identify all the unreachable objects. The collector does +this by identifying reachable objects; the remaining objects must be +unreachable. The first step is to identify all of the "to scan" objects that +are **directly** reachable from outside the set of candidate objects. These +objects have a refcount larger than the number of incoming references from +within the candidate set. + +Every object that supports garbage collection will have an extra reference +count field initialized to the reference count (``gc_ref`` in the figures) +of that object when the algorithm starts. This is because the algorithm needs +to modify the reference count to do the computations and in this way the +interpreter will not modify the real reference count field. + +![gc-image1](images/python-cyclic-gc-1-new-page.png) + +The GC then iterates over all containers in the first list and decrements by one the +`gc_ref` field of any other object that container is referencing. Doing +this makes use of the ``tp_traverse`` slot in the container class (implemented +using the C API or inherited by a superclass) to know what objects are referenced by +each container. After all the objects have been scanned, only the objects that have +references from outside the “objects to scan” list will have ``gc_ref > 0``. + +![gc-image2](images/python-cyclic-gc-2-new-page.png) + +Notice that having ``gc_ref == 0`` does not imply that the object is unreachable. +This is because another object that is reachable from the outside (``gc_ref > 0``) +can still have references to it. For instance, the ``link_2`` object in our example +ended having ``gc_ref == 0`` but is referenced still by the ``link_1`` object that +is reachable from the outside. To obtain the set of objects that are really +unreachable, the garbage collector re-scans the container objects using the +``tp_traverse`` slot; this time with a different traverse function that marks objects with +``gc_ref == 0`` as "tentatively unreachable" and then moves them to the +tentatively unreachable list. The following image depicts the state of the lists in a +moment when the GC processed the ``link_3`` and ``link_4`` objects but has not +processed ``link_1`` and ``link_2`` yet. + +![gc-image3](images/python-cyclic-gc-3-new-page.png) + +Then the GC scans the next ``link_1`` object. Because it has ``gc_ref == 1``, +the gc does not do anything special because it knows it has to be reachable (and is +already in what will become the reachable list): + +![gc-image4](images/python-cyclic-gc-4-new-page.png) + +When the GC encounters an object which is reachable (``gc_ref > 0``), it traverses +its references using the ``tp_traverse`` slot to find all the objects that are +reachable from it, moving them to the end of the list of reachable objects (where +they started originally) and setting its ``gc_ref`` field to 1. This is what happens +to ``link_2`` and ``link_3`` below as they are reachable from ``link_1``. From the +state in the previous image and after examining the objects referred to by ``link_1`` +the GC knows that ``link_3`` is reachable after all, so it is moved back to the +original list and its ``gc_ref`` field is set to 1 so that if the GC visits it again, +it will know that it's reachable. To avoid visiting an object twice, the GC marks all +objects that have already been visited once (by unsetting the ``PREV_MASK_COLLECTING`` +flag) so that if an object that has already been processed is referenced by some other +object, the GC does not process it twice. + +![gc-image5](images/python-cyclic-gc-5-new-page.png) + +Notice that an object that was marked as "tentatively unreachable" and was later +moved back to the reachable list will be visited again by the garbage collector +as now all the references that that object has need to be processed as well. This +process is really a breadth first search over the object graph. Once all the objects +are scanned, the GC knows that all container objects in the tentatively unreachable +list are really unreachable and can thus be garbage collected. + +Pragmatically, it's important to note that no recursion is required by any of this, +and neither does it in any other way require additional memory proportional to the +number of objects, number of pointers, or the lengths of pointer chains. Apart from +``O(1)`` storage for internal C needs, the objects themselves contain all the storage +the GC algorithms require. + +Why moving unreachable objects is better +---------------------------------------- + +It sounds logical to move the unreachable objects under the premise that most objects +are usually reachable, until you think about it: the reason it pays isn't actually +obvious. + +Suppose we create objects A, B, C in that order. They appear in the young generation +in the same order. If B points to A, and C to B, and C is reachable from outside, +then the adjusted refcounts after the first step of the algorithm runs will be 0, 0, +and 1 respectively because the only reachable object from the outside is C. + +When the next step of the algorithm finds A, A is moved to the unreachable list. The +same for B when it's first encountered. Then C is traversed, B is moved *back* to +the reachable list. B is eventually traversed, and then A is moved back to the reachable +list. + +So instead of not moving at all, the reachable objects B and A are each moved twice. +Why is this a win? A straightforward algorithm to move the reachable objects instead +would move A, B, and C once each. The key is that this dance leaves the objects in +order C, B, A - it's reversed from the original order. On all *subsequent* scans, +none of them will move. Since most objects aren't in cycles, this can save an +unbounded number of moves across an unbounded number of later collections. The only +time the cost can be higher is the first time the chain is scanned. + +Destroying unreachable objects +============================== + +Once the GC knows the list of unreachable objects, a very delicate process starts +with the objective of completely destroying these objects. Roughly, the process +follows these steps in order: + +1. Handle and clear weak references (if any). Weak references to unreachable objects + are set to ``None``. If the weak reference has an associated callback, the callback + is enqueued to be called once the clearing of weak references is finished. We only + invoke callbacks for weak references that are themselves reachable. If both the weak + reference and the pointed-to object are unreachable we do not execute the callback. + This is partly for historical reasons: the callback could resurrect an unreachable + object and support for weak references predates support for object resurrection. + Ignoring the weak reference's callback is fine because both the object and the weakref + are going away, so it's legitimate to say the weak reference is going away first. +2. If an object has legacy finalizers (``tp_del`` slot) move it to the + ``gc.garbage`` list. +3. Call the finalizers (``tp_finalize`` slot) and mark the objects as already + finalized to avoid calling finalizers twice if the objects are resurrected or + if other finalizers have removed the object first. +4. Deal with resurrected objects. If some objects have been resurrected, the GC + finds the new subset of objects that are still unreachable by running the cycle + detection algorithm again and continues with them. +5. Call the ``tp_clear`` slot of every object so all internal links are broken and + the reference counts fall to 0, triggering the destruction of all unreachable + objects. + +Optimization: generations +========================= + +In order to limit the time each garbage collection takes, the GC +implementation for the default build uses a popular optimization: +generations. The main idea behind this concept is the assumption that most +objects have a very short lifespan and can thus be collected soon after their +creation. This has proven to be very close to the reality of many Python +programs as many temporary objects are created and destroyed very quickly. + +To take advantage of this fact, all container objects are segregated into +three spaces/generations. Every new +object starts in the first generation (generation 0). The previous algorithm is +executed only over the objects of a particular generation and if an object +survives a collection of its generation it will be moved to the next one +(generation 1), where it will be surveyed for collection less often. If +the same object survives another GC round in this new generation (generation 1) +it will be moved to the last generation (generation 2) where it will be +surveyed the least often. + +The GC implementation for the free-threaded build does not use multiple +generations. Every collection operates on the entire heap. + +In order to decide when to run, the collector keeps track of the number of object +allocations and deallocations since the last collection. When the number of +allocations minus the number of deallocations exceeds ``threshold_0``, +collection starts. Initially only generation 0 is examined. If generation 0 has +been examined more than ``threshold_1`` times since generation 1 has been +examined, then generation 1 is examined as well. With generation 2, +things are a bit more complicated; see +[Collecting the oldest generation](#Collecting-the-oldest-generation) for +more information. These thresholds can be examined using the +[`gc.get_threshold()`](https://docs.python.org/3/library/gc.html#gc.get_threshold) +function: + +```pycon + >>> import gc + >>> gc.get_threshold() + (700, 10, 10) +``` + +The content of these generations can be examined using the +``gc.get_objects(generation=NUM)`` function and collections can be triggered +specifically in a generation by calling ``gc.collect(generation=NUM)``. + +```pycon + >>> import gc + >>> class MyObj: + ... pass + ... + + # Move everything to the last generation so it's easier to inspect + # the younger generations. + + >>> gc.collect() + 0 + + # Create a reference cycle. + + >>> x = MyObj() + >>> x.self = x + + # Initially the object is in the youngest generation. + + >>> gc.get_objects(generation=0) + [..., <__main__.MyObj object at 0x7fbcc12a3400>, ...] + + # After a collection of the youngest generation the object + # moves to the next generation. + + >>> gc.collect(generation=0) + 0 + >>> gc.get_objects(generation=0) + [] + >>> gc.get_objects(generation=1) + [..., <__main__.MyObj object at 0x7fbcc12a3400>, ...] +``` + +Collecting the oldest generation +-------------------------------- + +In addition to the various configurable thresholds, the GC only triggers a full +collection of the oldest generation if the ratio ``long_lived_pending / long_lived_total`` +is above a given value (hardwired to 25%). The reason is that, while "non-full" +collections (that is, collections of the young and middle generations) will always +examine roughly the same number of objects (determined by the aforementioned +thresholds) the cost of a full collection is proportional to the total +number of long-lived objects, which is virtually unbounded. Indeed, it has +been remarked that doing a full collection every of object +creations entails a dramatic performance degradation in workloads which consist +of creating and storing lots of long-lived objects (for example, building a large list +of GC-tracked objects would show quadratic performance, instead of linear as +expected). Using the above ratio, instead, yields amortized linear performance +in the total number of objects (the effect of which can be summarized thusly: +"each full garbage collection is more and more costly as the number of objects +grows, but we do fewer and fewer of them"). + +Optimization: reusing fields to save memory +=========================================== + +In order to save memory, the two linked list pointers in every object with GC +support are reused for several purposes. This is a common optimization known +as "fat pointers" or "tagged pointers": pointers that carry additional data, +"folded" into the pointer, meaning stored inline in the data representing the +address, taking advantage of certain properties of memory addressing. This is +possible as most architectures align certain types of data +to the size of the data, often a word or multiple thereof. This discrepancy +leaves a few of the least significant bits of the pointer unused, which can be +used for tags or to keep other information – most often as a bit field (each +bit a separate tag) – as long as code that uses the pointer masks out these +bits before accessing memory. For example, on a 32-bit architecture (for both +addresses and word size), a word is 32 bits = 4 bytes, so word-aligned +addresses are always a multiple of 4, hence end in ``00``, leaving the last 2 bits +available; while on a 64-bit architecture, a word is 64 bits = 8 bytes, so +word-aligned addresses end in ``000``, leaving the last 3 bits available. + +The CPython GC makes use of two fat pointers that correspond to the extra fields +of ``PyGC_Head`` discussed in the `Memory layout and object structure`_ section: + +> [!WARNING] +> Because the presence of extra information, "tagged" or "fat" pointers cannot be +> dereferenced directly and the extra information must be stripped off before +> obtaining the real memory address. Special care needs to be taken with +> functions that directly manipulate the linked lists, as these functions +> normally assume the pointers inside the lists are in a consistent state. + + +- The ``_gc_prev`` field is normally used as the "previous" pointer to maintain the + doubly linked list but its lowest two bits are used to keep the flags + ``PREV_MASK_COLLECTING`` and ``_PyGC_PREV_MASK_FINALIZED``. Between collections, + the only flag that can be present is ``_PyGC_PREV_MASK_FINALIZED`` that indicates + if an object has been already finalized. During collections ``_gc_prev`` is + temporarily used for storing a copy of the reference count (``gc_ref``), in + addition to two flags, and the GC linked list becomes a singly linked list until + ``_gc_prev`` is restored. + +- The ``_gc_next`` field is used as the "next" pointer to maintain the doubly linked + list but during collection its lowest bit is used to keep the + ``NEXT_MASK_UNREACHABLE`` flag that indicates if an object is tentatively + unreachable during the cycle detection algorithm. This is a drawback to using only + doubly linked lists to implement partitions: while most needed operations are + constant-time, there is no efficient way to determine which partition an object is + currently in. Instead, when that's needed, ad hoc tricks (like the + ``NEXT_MASK_UNREACHABLE`` flag) are employed. + +Optimization: delay tracking containers +======================================= + +Certain types of containers cannot participate in a reference cycle, and so do +not need to be tracked by the garbage collector. Untracking these objects +reduces the cost of garbage collection. However, determining which objects may +be untracked is not free, and the costs must be weighed against the benefits +for garbage collection. There are two possible strategies for when to untrack +a container: + +1. When the container is created. +2. When the container is examined by the garbage collector. + +As a general rule, instances of atomic types aren't tracked and instances of +non-atomic types (containers, user-defined objects...) are. However, some +type-specific optimizations can be present in order to suppress the garbage +collector footprint of simple instances. Some examples of native types that +benefit from delayed tracking: + +- Tuples containing only immutable objects (integers, strings etc, + and recursively, tuples of immutable objects) do not need to be tracked. The + interpreter creates a large number of tuples, many of which will not survive + until garbage collection. It is therefore not worthwhile to untrack eligible + tuples at creation time. Instead, all tuples except the empty tuple are tracked + when created. During garbage collection it is determined whether any surviving + tuples can be untracked. A tuple can be untracked if all of its contents are + already not tracked. Tuples are examined for untracking in all garbage collection + cycles. It may take more than one cycle to untrack a tuple. + +- Dictionaries containing only immutable objects also do not need to be tracked. + Dictionaries are untracked when created. If a tracked item is inserted into a + dictionary (either as a key or value), the dictionary becomes tracked. During a + full garbage collection (all generations), the collector will untrack any dictionaries + whose contents are not tracked. + +The garbage collector module provides the Python function ``is_tracked(obj)``, which returns +the current tracking status of the object. Subsequent garbage collections may change the +tracking status of the object. + +```pycon + >>> gc.is_tracked(0) + False + >>> gc.is_tracked("a") + False + >>> gc.is_tracked([]) + True + >>> gc.is_tracked({}) + False + >>> gc.is_tracked({"a": 1}) + False + >>> gc.is_tracked({"a": []}) + True +``` + +Differences between GC implementations +====================================== + +This section summarizes the differences between the GC implementation in the +default build and the implementation in the free-threaded build. + +The default build implementation makes extensive use of the ``PyGC_Head`` data +structure, while the free-threaded build implementation does not use that +data structure. + +- The default build implementation stores all tracked objects in a doubly + linked list using ``PyGC_Head``. The free-threaded build implementation + instead relies on the embedded mimalloc memory allocator to scan the heap + for tracked objects. +- The default build implementation uses ``PyGC_Head`` for the unreachable + object list. The free-threaded build implementation repurposes the + ``ob_tid`` field to store a unreachable objects linked list. +- The default build implementation stores flags in the ``_gc_prev`` field of + ``PyGC_Head``. The free-threaded build implementation stores these flags + in ``ob_gc_bits``. + + +The default build implementation relies on the +[global interpreter lock](https://docs.python.org/3/glossary.html#term-global-interpreter-lock) +for thread safety. The free-threaded build implementation has two "stop the +world" pauses, in which all other executing threads are temporarily paused so +that the GC can safely access reference counts and object attributes. + +The default build implementation is a generational collector. The +free-threaded build is non-generational; each collection scans the entire +heap. + +- Keeping track of object generations is simple and inexpensive in the default + build. The free-threaded build relies on mimalloc for finding tracked + objects; identifying "young" objects without scanning the entire heap would + be more difficult. + + +> [!NOTE] +> **Document history** +> +> Pablo Galindo Salgado - Original author +> +> Irit Katriel - Convert to Markdown diff --git a/InternalDocs/images/python-cyclic-gc-1-new-page.png b/InternalDocs/images/python-cyclic-gc-1-new-page.png new file mode 100644 index 0000000000000000000000000000000000000000..2ddac50f4b5575888d8d19cd9b6f2e3863132776 GIT binary patch literal 4415 zcmZ`-cUV)~vfn9?Pz*(Sks?SF4DHY{v_R-ZsUjdq5iyj2R83TRQ>ubMBya=-k*@S2 z3Mdj0L~1}mA_z$D@Zve|-FwgdzW2whS$p=LHM3{dH*0Xq9R2J5_h(<=`5s@q#(=Bn-Jf z-S2tM!qOTyM=GQ_F{MtvGeD=0kzPv=j*Bz!5MIOG|JTF)DzhYW(esjJ z8f05Ovk=8t?*$Q51LL3qw;pW~)XD##-`3zt|49Z~^5l;x(40MO)Z%!tAv7mko!A0n zmG#6YS#c$AIH392w1k_$~Q!)rSP0RDWQ!sH=4h`Nb>*26BS5k7PBYnJF~ zbT4h4YeC+M?a%Iny|4wBj~)hRJUqkjTZ&4xuGN!2BhK(*%O4{t+91coptKo@;$BTd z8tby(2ExM#R#22;W_ASN3no4_?B;Et>+u|1uR`eH#BUM!{+2a9O!2SDGaL z+x{Le<&h{En)7esAe?j22{GF+P z8UCG4tAF_PcUmjyag17G>?K%Ygo#Mdu;kiXRH~!owO1KUka-VjL$^T~iu-|^u@FXV z*t)x+@Ra1!*_1hMv}rjUt#m~jP)}G?nl@feRtmFl^L;= z$w$i9gtOE2uM7y16@5beh=!|xal#WO^1>CK3c3wRQPDQmnCt>Q7sNH$b#l;) zb)+(&o){N*nz=H6uIKq_vQ?(XgJ1~pri#cb=ka%tsO0yaD6c9ES|SKSB*m4xoHb>m z$_9BqP2~1b(B(d}1jpSZTH$@p%HrR&7YP!UPvAn@1#CgiS#>w|7F~|cuC?9XU99#x z$Ghr;dli0#a>^~k9QBm<6${xGj~lH05#tcZbhGmK?PS*qcRgf-H#pL+UL?|%V(#{G z2t*ZTug-}8@;so^DG_%kkv4?~h09NmYa(`53@k#7>4#RGi|03&*kkMp#~^v#%ugi8 zVN|f|WyD79+FkpDyJhG7UTz1r=5@BeD6gcRA-165an3iQxUae=c-m{u@q88<9TY1d zVULl6)!4o>M8jSY;fHcTREPMUfU3z8isR+49X@S`b$x;2*nBH&sw~D3yFabBtx!&T z+E|`(IYttvd@BxUOacm%qfsZRSw2l^cFZ~a`1)#@SN?VQEc|w^)J*8LlYu5Lv^VlP zD=zu+Q#4av36X_j0`^}w(C@vtH|w!vEkl%u@A=ABGz3vElJN@3$n|>t$FF`S#v{ia z(KvSA2ym1#Gm*+KapVLNWJVh6T%OAQw)M!2x74+-Nzuug{(c4)v7ziD;r_=Ur9o2p zZ93zl?Pq4T;F-FG znp{`A*bDVz|?V`}Lei!y3+2TSH}ph&!A zqox-2Aik@nulVg@6stEGr=~R$?}brw=3a8WNqbPFO#abgyQu-3IQMDXl5LMrx2?>w z)?K8J3+ExnBJ%-ipr2LO2uOYI@cit($04`Xa2K(v*0-i}Pt7t3V{@BE7$I|S@&<2& zd3&{qlKEddrbr;%za?J$9saHoq;8Wr-4UulLT9nhTbY z9SN)Jx0p)gW9q~zR1dP;uG321FcX!EgNsl#2wYVGTmi;w*G;8S@_jPOULQ%ByN90T zt9Rem1pENF0O;*+?-P1v^bW21RVGh8NssFpsG8X7E>Aj{EEzI%ZF}l-r}W7r-L!yw z%9fH7%f{C(EZ-YX;q zyHp$UHq+e_sIbgZM)L_y^y8AW%r(cWs&mP= zVDI9!p>wm5%dMV>s5WdZ`n@cLWuBbf>QJh4_KdtaFUl7?q=9jqn8{tN$VwNCSn8Ln zltHvz0N@b_Ua7XLt zzuuy7V$k}qZN^HneP)UTI&xm_ht zoVnB^x#18saQk(B+R-HN+#ek^h&34t(t)<>88N$x}nsZE#MGvQXeL9`VO$Vv~|fgxZUxG(+O(b+i^Jc6irFph_OD& zd5tV7qf*%xa&Q@f(S5)cdK_qf5sA_H*fm-&^kEp}TZ~mE+U?bY8x46b;`4`*QnFEw zgo?@Lqp?+|#0Ke!Pi@Ot(`BCypCHD?uhaYHVYUA6^WbU$H4*;p$GIae8b=*K&ArG* zzuLN{vNw|*EfNDmmePc}z6MdUOq3I7@^q51Pg9KnWuP|fuI`fmx^2fv>X|_XHTKLlue~_{KBZ$4PXp$#_#s+x*UPa zwMM%4Ar|Wf!v}}16|3E($eVS;^&qlVXGwajZC^~edaw{D+i3G!azPsw71~7cTNguU-QSiIGhDAhI#41P8wN=Z;8Jr4N3QG@%5e zw&Ao_myO|hIm%$EP}6Zdnq#vbKKmlFw;r|_9Vx^~+XpqF^JuLQ&{(2<$>%zO6NMT{P!G%_z@~aqz z3x2?YHLXaGd`21UD+LSO~EC;)9T#smRQaXX?+8EXbl_n_`chHawo* zh`N#(Wmj6bLNPy8zn9OV?$`Otm}3mGgnKGwXcTtr>t8!<{yDh&!1A~;V(01A0b6ZS z%8L5%dPPL@JPl8E6>p&odBe@s0;PdMm+Q7y>9pQ#l!xNmCyxQ$zj_0}aj;22_vQ!)t#z_Gs7bqTm{2b&`T*6Y zNj@A55xJ+s;$|r~0#kRv9SD;v;Tw^yLI!$7$|sdlZE_QTej6*58y4Ocn~ZSoGh93r zxI~>_MtOq!+0=_w1qQU8o=a`$(Hi2Z%9yCq*MfwjS@FiVqN7E$2FY>(vb2pl79&H$ zRp^$04WfI8rg1q-ZEN*_5aE^^G0%quH=3@ggc+|Z|0+&)VWzSdcZrU^T+!6V+<@Y4 ziRaf)y;OshyGpX>?w`s_cZ6uQ+R7Tf9P4S>FMY;~rXy}EUzBnDu7vDU$vR2C;%eK literal 0 HcmV?d00001 diff --git a/InternalDocs/images/python-cyclic-gc-2-new-page.png b/InternalDocs/images/python-cyclic-gc-2-new-page.png new file mode 100644 index 0000000000000000000000000000000000000000..159aeeb05024a3247381e1c3aec2b986035b5e44 GIT binary patch literal 4337 zcmai2c|26z|GzV1UlU^N*_SX`LzplkQPwOGiO9~_w~?}rl$3;wY$aRPjEpP|m2?rJ zmR-!@|}8q%k%v4d;R`+pL1U4-uLI6d*1h+^LgL&^H!%h*@V~t0N^w?GqC{x zC`fM)v(O8`)bGhCG@A?yWf?=9IGv3e#VmmljFh{G#RiPKn)u5jej&5;;o z8)=E~t&T2Aeq=+&9f}n5;8Aw~cqzZYcC67;rB*^zaJuDiizk4>< zkypd{HI5{l{iu!+E4ww&`yu@rH~L6v^a0C;#QkF4Q8Ax;&#ka-P(%4D9neZqraj+Q zW0Mc($GG5_SwfG4WBmty56JNFpLpyhNPcn$X?uy;M@}@U!ax)}3Z!MI`Sv4OtRaBY zOKGTOycVtl^4GCKM=8d^Gzh@KG{)<**a#`QVs!s_I|OBVLg&h$_bYccI@^USK;QI@ zd{wgimy)Z7W=4co$(u;qI_4%JwEkb%D*3{KgJxcywn!H<+|zNG?mKzNDcLe!&^$%3 zbh=Gh8vM8f$r==tNaG%SfCeTlAm_1i;K!>l-8AMisw(HkL@@@pmpSR<2!nOo1qx70 z+&{qM^LrWpi3}vK&;R20O0#ItLBWci>v~K$1fHzCy<>yikak+MfMlIAIU;)cV0?G3 zGt+ILk?>Rwl2sis&_T?*$^N%$e`5WP_upD2$%1@;@LN!8 zhKD=e$kgd(u-nvjv3b;Ra-v<;yJVMx>~rI>Lw_%G@U(d2njA8A!S?RSB!#}b^UJRd zf3|QVVi~X-D9gN{&PI`YNZWBW@4cPOY(y*vc0X#E$`9_Wh1x+aUHzim88=Qp&}Sl2 zsHhN5G=1r+%8V%x+}yN#n$&;ILl*qE)_-*8CC@|kpH^L?Gx7j@@#hD*bnzkNP^t5D zrKjPOR`iJ*Ay4PAXxQ_Ut*4u9$6?M@-@vJmd`QUlH1gbR(e^NlHgt+iK;Qd}fq z9}=u|!6ri)!vvINzVwvCZEWfy7Gt4Pk&OLTA3l^+Go!@{!%P(iz`jB$p43%N^n>FA z0p31Wb)V-AtuRsw!*(S&Ng`x_Ql6gTSc&IhlM67i&OF(?C$cV7;%f>r`!>`?>f3`b zE-`KkT+nSNPeVD1J{?tNo-}`|-Vmup+Y2PrbjxRUe|lgn>{p|& zdgvus6?CDdb7E-AtLGC7dU%jaxk}`<9G_<3Bq#X+4=JZXq#$YYTihij><63F6`3xJ z$Tq`YvN%)-K0-hC2ieB5KHLA+_WO=OL*#H1yA+gUvcE|XQvEl7bHk2Wz#AGf$lz~X(ebpht!FA-G%PP zXCf`aP1-Z;K-S5tVb&c{S?^!z?r^DH3@9BTY72*W&Of>rD$?Vqb~mnE7Y)PlTw%bi zYKc7K80#80cq7o(N}R8R#B*s{t`>R>~q{WFM91BD}6(_0CSe9 zQY!U@qJi_r==;?s#;T%t05$IUI=AQ6aY}5Nh}UrV=LFXuK^yXp+}rmnx81a-S7GP^ zgqDL&sL4v)-3HgiExZE9?8+_wlW>7ZSBVJAxI4AK6qI({mfPW2I{^UCn0ytq6HI#AuHG>9V2f&L`;r@dQn?sE zpXpv^A)kEDU{Gp7q)<4^IxS7F0U_gRF`V(P4o>vO$t`@Yc03h4TiEXWv7X6ih_9Tz zNcx2_Z*qZde$IB;;dAb7=3durk>_Yxo#eXRr95HM+4cDuvH3cO}C9oW9>YXC# zlHO`24BX0H2FbT_+41q4NYjWFUs~9}tGafYN`A@wh(-!}Tp$?|dMwaQZp}0&KVGF; z3=%W*?E9dEd9opOC^T^YgH!u}UOn+yO>zQTQ`)Z1ewa4t zH8Qh5D5h^~+n5hSUT9A+#N}N~7)8{##WyVpm5XqHNg}`|#ShO!oArG8xT1_1{Gj@R z^S<`TX3)N)9&H+kNnTw`tYOZ$Avry@O!p@?O64_ie#YxpdJV)(Thh8M9!N{R zV_33YhC8ZART#JVcFW?#DN`Jja#h*%^1CNLVI^cNkU8o(DWTW)oln~w2zkV{G)zU{ zGI^CMUdn`Be*O5wLN7nZuu~+|;@oJ;zH8+=vqE3z9;L1*ExpI9}&va*iwfp#1wAGRCS@cn6sruI%E+egyACipOqeFPO&jPVT)Wk{Our zU-~AT_fmgT~-chP>VgCCMMsy~E3Yp9#qXJ&cnyM4) z{yYkENC_`0h-jr^dW6WBX!boNI_7+{L%2Y_1I<|{(OQpmAjne|-gqqm^6U zN8qP3HeHJOOy?Wr-}?Lr*5R4&a=e5Kb*4si>|Zvf`DdwKUr>Mamy2DX!O(n_59y}a1%DtW@5GEg!`_iv3d{*&|RO_WoiAM3A(Rjn~R#&T3 zed;2t2z3-A%Bqo6%hdERe9PCxn>>9);S?MF+!)1J5zifewCZ~?M9-Cn2TnhJ&>e96 z!l3!x6s#t4UKdu-JG=$MDc!td$mLlqqB~f{g+6n_v8dR7`}xH7t8(Q+1!#OC8@d@` zN8GW$1sfhsidbpt@@fUE)3({}qCz9RGgiOHGNQ{74FU?INh&SYZ$q?>Tvu|<;(?4EBJIyTKq;GWM4jJ}#1Lo-yg*&Xha^Jsk} zfsRiqQ4;lF4=ibuB)zj+%?0wke>0yp1?z@-=j{YB|EVMiN3Y*Bg^;*{#~M zkQBDPRjUD;q{3O09OCPSf&|_LvQQ!!V;Y8a=AAXYx&-IKfH|-mX}qZyd0^_T7)r7z8UaBAyYKJ-@ss0Wt`kSe~+jlY17oB%%{>_Vx!- z2O~jt-yO#?+W6k?vUw~6=#1mU6U`?)Wki`RiF-+pI=u+?qk&FJ-kT+#)vlvO?V^e}-zAd-rILnC+_S|gFq8e+tJ&n|HZU>X)^NSG6c*b}kTvC~@ zxOxV%6AHHH(_PfR%<8}W?B?wXx`o+gAnO&SGd#rK@4Wc2$xPgXD(E>AAhq$V=vbBl z!$W6vO@sO?e@P99c~-}|P=Yt9r)(WWo1%pV$T7c literal 0 HcmV?d00001 diff --git a/InternalDocs/images/python-cyclic-gc-3-new-page.png b/InternalDocs/images/python-cyclic-gc-3-new-page.png new file mode 100644 index 0000000000000000000000000000000000000000..29fab0498e5b106f813e08aa2799602269cd74d1 GIT binary patch literal 4876 zcma)AcT`i$w?1hgARUw{9f{J5KxmO50qIqw$(14qh(JsbDTxXJq)U?;L_n&b6afQr z0YPa&q)JgiiPEkRq)6li@9)05)_d!%_5L{Lo3-cLduH~WS!ebaZ*7TYXBA=v0D#@p z#K;x^z!XON?GeTcF!q0ZnE@=I1^lB3#Xj=55SyDZAlKN{sm-} zJ!`}HQU+LYVz}2x);DtOQ-JVC`l|Pn?_Z53u?fOgU7U=cA}lNEgk!9c(^U93WCfNv zx5BQ@1(V>Y2c^0vdAZ%QVT=KDCOE<=jBo@;lEdFo`Ft5eAy5C)cWR6mQXmbdR6c>I z-?t+no<$G6!9ftS7>WN=Nm21(6Aj*m6v zG2hDuqamMe-%5tDQD8b8dg`&H6ie9asgw+x+5!_BlEW}_gTQl;hm;}s%kGopIO?=# zF@vj5VuK~bsY{3vhSbY47T^5G_%RVAx!d<@5$0>fkJNDgJ$jn1HS;Ici|%-LlOM*v zEyFvn|JUOFULzy9Bz|8;a*bH>u~!>IB84v;#f9V zNQ%3rHTl<*-On9WbJe)WsuNK4gh_|ykptC^{lKxjz{d#lN+Lk+sa14+%!tlN>;K5^ z;nw{-kAJpx{wVd-3!z6;5{PvYSHck}5l`TR43srigzkDc_A5_*TL^<{I$?Oy`Zcr&|kzYj#A( zNYIw8U}&K&?%gcy@XecV)vS0>1ox_js6DRptTcsb*YtCYN%1pw{{I8*!c-w9pD#8H zZU>Z-R?SFdv?z&q7R3SEV*}~cgJ(h1l|Fvh9te3{#}ZyTGq=8WUF4$}kqR zh=Ll1VI$~A(QqJyoiUcrzmk6tg{p#$F+T=e;)eff_YqI;$IA7vaTK85e-adGoA);q^dHQ|Y~0^jk8QZk|`$x&Eo zg;gSU-CnNQsgP?f5(*4XyO^(+IBw&Xcy7A_MJ7DIdKyk?0|t-Q%|5NSb#zpu(|q_f?BK_{umQ}r1r-xiD$by9ao*7+s?1p9L5{XI8wJk zOP(zk%r;AGf==55*XTzT$mwePpJkTOQ{)@5P5DQ5y|{`->u+f+*v5}!q3h&TMwvR) z#`HxMv#+ExIQ+g6XmjBv53%vig52PWZr^_5pYpnQpZ2V$+m6n zVCrIA-#H9c9(XxjfJ(N?m5|j3Q5;kKz};tlv_Y8IiUW3%?&;xw`Qm+gzfr9qDh>CaRVzuCj?JGXjg)+ zQE?gCedj;DHk)Rr-kD7^PHS7ypFq%v9LG>1A_k`xwbVTaGqyDF#*gqQg6tG0wWpT+ zel1cbrUHwVf8&v)#0lA>Br37dME9IqL>|Ls{4`P9%O7~vQG}={QV$pR)I;PtJBA~j zpZW!vC-MAxE3_854r-8*yxQFQ2&ps*&GfRa29wn&ycWo(X9?GIQKP{5&oGgnJ@^y4 zfKe4vlPQ-aqLp6E@S^4Sd=w6doG@+5q*qq3P&6lNZdHdEAzfcWL~a#^%`;I82YoME zX3xmYT)qM4xwLv6F}vjzGAK86+o;QH(wGcze#ZR*&VYTW=WT*Wm z*`k=Y-13EYW$S>D{f(Xbmts4jb{EmPzg#2eKRaBe^qBW}+bptH$M1^7Q;9|67%twV z5o>ni4J10fubgm7uK300>Hwy2a?wxq5<$w>RFv8r0tG37sK%6b@p`|r?*^0?xT}w* z;;kl4%3QyW%{(_ifuaI^a7BXQai3Hu7&+#ha}&*Jomd6uAW0LzX0b@ag32gf+Aam`T{eZkZZ zK_R7gh{t&V>E8K0Eb__s&?27(*UwH{{b4M=9ne2RtrqhA*A#%ICCvzqEX~o~lb0N& z+KK?Wbn9eiS9A^YsU9PixDNma6xU-ec^`^z&VVz1^x1-{li%RKU4eP8-H z?~rbODeZoA-Qx3U<%mA}cCL{9eyf83uM6Xofn2BymRjV*ueN>PyL$aatIe}?pK4?o zzf4=e*-43IAK!@}r#^AioBgn)I2a~GT4J7-w1a)(;hs<)jFtydQ5LtQH}m|@6USm( zILul(Xelj*Gf$FM$~3Spx_q|AD)tuudikP;sL_K_$x+ueY(gBj%8;8PTrBJOr<*at z?ao`6dqua@OZ@6zZCoq^gth$5}!nij!AJ@`(Hq0S#r z$Ic#l9XD9Vm}D7hpWdi_ZEAr}*5XqOgZH*agrrwoPWOhjs!-+81RK(eso{s4_GS;#ngx?sTjvB1d}_M zpnJf?>gBqgi&55BzMTCQg&ba9X?C;}LVGB(_DAhT`}myjEa>M`**rqpuob*2G}YT` zFRr7I62VF7?@j6x$vQy$_&fj-LvgZY(m2M*JG@{9mVjMx4PznqWshGgr&~jnDNv6``*I|Iul@n~?MxN9aV^=i(&%0_wtzmuYk@qT`;WTj;JF}j&zcd? zy{d`QNi~<9VW)Y;N>SqY=0F90Je5FRPopTok}nzAC8qWW1kJ5FrqtXsP8_5l&wFd@ zg)&nN`3ca)w3PTjbHNxxi_n_w&5^4B?O2=PPO9Q4udl?Bz&2nblw?PO4x8}VvKoEn z3h{dB!p9mU4ifJl2tWWyo%T)y>+`SgN3v-HPAMjWU)>6HdI$Ca(o+BOZaeW$0>Lod zikCVGA{ceIS|Wq$PctDWIeq!WlbUN$9n3-jRez=TF7kYlb9_X~cOa9FJM=`Q#+KZ3J$p-~2#B_2$*`y`1PX&zkmIi}m3f*%3A`Fr@6Ot?Hy51)D z#)zl_X72l|$P}g=6A$pW?|mQgZF2~Thm*~N7tH$29jAK5WvS|GYMrs})ctM?z8fnO zYuRs;e{sAtBmH|Qy$X>Y#7^@7c_o-Y@iq5RD_O}4PJQJWlIIf3fSBy=gI<8VwXruG zfc``^QO@~5g~|&gN5y+DH9EvOoqh|YT3{8((RK$nd>69SlA5)e(gl00a|yLs9Go$Q z8y}a@S+4q4{8_GbUU^FIExhiWw7Wu5BkS=wyKqXNGj>fo5niY}0zxOWP#64j*k@Z% zd6UO^-g&HsA-J!AmoLG z>sl(Gvt8hF!0Y?MW}B&y`<92S&obG&NeIW8G(aJXOjezuXW`(E(Olo_b(d zF$;pZjt-W#q&!IADK;bCeHX~!nMWD64y0`uVwCzK=P|OmuvqxgiwfkE2sshViRpSJxM!2BN33vB1Ghrxmfy7kFsq{_ZVu*~ z*}I?DtL6eOtPrak+Ds=_=W_c0Xc~vltxR?_e*J*X`4MEi?8;$^RIH-6O9VWOyD(8B zK@4DSoq99iE;d#vXn|v8e>TPSEpQ~r=vNz9FZJS(DV%Z(yc*bZ@lB*!#}&!F?`{P5 z%V=2m6_R(m&`s*c?X)<#XgiXDFbvjeKpC?vm30ZQ_h~QbUubCoLb^=80+QJ#Mt5DJsz~qyZn7m+C#ZWkHH&RY`Hg2ikKXapFy2~KYn6zhDBSgSmsi22X& zG1HeqSiPTROyGM{Y`T2Z6N^Ke1@``O^)hY5y;)ZDd{-x!bU|BW%lY7dHT;chvQpWX zWy(2vEi#&gG!j&9&8R)azzutD|JJKxd548NL%2AEs0;sAvkv~Gg1yMbPsd0lh#$wM zbO>MWjj6s{zcO^*e?E7seEpSJ=SO|gLJy~PN5=$4@U30Ty}znFqaPB=e&S;#)XIwD z=^lbK1_hOXduZ1m6yPup(-*PFjO9{;S{?(mC5IWarm}j=>y)|Xr zBRmxB_gC4T?QcQhw_5X$ymF`tRQxRt90EgZAV2>l*7sDPT7U=so}n|zV9q&Vy#By= zBRoUA833p%tEej|YbvNJ+p8)gG?Wo4XHF|CBb1ei4-yVltA8B=gS`BFZvNj6Ke|;L Q7!H8xIZLBjLyz141yIB0(EtDd literal 0 HcmV?d00001 diff --git a/InternalDocs/images/python-cyclic-gc-4-new-page.png b/InternalDocs/images/python-cyclic-gc-4-new-page.png new file mode 100644 index 0000000000000000000000000000000000000000..51a2b1065ea64eaf009a96ce6fecd3132424ea0f GIT binary patch literal 4863 zcma)AXIN8Nw?0WE)W8S|NY{Xl^r8ZRK$LC)DN+R-YA{j_C}1ENg@{5B5k^!X6k+HJ z0Y+jdA{`}y6oCY000}}2NNACA<9Fw4KkoC~ALqRL+2^di)_T|8d+oJTF526`MC3#O z0D#%qS~>v$n9gqx3h`fnHRg9eJ_twHdR*tD?LQvt9*h~k0rop*V+rgL)B9ThK=QPm z82#6q+FnvrGKR}ilyOU0y zY+V+9bjatnv44;l36k}t$h^E*-KqRb^RdToT1q``4)%zjQR5@b*weiPHH>@Y2F`{^8;(ZxEGL1ZeRPC@)!%C-*AK#zFskKZ|H-ou3>c?44}EH&+LWMIAxb5(i}$3dChM z?u`r63*AJGT^zoUyxq_U-b}OlaGjeKUYaILw>LQa;PyuE-~eVFB5Cj|pcSV_hC9{R zrZX|;%nd_<=}yW;yfjjo{*e~b&$5dKBMQ>>c#A`4_{#5~Hbhbm1z|;gfsPkE(Cp=_ zk%1sAGMBPQpRPc8s9I+*h1G#i=+wBs56x%t-%kFgum5!NKYc~<1%B}QzjWli$aH>T z=_`abF1^Toj?c~vu<{jUk%78dBc8c~a^hud4DDfEv(XjQ_CW@946;K;FMHEb+L=q}txm9*+-at9*R-lvs|Z zDHpH4%Qk@THrX_p3Q|K`Wiyuc^EP;cUcKBv zYl`&7?`qmp-_L}Sch=%nQNFutI?RL5YVxx*P>D)qbAkjCm>VU(6tzwRaOtku0sHy! z{xKK+KY#%gOV>#JS+b*s*S`1zqw(V9w%vzOi3R&TMd>iSwxY`c@&?e^fknEfqf{;LTXW)tP| zTBuxr1;d+}9+pp~U+>quWkWXE_t46k#U`xF0WJ`-gq{tLgi05AI5#H1N_{_WKPOGv z695vnvpvMKRZtu-_M(EO#nhV>Bwi=Nv~sq(#DvU|YoY^v+Kr~?mI{`z%PvF3x} zV^$V9tY3NK>V`e}l7l6Js(>bHv;C%}6PkVeDC+c#;?esytg)++)_>&HGAkJIgqU&_ zr=$+l{K@F}3b{Yh@@ukWs}5P1$X2{COfY{af$)mG3r?l;$jSzYV zn2x(ZHa|)CcJTiwVQj{-e!D4#k{>kEABM^$L89KGNhuW<@VjL1!Bi7~ou%BB&>>nu zg;88#@?CTe)e+lu3_!GbrQTY-SPf9r;C^@7j)C0ZWOH<*d!0?TMf}61%YkHbUApHV zRfL4mG@svsfhUU7BJtcbyjgr;-En$pOy;RT2{H|Crzj7?t!Q8SV^_cUF#WorB!Ux( zeE0H#5+eBFF9C4!fKt;_Fz%XR#n(2ko8j4z+>OpzYm_pK(xB@*y>+_ioihqSeETWDiZgv{QiSDCMe+=L>a_6u_$7ps2}+1dF<>5_VTb)u6hXuVi{iu^ z@$wKH+Rc3YtADtNa~PD{);cJU&7gvE?bBOzgt-u&AI4bLUtuwg4ueg$$b-475Tw1p zEo)F5hJ1aw2=G>9y-yYLrpai4xo60wB3ORcA@sq!mbn6~P=a=EaLIcidKRIf$=$cf z9?z6!k?r)|yq>9|@~gk-_$CU`y@1>YTkFMHCInK#bB(i=V6GxN8&<2Jcl`ve48UR_ z8yxF*vg7D^lQ|^`WBq@yo(k#WQXX`KYokM;nWqpQzol1LxDrL5Ae*1}@ zqY5$OEo>=k_I%yWnM}GLr?Nj{QfE%{vcp|0~DGD^%qpU zm(TGTya+dzeXL7Eek0p_WT8R*_&WrJchMD-A>YI5V}@iKf$`&w0bTcu!}7=Z3u5PT zoiPH-kU#zA2=Bn%n}>t2 zb{fGm9K1NuZ$a>!#2)rZapaw2j4r6j$>vwzqKB_;(2DXc*iLaBx9{v16g6KPz~i+8CD(>WRG04|)W1HoEBY9_yaJ=C<~f~q4z|GK6d zk48(2LcGS>(ee6Y`9W=riL73?GbCl|a(?~zn&@~{1~CS*0G3HsV9*C!2f}Y8%k{b@ zBqWe6RC@I9obQ}zW49Y2d-d%~3M|cc9F#r$o&~pYL|)#4QWPbp&bumw7;2*%JBxpU zgy?Dmy!nJ{*RH&dG9j|JUklUKYQ_GvCs%x_*<1eFyFSI+3U1k5-Mcm#CP1!I^Y~ch zJ;nNL$*tMjY_x{@Py?j{HO#~JQ5sI*abR-G*i~xC=tdpK{pHQ%Fc;)VXsRr4ZvKD> zy$>%Q!Sp%+it{2D6<+EeRzD_8uZx3W`wZ=c=+Sul7ZFR2`&hlG%p`?EfEFo+RL{K` zTb&GeLs&~R&S5?72+AOHoAZYx8dDW*rhNAESQ)gT(OESQIswZO%T~?2pgO}9FKZn6 zo)KUks!ZEllSXX>Kd*6~?+ZdXloox$9;0U_{+b*78_s;ITq4O-L3J!sb6r%!QuVq1KM1U=m5FS>Wb9}noiWkq~c-baijNv(*p@K z7=5{Gq1vJpJE__C;o$5qvFof0hD$5WS9%Y4SsQU+2Noh#k1H^jjDKgo=|3o?V}t`( zX-$q2Xw0Mfk@8_w{6i39YwsH&3d}+uA7mO~0vNT&v6Qh^Ky8fizv(71LMD{S@OO@jO$IB3FA`v@1IO znTTKbi8B?~sOY9Hg3_fMM^gTzoX81y!*aB(_#A>Z_%ZYsQdSmmR2d(mM-pjQFt`&^ zQ2DaBeW7B+6Z4ZDTlRr%MdhXJa0^m|GJQW>O0(*CumHpq>O{Rf+_<8JWB*JO9}bzOTiQ}Y7t=6%Ev4DBm(@i&h!DtSmhr3Owd;4 z%_iao)5Ov`wK$3&g&Qw|QTeCTs|4c!yU;RTU$}HhJ0|V^UG#2RXsyC?-HjXb1p{`4 z!yR7M06wM89~b1fGrKvY5#!FGjl%F@V`)#|6#W2K%#F&{Oyry9>n%PpM+6zKD(%L|6(d26|LCG|-`)5adrs-6C+dNE&?gAwXDFrvLfw!J?xy+E7}mlV~c zk`%TDKFfCBiga>3^Gy8FCn&3sVDvcN7bnsE>~e~29F{nJQmNstLW$PVPT8!PyBV5D ze(p5<(wRpb&A%XGvl;s{*W<>}960{>W=N+LJ;77G5LUI`ET~^)mP#i2wurLC6f0{L z=+q%&xRR;~a??UgQ5~qm7#cQr0{_V{hbH(7{_#~qYmlI2RdpVzjY3Z59;z@Er zbb9?zjC^#_`k!mz_K&?pPMFNzbeG$m03Bpxe8R9h>okcC#Q0~LS1Qk~4(`Yf#Mn({ z%yqv%>fduEt4{{?+vy5zy}YXP!56)mOC9&tEoXP?*-kZC^07vdI!bm=z7-H@_0DY- zcL=HCB?I*qB>u{XMp8J}d^xFEf)pjzU+ojQ4Z{<(7I*mmvV2+tL*~}ng=WUF?+A>s z0yo6WB;R^l0VQZ;1n;vk6pW+1II(h)@Oj!l&Yrvgd!(t%P(nleLq6UIGn#*#_i_T? zWjf%iv*v7>uBNYj@OgF~uEuuwYBBEJ4-*!rJn_{+6gOO|UJxj5=7EKLFcdDjU6D@D zl3*Sn%YYS60GY0U;`o@?iEmO3@jCS5o%L|;i`xWcfYCkhdN8digsRYY&s|5Wr3JHx zSImFGAc`|DL$IA|5YmL~rZ_7ofy5e9Kr634Taxip*QFOxlxd59fNbzuO z+f#zHRHdas%p)1B2%{38=M)X||NouRD08~fs7FgmDk5z2UoYP`HR`qa3;?wz!G=EmDUuNZx%pi$CZIprNv1WZM zCJrK@$M^$aB^X5e!=r+6k7v5MIKuk}6h;GTad*3@eCGDXuFmH0_iY#aFe4>U%`A{$0@ko)B3wL z`e#ayX0qka>ACrcM#OG}z3vxcpx@0mym2_!vc=gPt&llWuU50>WOKW@(d!`95t-~- zyC8I+rUK`*P2O#)ju=CxP=0qxM|x%Z$lPOR5FjL6g*{N)SNp0ANMvs zUV9RccxLc;?>f@1$~0rl&0%X|UA&dT1h`plJI{|Ce<0u%rM literal 0 HcmV?d00001 diff --git a/InternalDocs/images/python-cyclic-gc-5-new-page.png b/InternalDocs/images/python-cyclic-gc-5-new-page.png new file mode 100644 index 0000000000000000000000000000000000000000..fe67a6896fe4b07c03291ca8b21781fde8785757 GIT binary patch literal 5712 zcmZ{oc|25W{Ksd(U>H&MeTi&kD-6RRYgw`{4T`ZO6oaB+CgB#@ibBH?ne3G1rjSZx zJ@y(GNf>)HBujpC@9*CG+g`sv&iOp&`99z0oaghL=e(ZtzHMh?&dn*p34uVkEiF#l zLm)7aSsr9(o*-sHPcJfsUr3MiX71YkS z46$;&&OfOq;KkV&VAeD?n6jEu#b}*Z-deqM^y;-f+eey7Npi>dh~_sRV&$;Or(ty# ze+x-XsESiz@uTdi-t6t>$w z?tqJflHqqz=fsXi>wp3X)|aM<0RvPBaoE)TL;G*cej%#ZWR?J^yBrn?$}oiwFoo4I zy9ZeRiDl4llWn9qz{hKd0HiE7*%L(=Sj$wrCK|Z07epI#mLCA%?Z=F3hX(xC6KHE1K*+6MMs3EA3G_fVf7p{ zopk@Yk-5&Z$QnEt$r3I1N%5~5!x`=WJ}!~~(=Vv|pTcyOB=FBD|Ic8VvHxrm^6`VB z>*F={09~pb0|!-A_;9C18CZZ;pi_TQx8`}uCsq83$p#z&*?isjtt#SQ%+;dOS2DFwE<5o-+B=AxsXVWaCp= z>uK#VnD~V+CSO=Mbe)xQgOOnL!)eyumioLPa)XO4*Ac+bNI0^4{AAaeT%wP*3ye}- zu%LvEOXTVjINl_9aZWghyCAVQ9Ky+Tg86L7f8;NFnA85JOaBU; z3P&d$GPYng-N!*D8fBhJF*BxE!KQM)Fm`Z?2Rv@YrHYL+$Bs2d)WaA^C{EQ;0PqJ^ zE~qmCol;&$OM%Xa&AH|HYAynoIbQdDM=wRGiIpCOaaFHzy;dTF?JvXqF(rm)X%ciz zgEvW~FY~8$IW&86O?U)tO3w-Tq^i_N2!#o8$t9#;n*$E5wE5IP{+%ZV(AxBoaIc&mHr&Em0-=x)oWFQeB*m-=|q-OvgM*1<0eb6?4 zo|Rro&R?#a!o&L4s&*?Zc0X^bD&|M-^Ohj}i)YTpY4%7q!E^6MNCKLquTHwJ z}eWu5d}US28D?~qLlHJ)ZF6RE|R!%`wp#f%FnG3piiS#QU56_MC1|Hs1uSEH`7QG~=s;SL4?8@2ZHAE9Y(MErPitJDJ_t(^F24pmdi^G*yy>7k1HP;Lg|#G zVmRiSK8T4nqks2UP3j@n9P8yy{;_pR5?NPY7rFXnZ}Mw!r#Y08E)d)+_Ye=^UyG;8;o%muPF(SRyRID8m(z8 z$%=%a6D$|Jc`{WN-=Ab=TF6BYj*Iv5@|h?aoC^P%ZYh6Q5<*BxDNStAkgiQ6%jsn; z43n`B?5w;uN}-rrR$(N6#|amW_a{5G)2(_Kz?|{Ot-H~veS;#3@d$0tIAkJ{~2Vz3@gi9(ZmU^{1ER9l+K3i-@yi%yla|zcZc~=i&FwoAB+n4 zw=6qr%{$K})dX_{S;s&rZ_Jc&!v|9c!fg-B)~5$kLG4x-Lbs0D(3ab}cMjH;3f10B zYMJN8IIQ3!w?hDA&&o(*a!JmKMX5D7t@LE~mf*qaiLPe_!R=ilJQ~L*`S|lu^z%92 z&4)G;UL7-{c%J|D>%;7EDio*hWlQ7%pXlDq%k8U?WNAtl+{DTS5yvlio_D8i@?z(0 zh&M{U;dnf0wV7WpioWyAnTONlGo2;^QLcDl?qzIpgZ$B;r{v}FR|kP_6axu$5P9wQ z`C>iR;W`CV^$90M&WaN%E`-g~O1`Iuz%Y$3>zs{~?BWx<^(jeT$yFM9PtFIP(Lcn# zR+z*hb#`jfC58ob-sM+z?s=86m5mVQKkK(Cg8QP+K~v^ue{ECAqpL3p4k$fex407? zF7=h=F!#pqdryz)32_M@M-=BM>9KItk-5T&$$H)SzZlP>^4{{udc^H@9C);>vKb?i zV%-($3Kx*T!Cv+Zf2PZ%kUv))BI>LEGtjJQ&Ut4(V@72b4u#h871(rK!uSlidkFxLm` zXMOZoW>>!>o^Sd_oMg%K+>Rf;1F4kVHvbL7KC=KPUEtYbh{jT4$y5p$cfA!Ss(cW3 z$`~6CcR;emUYMGkr@mp6K8NE(||!$-fpFm?WiRI|*PvIber#KT}&&dSB8 zNAulb_tW(4rOsBZnT4m|>KgWa&dqEjmi@X|UaX9TZ?_#R34@Vk*X$kgL~}inTBOx} ze=PO(-B?)DzNLk)F&Zi^0V8m>DL2Y|)p45L|9G(2grFL-%kgGMxKu+J9r+o&7XI0j zcY-}6sFosQliv)SRyST*#Dr_UMgkoT^3SV6cT_=_2q@$JVG*y;1GhsTphp2p{gxEq zlAV_QUBYwoGoyf(!CxEkQyD6L?qQQwZjlTqjj~V?dMaM-X@~E2MDkC)i_Ie+|4jw= zsHdrEfd{s77Qfb|m^gY=*?Pzc{r0yvu^a?n-J{Jb3Qty!H;Ohd8lfykfNMCRb4VFY z0qm~3?YP*D2Ur&*x05Zekd{_?0=bf*M(cA-&>aDrguolooRGx)7u3e(HxR60B!Zb6$wJIiCd_@55hyfgfL zdQ4m2RaHyuxwA;VmRh8`@wjgHg{c&ceh7i6iEo6`1zu#SIclVys@w-XRpfGZb72kv zp;e03rHPH>MSH-#X%VxIB`W4i>#Nw9)AqQ$C2qPdiZelwCt>Y+`jeG+8MLaGCcY^twGpkw`gmPh zEp7G%$wmp!+B_be#6zcgy&C%?E%w=K$%BPB)TN*O2kDaydcoBjW331M@wpRINIq1#+HfGkO z<_?VTl5I%ZMRm651X8Ox5j{m`D8nh-J?%%5Ap7pvdkM?;AQ(S38QtZ^=@*dwD?m8= z^kUl$FHXAi_VWm+ZNsSh)L27dYW?UvU+qzZLMX1H_=lXZs^wOslx5J%LmHOjVl{i) zXrQuwG(p2NUR>di4&%u`#;Z;^(ss2+C{*#^skD##>VjSof4q8~;(PpGfzqD0YB`F=+QB)vGaF0-SA9(v2m zD7)v%!zd0)X~7LYu?QzickxFz^{mHK4O-X&%1yp;=HD{l4fM9qP!_7@rE*x@id4Pr zi~S}No$u&z;|BF)LW@*PTqxH>%LjLPsqjbfoYKb##ZC%a7=~koIYM zmop%$v>&!H6j>ts>Fro#j5h2piQNFh+hXu41F>}BO1;FV0y1*BV+8w|&LD6jir zy*q@`a7%17vnRQ1U@I=&Qi)>W?v#xcrA~ZoyOiM%yzP@lu-zQ~IfD(-gHFUg!K=F% z=G4B$4$2xTfDWBTlW@W#^wjFh-*^+2-S65>^U?bpXLYksU(;ZeEF51c&PZ-IGU?cP z*hJAvNswxe(}6Tl-j-=WEfvohUnv}&Wv8gK4D$vIt?K}H%dTk4JC8ysU_W-Kmw)h4 zuTDO{+s_JYsKKjPmL}U$#KsX#-$7+(mM~BM;Sg7m%9PI1^+dhVqsL8BUqJ&N3<997 zy2<4P@O>ugy9e`?kjkLdd!;*#Ds^|K%T}?{itlqZDf)f$HMOl)nkTMwteuvk_!&)A zoi2|S4%^-*tvcO-L<$vtDzy?9O1BRKsp;_wnkDT9QI*yGY=kLPwPXW~8zNuZ?je0Y z5|S}gY@jM$kG^$CrIXZ^@=?Y+z6KIqg>dE2f;`oEzb-BJY^d_$<;ms-Vx`H&H{kK! z#Y5Rv%1V)4VfVY1hfwNT z<%(24Jfird_~p8}*k?aLcISR( zfzGM#J5P2=RWy~~xPcaud|-@NBe2KYrVfhtTwezL=fu_epP{7nBmfqLFxC36^5HbD9{^+0UkD5UyMIH=sPI4l(<6B?wq-}UIHOP zz%^XE3oF_^2M<+yRIVcz-16yE!62_qYSrLwN zA{pp^6;dgKOq`t_{xbXrhFmoz0-hro7$hD~PcqIdzPZ?A3cNfF&X|dNDPR2&$diC( z2jvi4a^)@v2u3{hhc%N8(CaRU#;*y}*Dq&1D|{Wd|0L&FU4NowN}Xq4nq06 zh0RUVXHjD^m8I-Nd4Ss49k~m)mT)cIE`IidI$@YSt2QR^A@(&gNXfW>gTCd41;HmdEn~#_NihACS>I&lmmz4Qi;myqczM4#*1lG3RG~EIHVNI_Y z_;0ig=AO_+DVApk+=!PYw~QeXDwlQ8!DFJ3uGtwTOM`>epL+F3@@EWRrRp*P)Q$jn z0+0Ha_56+h{u6%vONICVFL=NStYO~X7=$JXW!#ln`Zt>VTmFWl{Y3|ef%~pM z3N_a7Ojdi8VCqWny+FXDeZufefoQ0yYpJSft7Ulni@p(`R@Nn d2oCiP^t=B5CtNi{nllq1mZmnRtBk#G{s-d0T~Yu5 literal 0 HcmV?d00001 From 21ac0a7f4cf6d11da728b33ed5e8cfa65a5a8ae7 Mon Sep 17 00:00:00 2001 From: Victorien <65306057+Viicos@users.noreply.github.com> Date: Sat, 12 Oct 2024 01:05:13 +0200 Subject: [PATCH 38/80] gh-116938: Clarify documentation of `dict` and `dict.update` regarding the positional argument they accept (#125213) Co-authored-by: Alex Waygood --- Doc/library/stdtypes.rst | 25 +++++++++++++------------ Lib/_collections_abc.py | 2 +- 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst index 833c71c4ce4b9a6..a6e2e3b8928ebe6 100644 --- a/Doc/library/stdtypes.rst +++ b/Doc/library/stdtypes.rst @@ -4505,14 +4505,14 @@ can be used interchangeably to index the same dictionary entry. ``dict([('foo', 100), ('bar', 200)])``, ``dict(foo=100, bar=200)`` If no positional argument is given, an empty dictionary is created. - If a positional argument is given and it is a mapping object, a dictionary - is created with the same key-value pairs as the mapping object. Otherwise, - the positional argument must be an :term:`iterable` object. Each item in - the iterable must itself be an iterable with exactly two objects. The - first object of each item becomes a key in the new dictionary, and the - second object the corresponding value. If a key occurs more than once, the - last value for that key becomes the corresponding value in the new - dictionary. + If a positional argument is given and it defines a ``keys()`` method, a + dictionary is created by calling :meth:`~object.__getitem__` on the argument with + each returned key from the method. Otherwise, the positional argument must be an + :term:`iterable` object. Each item in the iterable must itself be an iterable + with exactly two elements. The first element of each item becomes a key in the + new dictionary, and the second element the corresponding value. If a key occurs + more than once, the last value for that key becomes the corresponding value in + the new dictionary. If keyword arguments are given, the keyword arguments and their values are added to the dictionary created from the positional argument. If a key @@ -4669,10 +4669,11 @@ can be used interchangeably to index the same dictionary entry. Update the dictionary with the key/value pairs from *other*, overwriting existing keys. Return ``None``. - :meth:`update` accepts either another dictionary object or an iterable of - key/value pairs (as tuples or other iterables of length two). If keyword - arguments are specified, the dictionary is then updated with those - key/value pairs: ``d.update(red=1, blue=2)``. + :meth:`update` accepts either another object with a ``keys()`` method (in + which case :meth:`~object.__getitem__` is called with every key returned from + the method). or an iterable of key/value pairs (as tuples or other iterables + of length two). If keyword arguments are specified, the dictionary is then + updated with those key/value pairs: ``d.update(red=1, blue=2)``. .. method:: values() diff --git a/Lib/_collections_abc.py b/Lib/_collections_abc.py index c2edf6c8856c212..06667b7434ccefa 100644 --- a/Lib/_collections_abc.py +++ b/Lib/_collections_abc.py @@ -962,7 +962,7 @@ def clear(self): def update(self, other=(), /, **kwds): ''' D.update([E, ]**F) -> None. Update D from mapping/iterable E and F. - If E present and has a .keys() method, does: for k in E: D[k] = E[k] + If E present and has a .keys() method, does: for k in E.keys(): D[k] = E[k] If E present and lacks .keys() method, does: for (k, v) in E: D[k] = v In either case, this is followed by: for k, v in F.items(): D[k] = v ''' From 979c0df7c0adfb744159a5fc184043dc733d8534 Mon Sep 17 00:00:00 2001 From: Thomas Grainger Date: Sat, 12 Oct 2024 00:31:06 +0100 Subject: [PATCH 39/80] gh-124309: fix staggered race on eager tasks (#124847) This patch is entirely by Thomas and Peter Co-authored-by: Thomas Grainger Co-authored-by: Peter Bierma --- Lib/asyncio/staggered.py | 17 +++++-- .../test_asyncio/test_eager_task_factory.py | 46 +++++++++++++++++++ Lib/test/test_asyncio/test_staggered.py | 27 +++++++++++ ...-10-01-13-46-58.gh-issue-124390.dK1Zcm.rst | 1 + 4 files changed, 88 insertions(+), 3 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-10-01-13-46-58.gh-issue-124390.dK1Zcm.rst diff --git a/Lib/asyncio/staggered.py b/Lib/asyncio/staggered.py index 326c6f708944af2..0f4df8855a80b91 100644 --- a/Lib/asyncio/staggered.py +++ b/Lib/asyncio/staggered.py @@ -69,7 +69,11 @@ async def staggered_race(coro_fns, delay, *, loop=None): exceptions = [] running_tasks = [] - async def run_one_coro(previous_failed) -> None: + async def run_one_coro(ok_to_start, previous_failed) -> None: + # in eager tasks this waits for the calling task to append this task + # to running_tasks, in regular tasks this wait is a no-op that does + # not yield a future. See gh-124309. + await ok_to_start.wait() # Wait for the previous task to finish, or for delay seconds if previous_failed is not None: with contextlib.suppress(exceptions_mod.TimeoutError): @@ -85,8 +89,12 @@ async def run_one_coro(previous_failed) -> None: return # Start task that will run the next coroutine this_failed = locks.Event() - next_task = loop.create_task(run_one_coro(this_failed)) + next_ok_to_start = locks.Event() + next_task = loop.create_task(run_one_coro(next_ok_to_start, this_failed)) running_tasks.append(next_task) + # next_task has been appended to running_tasks so next_task is ok to + # start. + next_ok_to_start.set() assert len(running_tasks) == this_index + 2 # Prepare place to put this coroutine's exceptions if not won exceptions.append(None) @@ -116,8 +124,11 @@ async def run_one_coro(previous_failed) -> None: if i != this_index: t.cancel() - first_task = loop.create_task(run_one_coro(None)) + ok_to_start = locks.Event() + first_task = loop.create_task(run_one_coro(ok_to_start, None)) running_tasks.append(first_task) + # first_task has been appended to running_tasks so first_task is ok to start. + ok_to_start.set() try: # Wait for a growing list of tasks to all finish: poor man's version of # curio's TaskGroup or trio's nursery diff --git a/Lib/test/test_asyncio/test_eager_task_factory.py b/Lib/test/test_asyncio/test_eager_task_factory.py index 0777f39b5724860..31d2a00dbb8c9c7 100644 --- a/Lib/test/test_asyncio/test_eager_task_factory.py +++ b/Lib/test/test_asyncio/test_eager_task_factory.py @@ -213,6 +213,52 @@ async def run(): self.run_coro(run()) + def test_staggered_race_with_eager_tasks(self): + # See https://github.com/python/cpython/issues/124309 + + async def fail(): + await asyncio.sleep(0) + raise ValueError("no good") + + async def run(): + winner, index, excs = await asyncio.staggered.staggered_race( + [ + lambda: asyncio.sleep(2, result="sleep2"), + lambda: asyncio.sleep(1, result="sleep1"), + lambda: fail() + ], + delay=0.25 + ) + self.assertEqual(winner, 'sleep1') + self.assertEqual(index, 1) + self.assertIsNone(excs[index]) + self.assertIsInstance(excs[0], asyncio.CancelledError) + self.assertIsInstance(excs[2], ValueError) + + self.run_coro(run()) + + def test_staggered_race_with_eager_tasks_no_delay(self): + # See https://github.com/python/cpython/issues/124309 + async def fail(): + raise ValueError("no good") + + async def run(): + winner, index, excs = await asyncio.staggered.staggered_race( + [ + lambda: fail(), + lambda: asyncio.sleep(1, result="sleep1"), + lambda: asyncio.sleep(0, result="sleep0"), + ], + delay=None + ) + self.assertEqual(winner, 'sleep1') + self.assertEqual(index, 1) + self.assertIsNone(excs[index]) + self.assertIsInstance(excs[0], ValueError) + self.assertEqual(len(excs), 2) + + self.run_coro(run()) + class PyEagerTaskFactoryLoopTests(EagerTaskFactoryLoopTests, test_utils.TestCase): Task = tasks._PyTask diff --git a/Lib/test/test_asyncio/test_staggered.py b/Lib/test/test_asyncio/test_staggered.py index e6e32f7dbbbcba0..74941f704c48901 100644 --- a/Lib/test/test_asyncio/test_staggered.py +++ b/Lib/test/test_asyncio/test_staggered.py @@ -95,3 +95,30 @@ async def coro(index): self.assertEqual(len(excs), 2) self.assertIsInstance(excs[0], ValueError) self.assertIsInstance(excs[1], ValueError) + + + async def test_multiple_winners(self): + event = asyncio.Event() + + async def coro(index): + await event.wait() + return index + + async def do_set(): + event.set() + await asyncio.Event().wait() + + winner, index, excs = await staggered_race( + [ + lambda: coro(0), + lambda: coro(1), + do_set, + ], + delay=0.1, + ) + self.assertIs(winner, 0) + self.assertIs(index, 0) + self.assertEqual(len(excs), 3) + self.assertIsNone(excs[0], None) + self.assertIsInstance(excs[1], asyncio.CancelledError) + self.assertIsInstance(excs[2], asyncio.CancelledError) diff --git a/Misc/NEWS.d/next/Library/2024-10-01-13-46-58.gh-issue-124390.dK1Zcm.rst b/Misc/NEWS.d/next/Library/2024-10-01-13-46-58.gh-issue-124390.dK1Zcm.rst new file mode 100644 index 000000000000000..89610fa44bf7438 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-01-13-46-58.gh-issue-124390.dK1Zcm.rst @@ -0,0 +1 @@ +Fixed :exc:`AssertionError` when using :func:`!asyncio.staggered.staggered_race` with :attr:`asyncio.eager_task_factory`. From 5a074aab845f82f4a150c27b905dae05c337d381 Mon Sep 17 00:00:00 2001 From: Rafael Fontenelle Date: Fri, 11 Oct 2024 21:40:33 -0300 Subject: [PATCH 40/80] Doc: Fix a typo in "Function Examples" in the control-flow tutorial (#125338) --- Doc/tutorial/controlflow.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/tutorial/controlflow.rst b/Doc/tutorial/controlflow.rst index fd765e58ff2485e..9b73ac475c78d5d 100644 --- a/Doc/tutorial/controlflow.rst +++ b/Doc/tutorial/controlflow.rst @@ -832,7 +832,7 @@ parameters as there is a ``/`` in the function definition:: File "", line 1, in TypeError: pos_only_arg() got some positional-only arguments passed as keyword arguments: 'arg' -The third function ``kwd_only_args`` only allows keyword arguments as indicated +The third function ``kwd_only_arg`` only allows keyword arguments as indicated by a ``*`` in the function definition:: >>> kwd_only_arg(3) From 5d8739e956cd20d3860133b384518a3c5c74e5ae Mon Sep 17 00:00:00 2001 From: Kumar Aditya Date: Sat, 12 Oct 2024 12:40:34 +0530 Subject: [PATCH 41/80] gh-111924: use atomics for interp id refcounting (#125321) --- Include/internal/pycore_interp.h | 6 ++-- Python/pystate.c | 54 ++++---------------------------- 2 files changed, 8 insertions(+), 52 deletions(-) diff --git a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h index d7e584094f78395..36cd71e5a007d54 100644 --- a/Include/internal/pycore_interp.h +++ b/Include/internal/pycore_interp.h @@ -102,9 +102,8 @@ struct _is { PyInterpreterState *next; int64_t id; - int64_t id_refcount; + Py_ssize_t id_refcount; int requires_idref; - PyThread_type_lock id_mutex; #define _PyInterpreterState_WHENCE_NOTSET -1 #define _PyInterpreterState_WHENCE_UNKNOWN 0 @@ -318,8 +317,7 @@ _PyInterpreterState_SetFinalizing(PyInterpreterState *interp, PyThreadState *tst PyAPI_FUNC(int64_t) _PyInterpreterState_ObjectToID(PyObject *); PyAPI_FUNC(PyInterpreterState *) _PyInterpreterState_LookUpID(int64_t); PyAPI_FUNC(PyInterpreterState *) _PyInterpreterState_LookUpIDObject(PyObject *); -PyAPI_FUNC(int) _PyInterpreterState_IDInitref(PyInterpreterState *); -PyAPI_FUNC(int) _PyInterpreterState_IDIncref(PyInterpreterState *); +PyAPI_FUNC(void) _PyInterpreterState_IDIncref(PyInterpreterState *); PyAPI_FUNC(void) _PyInterpreterState_IDDecref(PyInterpreterState *); PyAPI_FUNC(int) _PyInterpreterState_IsReady(PyInterpreterState *interp); diff --git a/Python/pystate.c b/Python/pystate.c index 45e79ade7b60350..5d94b7714bd607f 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -523,12 +523,6 @@ _PyRuntimeState_ReInitThreads(_PyRuntimeState *runtime) _PyTypes_AfterFork(); - /* bpo-42540: id_mutex is freed by _PyInterpreterState_Delete, which does - * not force the default allocator. */ - if (_PyThread_at_fork_reinit(&runtime->interpreters.main->id_mutex) < 0) { - return _PyStatus_ERR("Failed to reinitialize runtime locks"); - } - PyStatus status = gilstate_tss_reinit(runtime); if (_PyStatus_EXCEPTION(status)) { return status; @@ -629,6 +623,8 @@ init_interpreter(PyInterpreterState *interp, assert(id > 0 || (id == 0 && interp == runtime->interpreters.main)); interp->id = id; + interp->id_refcount = 0; + assert(runtime->interpreters.head == interp); assert(next != NULL || (interp == runtime->interpreters.main)); interp->next = next; @@ -989,10 +985,6 @@ PyInterpreterState_Delete(PyInterpreterState *interp) } HEAD_UNLOCK(runtime); - if (interp->id_mutex != NULL) { - PyThread_free_lock(interp->id_mutex); - } - _Py_qsbr_fini(interp); _PyObject_FiniState(interp); @@ -1031,9 +1023,6 @@ _PyInterpreterState_DeleteExceptMain(_PyRuntimeState *runtime) // the "current" tstate to be set? PyInterpreterState_Clear(interp); // XXX must activate? zapthreads(interp); - if (interp->id_mutex != NULL) { - PyThread_free_lock(interp->id_mutex); - } PyInterpreterState *prev_interp = interp; interp = interp->next; free_interpreter(prev_interp); @@ -1247,9 +1236,6 @@ PyInterpreterState_GetID(PyInterpreterState *interp) PyObject * _PyInterpreterState_GetIDObject(PyInterpreterState *interp) { - if (_PyInterpreterState_IDInitref(interp) != 0) { - return NULL; - }; int64_t interpid = interp->id; if (interpid < 0) { return NULL; @@ -1259,50 +1245,22 @@ _PyInterpreterState_GetIDObject(PyInterpreterState *interp) } -int -_PyInterpreterState_IDInitref(PyInterpreterState *interp) -{ - if (interp->id_mutex != NULL) { - return 0; - } - interp->id_mutex = PyThread_allocate_lock(); - if (interp->id_mutex == NULL) { - PyErr_SetString(PyExc_RuntimeError, - "failed to create init interpreter ID mutex"); - return -1; - } - interp->id_refcount = 0; - return 0; -} - -int +void _PyInterpreterState_IDIncref(PyInterpreterState *interp) { - if (_PyInterpreterState_IDInitref(interp) < 0) { - return -1; - } - - PyThread_acquire_lock(interp->id_mutex, WAIT_LOCK); - interp->id_refcount += 1; - PyThread_release_lock(interp->id_mutex); - return 0; + _Py_atomic_add_ssize(&interp->id_refcount, 1); } void _PyInterpreterState_IDDecref(PyInterpreterState *interp) { - assert(interp->id_mutex != NULL); _PyRuntimeState *runtime = interp->runtime; - PyThread_acquire_lock(interp->id_mutex, WAIT_LOCK); - assert(interp->id_refcount != 0); - interp->id_refcount -= 1; - int64_t refcount = interp->id_refcount; - PyThread_release_lock(interp->id_mutex); + Py_ssize_t refcount = _Py_atomic_add_ssize(&interp->id_refcount, -1); - if (refcount == 0 && interp->requires_idref) { + if (refcount == 1 && interp->requires_idref) { PyThreadState *tstate = _PyThreadState_NewBound(interp, _PyThreadState_WHENCE_FINI); From 4a943c3251d1b3fdf50cfb9264ae74e5bc845c3c Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Sat, 12 Oct 2024 09:28:34 +0200 Subject: [PATCH 42/80] gh-125196: Use PyUnicodeWriter in parser (#125271) Replace the private _PyUnicodeWriter API with the public PyUnicodeWriter API in _PyPegen_concatenate_strings(). --- Parser/action_helpers.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/Parser/action_helpers.c b/Parser/action_helpers.c index 24b817c6f8ff270..cb21777f5661892 100644 --- a/Parser/action_helpers.c +++ b/Parser/action_helpers.c @@ -1615,7 +1615,6 @@ _PyPegen_concatenate_strings(Parser *p, asdl_expr_seq *strings, } /* build folded list */ - _PyUnicodeWriter writer; current_pos = 0; for (i = 0; i < n_flattened_elements; i++) { expr_ty elem = asdl_seq_GET(flattened, i); @@ -1635,14 +1634,17 @@ _PyPegen_concatenate_strings(Parser *p, asdl_expr_seq *strings, "abc" u"abc" -> "abcabc" */ PyObject *kind = elem->v.Constant.kind; - _PyUnicodeWriter_Init(&writer); + PyUnicodeWriter *writer = PyUnicodeWriter_Create(0); + if (writer == NULL) { + return NULL; + } expr_ty last_elem = elem; for (j = i; j < n_flattened_elements; j++) { expr_ty current_elem = asdl_seq_GET(flattened, j); if (current_elem->kind == Constant_kind) { - if (_PyUnicodeWriter_WriteStr( - &writer, current_elem->v.Constant.value)) { - _PyUnicodeWriter_Dealloc(&writer); + if (PyUnicodeWriter_WriteStr(writer, + current_elem->v.Constant.value)) { + PyUnicodeWriter_Discard(writer); return NULL; } last_elem = current_elem; @@ -1652,9 +1654,8 @@ _PyPegen_concatenate_strings(Parser *p, asdl_expr_seq *strings, } i = j - 1; - PyObject *concat_str = _PyUnicodeWriter_Finish(&writer); + PyObject *concat_str = PyUnicodeWriter_Finish(writer); if (concat_str == NULL) { - _PyUnicodeWriter_Dealloc(&writer); return NULL; } if (_PyArena_AddPyObject(p->arena, concat_str) < 0) { From eb2d268ac7480b5e2b4ffb9a644cad7ac75ae954 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sat, 12 Oct 2024 13:10:50 +0300 Subject: [PATCH 43/80] gh-65865: Raise early errors for invalid help strings in argparse (GH-124899) --- Lib/argparse.py | 31 ++++++++++++++----- Lib/test/test_argparse.py | 31 +++++++++++++++++++ ...4-10-02-16-35-07.gh-issue-65865.S2D4wq.rst | 3 ++ 3 files changed, 58 insertions(+), 7 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-10-02-16-35-07.gh-issue-65865.S2D4wq.rst diff --git a/Lib/argparse.py b/Lib/argparse.py index 2d8a7ef343a4ef8..208c1827f9aca75 100644 --- a/Lib/argparse.py +++ b/Lib/argparse.py @@ -588,17 +588,20 @@ def _format_args(self, action, default_metavar): return result def _expand_help(self, action): + help_string = self._get_help_string(action) + if '%' not in help_string: + return help_string params = dict(vars(action), prog=self._prog) for name in list(params): - if params[name] is SUPPRESS: + value = params[name] + if value is SUPPRESS: del params[name] - for name in list(params): - if hasattr(params[name], '__name__'): - params[name] = params[name].__name__ + elif hasattr(value, '__name__'): + params[name] = value.__name__ if params.get('choices') is not None: choices_str = ', '.join([str(c) for c in params['choices']]) params['choices'] = choices_str - return self._get_help_string(action) % params + return help_string % params def _iter_indented_subactions(self, action): try: @@ -1180,9 +1183,13 @@ def add_parser(self, name, *, deprecated=False, **kwargs): help = kwargs.pop('help') choice_action = self._ChoicesPseudoAction(name, aliases, help) self._choices_actions.append(choice_action) + else: + choice_action = None # create the parser and add it to the map parser = self._parser_class(**kwargs) + if choice_action is not None: + parser._check_help(choice_action) self._name_parser_map[name] = parser # make parser available under aliases also @@ -1449,11 +1456,12 @@ def add_argument(self, *args, **kwargs): # raise an error if the metavar does not match the type if hasattr(self, "_get_formatter"): + formatter = self._get_formatter() try: - self._get_formatter()._format_args(action, None) + formatter._format_args(action, None) except TypeError: raise ValueError("length of metavar tuple does not match nargs") - + self._check_help(action) return self._add_action(action) def add_argument_group(self, *args, **kwargs): @@ -1635,6 +1643,14 @@ def _handle_conflict_resolve(self, action, conflicting_actions): if not action.option_strings: action.container._remove_action(action) + def _check_help(self, action): + if action.help and hasattr(self, "_get_formatter"): + formatter = self._get_formatter() + try: + formatter._expand_help(action) + except (ValueError, TypeError, KeyError) as exc: + raise ValueError('badly formed help string') from exc + class _ArgumentGroup(_ActionsContainer): @@ -1852,6 +1868,7 @@ def add_subparsers(self, **kwargs): # create the parsers action and add it to the positionals list parsers_class = self._pop_action_class(kwargs, 'parsers') action = parsers_class(option_strings=[], **kwargs) + self._check_help(action) self._subparsers._add_action(action) # return the created parsers action diff --git a/Lib/test/test_argparse.py b/Lib/test/test_argparse.py index 1ebbc21bc1755b5..000b810454f5849 100644 --- a/Lib/test/test_argparse.py +++ b/Lib/test/test_argparse.py @@ -2623,6 +2623,29 @@ def test_parser_command_help(self): --foo foo help ''')) + def assert_bad_help(self, context_type, func, *args, **kwargs): + with self.assertRaisesRegex(ValueError, 'badly formed help string') as cm: + func(*args, **kwargs) + self.assertIsInstance(cm.exception.__context__, context_type) + + def test_invalid_subparsers_help(self): + parser = ErrorRaisingArgumentParser(prog='PROG') + self.assert_bad_help(ValueError, parser.add_subparsers, help='%Y-%m-%d') + parser = ErrorRaisingArgumentParser(prog='PROG') + self.assert_bad_help(KeyError, parser.add_subparsers, help='%(spam)s') + parser = ErrorRaisingArgumentParser(prog='PROG') + self.assert_bad_help(TypeError, parser.add_subparsers, help='%(prog)d') + + def test_invalid_subparser_help(self): + parser = ErrorRaisingArgumentParser(prog='PROG') + subparsers = parser.add_subparsers() + self.assert_bad_help(ValueError, subparsers.add_parser, '1', + help='%Y-%m-%d') + self.assert_bad_help(KeyError, subparsers.add_parser, '1', + help='%(spam)s') + self.assert_bad_help(TypeError, subparsers.add_parser, '1', + help='%(prog)d') + def test_subparser_title_help(self): parser = ErrorRaisingArgumentParser(prog='PROG', description='main description') @@ -5375,6 +5398,14 @@ def test_invalid_action(self): self.assertValueError('--foo', action="store-true", errmsg='unknown action') + def test_invalid_help(self): + self.assertValueError('--foo', help='%Y-%m-%d', + errmsg='badly formed help string') + self.assertValueError('--foo', help='%(spam)s', + errmsg='badly formed help string') + self.assertValueError('--foo', help='%(prog)d', + errmsg='badly formed help string') + def test_multiple_dest(self): parser = argparse.ArgumentParser() parser.add_argument(dest='foo') diff --git a/Misc/NEWS.d/next/Library/2024-10-02-16-35-07.gh-issue-65865.S2D4wq.rst b/Misc/NEWS.d/next/Library/2024-10-02-16-35-07.gh-issue-65865.S2D4wq.rst new file mode 100644 index 000000000000000..106a8b81140520f --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-02-16-35-07.gh-issue-65865.S2D4wq.rst @@ -0,0 +1,3 @@ +:mod:`argparse` now raises early error for invalid ``help`` arguments to +:meth:`~argparse.ArgumentParser.add_argument`, +:meth:`~argparse.ArgumentParser.add_subparsers` and :meth:`!add_parser`. From a6c0c64de0ade400df7995f1e9480b6fc0f863aa Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sat, 12 Oct 2024 14:46:28 +0300 Subject: [PATCH 44/80] gh-59330: Improve error message for dest= for positionals (GH-125215) Also improve the documentation. Specify how dest and metavar are derived from add_argument() positional arguments. Co-authored-by: Simon Law --- Doc/library/argparse.rst | 19 +++++++++++++++++++ Lib/argparse.py | 3 ++- Lib/test/test_argparse.py | 3 ++- 3 files changed, 23 insertions(+), 2 deletions(-) diff --git a/Doc/library/argparse.rst b/Doc/library/argparse.rst index d337de87ca8f396..19f832051a9ee82 100644 --- a/Doc/library/argparse.rst +++ b/Doc/library/argparse.rst @@ -636,6 +636,25 @@ be positional:: usage: PROG [-h] [-f FOO] bar PROG: error: the following arguments are required: bar +By default, argparse automatically handles the internal naming and +display names of arguments, simplifying the process without requiring +additional configuration. +As such, you do not need to specify the dest_ and metavar_ parameters. +The dest_ parameter defaults to the argument name with underscores ``_`` +replacing hyphens ``-`` . The metavar_ parameter defaults to the +upper-cased name. For example:: + + >>> parser = argparse.ArgumentParser(prog='PROG') + >>> parser.add_argument('--foo-bar') + >>> parser.parse_args(['--foo-bar', 'FOO-BAR'] + Namespace(foo_bar='FOO-BAR') + >>> parser.print_help() + usage: [-h] [--foo-bar FOO-BAR] + + optional arguments: + -h, --help show this help message and exit + --foo-bar FOO-BAR + .. _action: diff --git a/Lib/argparse.py b/Lib/argparse.py index 208c1827f9aca75..64dbd7149e769c2 100644 --- a/Lib/argparse.py +++ b/Lib/argparse.py @@ -1424,7 +1424,8 @@ def add_argument(self, *args, **kwargs): chars = self.prefix_chars if not args or len(args) == 1 and args[0][0] not in chars: if args and 'dest' in kwargs: - raise ValueError('dest supplied twice for positional argument') + raise ValueError('dest supplied twice for positional argument,' + ' did you mean metavar?') kwargs = self._get_positional_kwargs(*args, **kwargs) # otherwise, we're adding an optional argument diff --git a/Lib/test/test_argparse.py b/Lib/test/test_argparse.py index 000b810454f5849..61ddb5f16cc44f3 100644 --- a/Lib/test/test_argparse.py +++ b/Lib/test/test_argparse.py @@ -5411,7 +5411,8 @@ def test_multiple_dest(self): parser.add_argument(dest='foo') with self.assertRaises(ValueError) as cm: parser.add_argument('bar', dest='baz') - self.assertIn('dest supplied twice for positional argument', + self.assertIn('dest supplied twice for positional argument,' + ' did you mean metavar?', str(cm.exception)) def test_no_argument_actions(self): From 07c2d15977738165e9dc4248e7edda7c75ecc14b Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sat, 12 Oct 2024 14:53:00 +0300 Subject: [PATCH 45/80] gh-85935: Explicitly document the case nargs=0 in argparse (GH-125302) --- Doc/library/argparse.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Doc/library/argparse.rst b/Doc/library/argparse.rst index 19f832051a9ee82..d58c75eef3e739d 100644 --- a/Doc/library/argparse.rst +++ b/Doc/library/argparse.rst @@ -751,6 +751,9 @@ how the command-line arguments should be handled. The supplied actions are: .. versionadded:: 3.8 +Only actions that consume command-line arguments (e.g. ``'store'``, +``'append'`` or ``'extend'``) can be used with positional arguments. + You may also specify an arbitrary action by passing an Action subclass or other object that implements the same interface. The ``BooleanOptionalAction`` is available in ``argparse`` and adds support for boolean actions such as @@ -878,6 +881,8 @@ See also :ref:`specifying-ambiguous-arguments`. The supported values are: If the ``nargs`` keyword argument is not provided, the number of arguments consumed is determined by the action_. Generally this means a single command-line argument will be consumed and a single item (not a list) will be produced. +Actions that do not consume command-line arguments (e.g. +``'store_const'``) set ``nargs=0``. .. _const: From 63cf4e914f879ee28a75c02e867baa7c6047ea2b Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sat, 12 Oct 2024 15:15:37 +0300 Subject: [PATCH 46/80] gh-125254: Fix error report about ambiguous option in argparse (GH-125273) This was a regression introduced in gh-58573. It was only tested for the case when the ambiguous option is the last argument in the command line. --- Lib/argparse.py | 2 +- Lib/test/test_argparse.py | 14 ++++++++++++-- .../2024-10-10-19-57-35.gh-issue-125254.RtZxXS.rst | 1 + 3 files changed, 14 insertions(+), 3 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-10-10-19-57-35.gh-issue-125254.RtZxXS.rst diff --git a/Lib/argparse.py b/Lib/argparse.py index 64dbd7149e769c2..cbecb3b753c2b9c 100644 --- a/Lib/argparse.py +++ b/Lib/argparse.py @@ -2019,7 +2019,7 @@ def consume_optional(start_index): if len(option_tuples) > 1: options = ', '.join([option_string for action, option_string, sep, explicit_arg in option_tuples]) - args = {'option': arg_string, 'matches': options} + args = {'option': arg_strings[start_index], 'matches': options} msg = _('ambiguous option: %(option)s could match %(matches)s') raise ArgumentError(None, msg % args) diff --git a/Lib/test/test_argparse.py b/Lib/test/test_argparse.py index 61ddb5f16cc44f3..1fc97de78f7f895 100644 --- a/Lib/test/test_argparse.py +++ b/Lib/test/test_argparse.py @@ -6730,9 +6730,19 @@ def test_conflicting_mutually_exclusive_args_zero_or_more_with_metavar2(self): def test_ambiguous_option(self): self.parser.add_argument('--foobaz') self.parser.add_argument('--fooble', action='store_true') + self.parser.add_argument('--foogle') self.assertRaisesRegex(argparse.ArgumentError, - "ambiguous option: --foob could match --foobaz, --fooble", - self.parser.parse_args, ['--foob']) + "ambiguous option: --foob could match --foobaz, --fooble", + self.parser.parse_args, ['--foob']) + self.assertRaisesRegex(argparse.ArgumentError, + "ambiguous option: --foob=1 could match --foobaz, --fooble$", + self.parser.parse_args, ['--foob=1']) + self.assertRaisesRegex(argparse.ArgumentError, + "ambiguous option: --foob could match --foobaz, --fooble$", + self.parser.parse_args, ['--foob', '1', '--foogle', '2']) + self.assertRaisesRegex(argparse.ArgumentError, + "ambiguous option: --foob=1 could match --foobaz, --fooble$", + self.parser.parse_args, ['--foob=1', '--foogle', '2']) def test_os_error(self): self.parser.add_argument('file') diff --git a/Misc/NEWS.d/next/Library/2024-10-10-19-57-35.gh-issue-125254.RtZxXS.rst b/Misc/NEWS.d/next/Library/2024-10-10-19-57-35.gh-issue-125254.RtZxXS.rst new file mode 100644 index 000000000000000..abe37fefedc3be7 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-10-19-57-35.gh-issue-125254.RtZxXS.rst @@ -0,0 +1 @@ +Fix a bug where ArgumentError includes the incorrect ambiguous option in :mod:`argparse`. From 9944ad388c457325456152257b977410c4ec3593 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sat, 12 Oct 2024 16:04:17 +0300 Subject: [PATCH 47/80] gh-85935: Check for nargs=0 for positional arguments in argparse (GH-124839) Raise ValueError in add_argument() if either explicit nargs=0 or action that does not consume arguments (like 'store_const' or 'store_true') is specified for positional argument. --- Lib/argparse.py | 10 +++++++++- Lib/test/test_argparse.py | 7 +++++-- .../2024-10-01-13-11-53.gh-issue-85935.CTwJUy.rst | 4 ++++ 3 files changed, 18 insertions(+), 3 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-10-01-13-11-53.gh-issue-85935.CTwJUy.rst diff --git a/Lib/argparse.py b/Lib/argparse.py index cbecb3b753c2b9c..550415dc93478b4 100644 --- a/Lib/argparse.py +++ b/Lib/argparse.py @@ -1441,11 +1441,17 @@ def add_argument(self, *args, **kwargs): kwargs['default'] = self.argument_default # create the action object, and add it to the parser + action_name = kwargs.get('action') action_class = self._pop_action_class(kwargs) if not callable(action_class): raise ValueError('unknown action "%s"' % (action_class,)) action = action_class(**kwargs) + # raise an error if action for positional argument does not + # consume arguments + if not action.option_strings and action.nargs == 0: + raise ValueError(f'action {action_name!r} is not valid for positional arguments') + # raise an error if the action type is not callable type_func = self._registry_get('type', action.type, action.type) if not callable(type_func): @@ -1554,7 +1560,9 @@ def _get_positional_kwargs(self, dest, **kwargs): # mark positional arguments as required if at least one is # always required nargs = kwargs.get('nargs') - if nargs not in [OPTIONAL, ZERO_OR_MORE, REMAINDER, SUPPRESS, 0]: + if nargs == 0: + raise ValueError('nargs for positionals must be != 0') + if nargs not in [OPTIONAL, ZERO_OR_MORE, REMAINDER, SUPPRESS]: kwargs['required'] = True # return the keyword arguments with no option strings diff --git a/Lib/test/test_argparse.py b/Lib/test/test_argparse.py index 1fc97de78f7f895..f52a4b6bdd8acac 100644 --- a/Lib/test/test_argparse.py +++ b/Lib/test/test_argparse.py @@ -5424,8 +5424,11 @@ def test_no_argument_actions(self): with self.subTest(attrs=attrs): self.assertTypeError('-x', action=action, **attrs) self.assertTypeError('x', action=action, **attrs) + self.assertValueError('x', action=action, + errmsg=f"action '{action}' is not valid for positional arguments") self.assertTypeError('-x', action=action, nargs=0) - self.assertTypeError('x', action=action, nargs=0) + self.assertValueError('x', action=action, nargs=0, + errmsg='nargs for positionals must be != 0') def test_no_argument_no_const_actions(self): # options with zero arguments @@ -5445,7 +5448,7 @@ def test_more_than_one_argument_actions(self): self.assertValueError('-x', nargs=0, action=action, errmsg=f'nargs for {action_name} actions must be != 0') self.assertValueError('spam', nargs=0, action=action, - errmsg=f'nargs for {action_name} actions must be != 0') + errmsg='nargs for positionals must be != 0') # const is disallowed with non-optional arguments for nargs in [1, '*', '+']: diff --git a/Misc/NEWS.d/next/Library/2024-10-01-13-11-53.gh-issue-85935.CTwJUy.rst b/Misc/NEWS.d/next/Library/2024-10-01-13-11-53.gh-issue-85935.CTwJUy.rst new file mode 100644 index 000000000000000..553f206bf26337c --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-01-13-11-53.gh-issue-85935.CTwJUy.rst @@ -0,0 +1,4 @@ +:meth:`argparse.ArgumentParser.add_argument` now raises an exception if +an :ref:`action` that does not consume arguments (like 'store_const' or +'store_true') or explicit ``nargs=0`` are specified for positional +arguments. From dcd58c50844dae0d83517e88518a677914ea594b Mon Sep 17 00:00:00 2001 From: "Bernhard M. Wiedemann" Date: Sat, 12 Oct 2024 19:18:48 +0200 Subject: [PATCH 48/80] gh-125260: Change the default ``gzip.compress()`` mtime to 0 (#125261) This follows GNU gzip, which defaults to using 0 as the mtime for compressing stdin, where no file mtime is involved. This makes the output of gzip.compress() deterministic by default, greatly helping reproducible builds. Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com> --- Doc/library/gzip.rst | 9 +++++++-- Lib/gzip.py | 6 +++--- Lib/test/test_gzip.py | 11 +++++++++++ .../2024-10-11-04-04-38.gh-issue-125260.PeZ0Mb.rst | 2 ++ 4 files changed, 23 insertions(+), 5 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-10-11-04-04-38.gh-issue-125260.PeZ0Mb.rst diff --git a/Doc/library/gzip.rst b/Doc/library/gzip.rst index 6b6e158f6eba2cf..f24e73517e57674 100644 --- a/Doc/library/gzip.rst +++ b/Doc/library/gzip.rst @@ -184,11 +184,12 @@ The module defines the following items: attribute instead. -.. function:: compress(data, compresslevel=9, *, mtime=None) +.. function:: compress(data, compresslevel=9, *, mtime=0) Compress the *data*, returning a :class:`bytes` object containing the compressed data. *compresslevel* and *mtime* have the same meaning as in - the :class:`GzipFile` constructor above. + the :class:`GzipFile` constructor above, + but *mtime* defaults to 0 for reproducible output. .. versionadded:: 3.2 .. versionchanged:: 3.8 @@ -203,6 +204,10 @@ The module defines the following items: .. versionchanged:: 3.13 The gzip header OS byte is guaranteed to be set to 255 when this function is used as was the case in 3.10 and earlier. + .. versionchanged:: 3.14 + The *mtime* parameter now defaults to 0 for reproducible output. + For the previous behaviour of using the current time, + pass ``None`` to *mtime*. .. function:: decompress(data) diff --git a/Lib/gzip.py b/Lib/gzip.py index ba753ce3050dd84..1a3c82ce7e0711a 100644 --- a/Lib/gzip.py +++ b/Lib/gzip.py @@ -580,12 +580,12 @@ def _rewind(self): self._new_member = True -def compress(data, compresslevel=_COMPRESS_LEVEL_BEST, *, mtime=None): +def compress(data, compresslevel=_COMPRESS_LEVEL_BEST, *, mtime=0): """Compress data in one shot and return the compressed string. compresslevel sets the compression level in range of 0-9. - mtime can be used to set the modification time. The modification time is - set to the current time by default. + mtime can be used to set the modification time. + The modification time is set to 0 by default, for reproducibility. """ # Wbits=31 automatically includes a gzip header and trailer. gzip_data = zlib.compress(data, level=compresslevel, wbits=31) diff --git a/Lib/test/test_gzip.py b/Lib/test/test_gzip.py index ae384c3849d49eb..bf6e1703db84510 100644 --- a/Lib/test/test_gzip.py +++ b/Lib/test/test_gzip.py @@ -713,6 +713,17 @@ def test_compress_mtime(self): f.read(1) # to set mtime attribute self.assertEqual(f.mtime, mtime) + def test_compress_mtime_default(self): + # test for gh-125260 + datac = gzip.compress(data1, mtime=0) + datac2 = gzip.compress(data1) + self.assertEqual(datac, datac2) + datac3 = gzip.compress(data1, mtime=None) + self.assertNotEqual(datac, datac3) + with gzip.GzipFile(fileobj=io.BytesIO(datac3), mode="rb") as f: + f.read(1) # to set mtime attribute + self.assertGreater(f.mtime, 1) + def test_compress_correct_level(self): for mtime in (0, 42): with self.subTest(mtime=mtime): diff --git a/Misc/NEWS.d/next/Library/2024-10-11-04-04-38.gh-issue-125260.PeZ0Mb.rst b/Misc/NEWS.d/next/Library/2024-10-11-04-04-38.gh-issue-125260.PeZ0Mb.rst new file mode 100644 index 000000000000000..fab524ea0185c2d --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-11-04-04-38.gh-issue-125260.PeZ0Mb.rst @@ -0,0 +1,2 @@ +The :func:`gzip.compress` *mtime* parameter now defaults to 0 for reproducible output. +Patch by Bernhard M. Wiedemann and Adam Turner. From c05f9dde8a12dfd63d3ade93da616042df2dc925 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sat, 12 Oct 2024 20:46:21 +0300 Subject: [PATCH 49/80] gh-53203: Fix strptime() for %c and %x formats on many locales (GH-124946) In some locales (like French or Hebrew) the full or abbreviated names of the default month and weekday used in __calc_date_time can be part of other name or constant part of the %c format. The month name can also match %m with constant suffix (like in Japanese). So the code failed to correctly distinguish formats %a, %A, %b, %B and %m. Cycle all month and all days of the week to find the variable part and distinguish %a from %A and %b from %B or %m. Fixed locales for the following languges: Arabic, Bislama, Breton, Bodo, Kashubian, Chuvash, Estonian, French, Irish, Ge'ez, Gurajati, Manx Gaelic, Hebrew, Hindi, Chhattisgarhi, Haitian Kreyol, Japanese, Kannada, Korean, Marathi, Malay, Norwegian, Nynorsk, Punjabi, Rajasthani, Tok Pisin, Yoruba, Yue Chinese, Yau/Nungon and Chinese. Co-authored-by: Eli Bendersky --- Lib/_strptime.py | 127 +++++++++++++++--- Lib/test/test_strptime.py | 36 +++-- ...4-10-03-20-45-57.gh-issue-53203.3Sk4Ia.rst | 5 + 3 files changed, 134 insertions(+), 34 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-10-03-20-45-57.gh-issue-53203.3Sk4Ia.rst diff --git a/Lib/_strptime.py b/Lib/_strptime.py index a3f8bb544d518dc..89adc174e5ad30d 100644 --- a/Lib/_strptime.py +++ b/Lib/_strptime.py @@ -28,6 +28,18 @@ def _getlang(): # Figure out what the current language is set to. return locale.getlocale(locale.LC_TIME) +def _findall(haystack, needle): + # Find all positions of needle in haystack. + if not needle: + return + i = 0 + while True: + i = haystack.find(needle, i) + if i < 0: + break + yield i + i += len(needle) + class LocaleTime(object): """Stores and handles locale-specific information related to time. @@ -102,7 +114,8 @@ def __calc_am_pm(self): am_pm = [] for hour in (1, 22): time_tuple = time.struct_time((1999,3,17,hour,44,55,2,76,0)) - am_pm.append(time.strftime("%p", time_tuple).lower()) + # br_FR has AM/PM info (' ',' '). + am_pm.append(time.strftime("%p", time_tuple).lower().strip()) self.am_pm = am_pm def __calc_date_time(self): @@ -114,42 +127,114 @@ def __calc_date_time(self): # values within the format string is very important; it eliminates # possible ambiguity for what something represents. time_tuple = time.struct_time((1999,3,17,22,44,55,2,76,0)) - date_time = [None, None, None] - date_time[0] = time.strftime("%c", time_tuple).lower() - date_time[1] = time.strftime("%x", time_tuple).lower() - date_time[2] = time.strftime("%X", time_tuple).lower() - replacement_pairs = [('%', '%%'), (self.f_weekday[2], '%A'), - (self.f_month[3], '%B'), (self.a_weekday[2], '%a'), - (self.a_month[3], '%b'), (self.am_pm[1], '%p'), + time_tuple2 = time.struct_time((1999,1,3,1,1,1,6,3,0)) + replacement_pairs = [ ('1999', '%Y'), ('99', '%y'), ('22', '%H'), ('44', '%M'), ('55', '%S'), ('76', '%j'), ('17', '%d'), ('03', '%m'), ('3', '%m'), # '3' needed for when no leading zero. ('2', '%w'), ('10', '%I')] - replacement_pairs.extend([(tz, "%Z") for tz_values in self.timezone - for tz in tz_values]) - for offset,directive in ((0,'%c'), (1,'%x'), (2,'%X')): - current_format = date_time[offset] - for old, new in replacement_pairs: + date_time = [] + for directive in ('%c', '%x', '%X'): + current_format = time.strftime(directive, time_tuple).lower() + current_format = current_format.replace('%', '%%') + # The month and the day of the week formats are treated specially + # because of a possible ambiguity in some locales where the full + # and abbreviated names are equal or names of different types + # are equal. See doc of __find_month_format for more details. + lst, fmt = self.__find_weekday_format(directive) + if lst: + current_format = current_format.replace(lst[2], fmt, 1) + lst, fmt = self.__find_month_format(directive) + if lst: + current_format = current_format.replace(lst[3], fmt, 1) + if self.am_pm[1]: # Must deal with possible lack of locale info # manifesting itself as the empty string (e.g., Swedish's # lack of AM/PM info) or a platform returning a tuple of empty # strings (e.g., MacOS 9 having timezone as ('','')). - if old: - current_format = current_format.replace(old, new) + current_format = current_format.replace(self.am_pm[1], '%p') + for tz_values in self.timezone: + for tz in tz_values: + if tz: + current_format = current_format.replace(tz, "%Z") + for old, new in replacement_pairs: + current_format = current_format.replace(old, new) # If %W is used, then Sunday, 2005-01-03 will fall on week 0 since # 2005-01-03 occurs before the first Monday of the year. Otherwise # %U is used. - time_tuple = time.struct_time((1999,1,3,1,1,1,6,3,0)) - if '00' in time.strftime(directive, time_tuple): + if '00' in time.strftime(directive, time_tuple2): U_W = '%W' else: U_W = '%U' - date_time[offset] = current_format.replace('11', U_W) + current_format = current_format.replace('11', U_W) + date_time.append(current_format) self.LC_date_time = date_time[0] self.LC_date = date_time[1] self.LC_time = date_time[2] + def __find_month_format(self, directive): + """Find the month format appropriate for the current locale. + + In some locales (for example French and Hebrew), the default month + used in __calc_date_time has the same name in full and abbreviated + form. Also, the month name can by accident match other part of the + representation: the day of the week name (for example in Morisyen) + or the month number (for example in Japanese). Thus, cycle months + of the year and find all positions that match the month name for + each month, If no common positions are found, the representation + does not use the month name. + """ + full_indices = abbr_indices = None + for m in range(1, 13): + time_tuple = time.struct_time((1999, m, 17, 22, 44, 55, 2, 76, 0)) + datetime = time.strftime(directive, time_tuple).lower() + indices = set(_findall(datetime, self.f_month[m])) + if full_indices is None: + full_indices = indices + else: + full_indices &= indices + indices = set(_findall(datetime, self.a_month[m])) + if abbr_indices is None: + abbr_indices = indices + else: + abbr_indices &= indices + if not full_indices and not abbr_indices: + return None, None + if full_indices: + return self.f_month, '%B' + if abbr_indices: + return self.a_month, '%b' + return None, None + + def __find_weekday_format(self, directive): + """Find the day of the week format appropriate for the current locale. + + Similar to __find_month_format(). + """ + full_indices = abbr_indices = None + for wd in range(7): + time_tuple = time.struct_time((1999, 3, 17, 22, 44, 55, wd, 76, 0)) + datetime = time.strftime(directive, time_tuple).lower() + indices = set(_findall(datetime, self.f_weekday[wd])) + if full_indices is None: + full_indices = indices + else: + full_indices &= indices + if self.f_weekday[wd] != self.a_weekday[wd]: + indices = set(_findall(datetime, self.a_weekday[wd])) + if abbr_indices is None: + abbr_indices = indices + else: + abbr_indices &= indices + if not full_indices and not abbr_indices: + return None, None + if full_indices: + return self.f_weekday, '%A' + if abbr_indices: + return self.a_weekday, '%a' + return None, None + def __calc_timezone(self): # Set self.timezone by using time.tzname. # Do not worry about possibility of time.tzname[0] == time.tzname[1] @@ -187,7 +272,7 @@ def __init__(self, locale_time=None): 'd': r"(?P3[0-1]|[1-2]\d|0[1-9]|[1-9]| [1-9])", 'f': r"(?P[0-9]{1,6})", 'H': r"(?P2[0-3]|[0-1]\d|\d)", - 'I': r"(?P1[0-2]|0[1-9]|[1-9])", + 'I': r"(?P1[0-2]|0[1-9]|[1-9]| [1-9])", 'G': r"(?P\d\d\d\d)", 'j': r"(?P36[0-6]|3[0-5]\d|[1-2]\d\d|0[1-9]\d|00[1-9]|[1-9]\d|0[1-9]|[1-9])", 'm': r"(?P1[0-2]|0[1-9]|[1-9])", @@ -349,8 +434,8 @@ def _strptime(data_string, format="%a %b %d %H:%M:%S %Y"): _regex_cache[format] = format_regex found = format_regex.match(data_string) if not found: - raise ValueError("time data %r does not match format %r" % - (data_string, format)) + raise ValueError("time data %r does not match format %r :: /%s/" % + (data_string, format, format_regex.pattern)) if len(data_string) != found.end(): raise ValueError("unconverted data remains: %s" % data_string[found.end():]) diff --git a/Lib/test/test_strptime.py b/Lib/test/test_strptime.py index 37f6b08db28b3e5..79f48dfe44abde2 100644 --- a/Lib/test/test_strptime.py +++ b/Lib/test/test_strptime.py @@ -5,6 +5,7 @@ import locale import re import os +import platform import sys from test import support from test.support import warnings_helper @@ -13,6 +14,13 @@ import _strptime +libc_ver = platform.libc_ver() +if libc_ver[0] == 'glibc': + glibc_ver = tuple(map(int, libc_ver[1].split('.'))) +else: + glibc_ver = None + + class getlang_Tests(unittest.TestCase): """Test _getlang""" def test_basic(self): @@ -478,16 +486,16 @@ def test_bad_timezone(self): # * Year is not included: ha_NG. # * Use non-Gregorian calendar: lo_LA, thai, th_TH. # - # BUG: Generates invalid regexp for br_FR, csb_PL, Arabic. - # BUG: Generates regexp that does not match the current date and time - # for fa_IR, gez_ER, gez_ET, lzh_TW, my_MM, or_IN, shn_MM, yo_NG. # BUG: Generates regexp that does not match the current date and time - # for fa_IR, gez_ER, gez_ET, lzh_TW, my_MM, or_IN, shn_MM, yo_NG, - # fr_FR, ja_JP, he_IL, ko_KR, zh_CN, etc. - @run_with_locales('LC_TIME', 'C', 'en_US', 'de_DE', - 'eu_ES', 'mfe_MU') + # for az_IR, fa_IR, lzh_TW, my_MM, or_IN, shn_MM. + @run_with_locales('LC_TIME', 'C', 'en_US', 'fr_FR', 'de_DE', 'ja_JP', + 'he_IL', 'eu_ES', 'ar_AE', 'mfe_MU', 'yo_NG', + 'csb_PL', 'br_FR', 'gez_ET', 'brx_IN') def test_date_time_locale(self): # Test %c directive + loc = locale.getlocale(locale.LC_TIME)[0] + if glibc_ver and glibc_ver < (2, 31) and loc == 'br_FR': + self.skipTest('%c in locale br_FR does not include time') now = time.time() self.roundtrip('%c', slice(0, 6), time.localtime(now)) # 1 hour 20 minutes 30 seconds ago @@ -505,7 +513,9 @@ def test_date_time_locale(self): # NB: Dates before 1969 do not roundtrip on some locales: # bo_CN, bo_IN, dz_BT, eu_ES, eu_FR. - @run_with_locales('LC_TIME', 'C', 'en_US', 'de_DE', 'ja_JP') + @run_with_locales('LC_TIME', 'C', 'en_US', 'fr_FR', 'de_DE', 'ja_JP', + 'he_IL', 'ar_AE', 'mfe_MU', 'yo_NG', + 'csb_PL', 'br_FR', 'gez_ET', 'brx_IN') def test_date_time_locale2(self): # Test %c directive self.roundtrip('%c', slice(0, 6), (1900, 1, 1, 0, 0, 0, 0, 1, 0)) @@ -513,10 +523,9 @@ def test_date_time_locale2(self): # NB: Does not roundtrip because use non-Gregorian calendar: # lo_LA, thai, th_TH. # BUG: Generates regexp that does not match the current date - # for az_IR, fa_IR, lzh_TW, my_MM, or_IN, shn_MM, - # Arabic, ja_JP, ko_KR, zh_CN, etc. - @run_with_locales('LC_TIME', 'C', 'en_US', 'fr_FR', 'de_DE', - 'he_IL', 'eu_ES') + # for az_IR, fa_IR, lzh_TW, my_MM, or_IN, shn_MM. + @run_with_locales('LC_TIME', 'C', 'en_US', 'fr_FR', 'de_DE', 'ja_JP', + 'he_IL', 'eu_ES', 'ar_AE') def test_date_locale(self): # Test %x directive now = time.time() @@ -535,7 +544,8 @@ def test_date_locale(self): support.is_emscripten or support.is_wasi, "musl libc issue on Emscripten, bpo-46390" ) - @run_with_locales('LC_TIME', 'en_US', 'fr_FR', 'de_DE', 'ja_JP') + @run_with_locales('LC_TIME', 'en_US', 'fr_FR', 'de_DE', 'ja_JP', + 'eu_ES', 'ar_AE') def test_date_locale2(self): # Test %x directive self.roundtrip('%x', slice(0, 3), (1900, 1, 1, 0, 0, 0, 0, 1, 0)) diff --git a/Misc/NEWS.d/next/Library/2024-10-03-20-45-57.gh-issue-53203.3Sk4Ia.rst b/Misc/NEWS.d/next/Library/2024-10-03-20-45-57.gh-issue-53203.3Sk4Ia.rst new file mode 100644 index 000000000000000..6895cffcf545fd3 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-03-20-45-57.gh-issue-53203.3Sk4Ia.rst @@ -0,0 +1,5 @@ +Fix :func:`time.strptime` for ``%c`` and ``%x`` formats in many locales: +Arabic, Bislama, Breton, Bodo, Kashubian, Chuvash, Estonian, French, Irish, +Ge'ez, Gurajati, Manx Gaelic, Hebrew, Hindi, Chhattisgarhi, Haitian Kreyol, +Japanese, Kannada, Korean, Marathi, Malay, Norwegian, Nynorsk, Punjabi, +Rajasthani, Tok Pisin, Yoruba, Yue Chinese, Yau/Nungon and Chinese. From 62d5a53a0b2a5262a86984cfe9817aeb653ebfca Mon Sep 17 00:00:00 2001 From: Richard Hansen Date: Sat, 12 Oct 2024 15:33:00 -0400 Subject: [PATCH 50/80] gh-124872: Move PyThreadState to first argument for consistency (#124774) --- Python/context.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Python/context.c b/Python/context.c index 36e2677c398f591..9b742136b0726d8 100644 --- a/Python/context.c +++ b/Python/context.c @@ -112,7 +112,8 @@ context_event_name(PyContextEvent event) { Py_UNREACHABLE(); } -static void notify_context_watchers(PyContextEvent event, PyContext *ctx, PyThreadState *ts) +static void +notify_context_watchers(PyThreadState *ts, PyContextEvent event, PyContext *ctx) { assert(Py_REFCNT(ctx) > 0); PyInterpreterState *interp = ts->interp; @@ -192,7 +193,7 @@ _PyContext_Enter(PyThreadState *ts, PyObject *octx) ts->context = Py_NewRef(ctx); ts->context_ver++; - notify_context_watchers(Py_CONTEXT_EVENT_ENTER, ctx, ts); + notify_context_watchers(ts, Py_CONTEXT_EVENT_ENTER, ctx); return 0; } @@ -226,7 +227,7 @@ _PyContext_Exit(PyThreadState *ts, PyObject *octx) return -1; } - notify_context_watchers(Py_CONTEXT_EVENT_EXIT, ctx, ts); + notify_context_watchers(ts, Py_CONTEXT_EVENT_EXIT, ctx); Py_SETREF(ts->context, (PyObject *)ctx->ctx_prev); ts->context_ver++; From 4a2282b0679bbf7b7fbd36aae1b1565145238961 Mon Sep 17 00:00:00 2001 From: Stephen Rosen Date: Sat, 12 Oct 2024 15:21:55 -0500 Subject: [PATCH 51/80] Prefer "similar" over "equivalent" in tutorial (#125343) In the datastructures tutorial doc, some operations are described as "equivalent to" others. This has led to some user-confusion -- at least in the Discourse forums -- about cases in which the operations differ. This change doesn't systematically eliminate the word "equivalent" from the tutorial. It just substitutes "similar to" in several cases in which "equivalent to" could mislead users into expecting exact equivalence. --- Doc/tutorial/datastructures.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Doc/tutorial/datastructures.rst b/Doc/tutorial/datastructures.rst index 73f17adeea72dec..31941bc112a1358 100644 --- a/Doc/tutorial/datastructures.rst +++ b/Doc/tutorial/datastructures.rst @@ -19,13 +19,13 @@ objects: .. method:: list.append(x) :noindex: - Add an item to the end of the list. Equivalent to ``a[len(a):] = [x]``. + Add an item to the end of the list. Similar to ``a[len(a):] = [x]``. .. method:: list.extend(iterable) :noindex: - Extend the list by appending all the items from the iterable. Equivalent to + Extend the list by appending all the items from the iterable. Similar to ``a[len(a):] = iterable``. @@ -56,7 +56,7 @@ objects: .. method:: list.clear() :noindex: - Remove all items from the list. Equivalent to ``del a[:]``. + Remove all items from the list. Similar to ``del a[:]``. .. method:: list.index(x[, start[, end]]) @@ -93,7 +93,7 @@ objects: .. method:: list.copy() :noindex: - Return a shallow copy of the list. Equivalent to ``a[:]``. + Return a shallow copy of the list. Similar to ``a[:]``. An example that uses most of the list methods:: From fa52b82c91a8e1a0971bd5fef656473ec93f41e3 Mon Sep 17 00:00:00 2001 From: "Ghorban M. Tavakoly" <58617996+galmyk@users.noreply.github.com> Date: Sun, 13 Oct 2024 00:08:13 +0330 Subject: [PATCH 52/80] gh-125289: Update sample code in asyncio-task.rst (GH-125292) * Update sample code in asyncio-task.rst This will change **coroutines** sample code in the **Awaitables** section and make the example clearer. * Update Doc/library/asyncio-task.rst Revert the added print Co-authored-by: Carol Willing * Update Doc/library/asyncio-task.rst Co-authored-by: Carol Willing --------- Co-authored-by: Carol Willing --- Doc/library/asyncio-task.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/library/asyncio-task.rst b/Doc/library/asyncio-task.rst index 4716a3f9c8ac79f..f27e858cf420f4b 100644 --- a/Doc/library/asyncio-task.rst +++ b/Doc/library/asyncio-task.rst @@ -158,7 +158,7 @@ other coroutines:: # Nothing happens if we just call "nested()". # A coroutine object is created but not awaited, # so it *won't run at all*. - nested() + nested() # will raise a "RuntimeWarning". # Let's do it differently now and await it: print(await nested()) # will print "42". From 330c527299a5380f39c658bfa9321706cabc445d Mon Sep 17 00:00:00 2001 From: Richard Hansen Date: Sat, 12 Oct 2024 16:57:27 -0400 Subject: [PATCH 53/80] gh-124872: Change PyContext_WatchCallback to take PyObject (#124737) The PyContext struct is not intended to be public, and users of the API don't need anything more specific than PyObject. Also see gh-78943. --- Doc/c-api/contextvars.rst | 2 +- Include/cpython/context.h | 2 +- Modules/_testcapi/watchers.c | 10 +++++----- Python/context.c | 6 +++--- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/Doc/c-api/contextvars.rst b/Doc/c-api/contextvars.rst index 59e74ba1ac70226..8eba54a80dc80d4 100644 --- a/Doc/c-api/contextvars.rst +++ b/Doc/c-api/contextvars.rst @@ -136,7 +136,7 @@ Context object management functions: .. versionadded:: 3.14 -.. c:type:: int (*PyContext_WatchCallback)(PyContextEvent event, PyContext* ctx) +.. c:type:: int (*PyContext_WatchCallback)(PyContextEvent event, PyObject *obj) Context object watcher callback function. The object passed to the callback is event-specific; see :c:type:`PyContextEvent` for details. diff --git a/Include/cpython/context.h b/Include/cpython/context.h index d722b4d93134f70..3c9be7873b93990 100644 --- a/Include/cpython/context.h +++ b/Include/cpython/context.h @@ -52,7 +52,7 @@ typedef enum { * if the callback returns with an exception set, it must return -1. Otherwise * it should return 0 */ -typedef int (*PyContext_WatchCallback)(PyContextEvent, PyContext *); +typedef int (*PyContext_WatchCallback)(PyContextEvent, PyObject *); /* * Register a per-interpreter callback that will be invoked for context object diff --git a/Modules/_testcapi/watchers.c b/Modules/_testcapi/watchers.c index 689863d098ad8a5..b4233d07134aea8 100644 --- a/Modules/_testcapi/watchers.c +++ b/Modules/_testcapi/watchers.c @@ -630,7 +630,7 @@ static int num_context_object_enter_events[NUM_CONTEXT_WATCHERS] = {0, 0}; static int num_context_object_exit_events[NUM_CONTEXT_WATCHERS] = {0, 0}; static int -handle_context_watcher_event(int which_watcher, PyContextEvent event, PyContext *ctx) { +handle_context_watcher_event(int which_watcher, PyContextEvent event, PyObject *ctx) { if (event == Py_CONTEXT_EVENT_ENTER) { num_context_object_enter_events[which_watcher]++; } @@ -644,22 +644,22 @@ handle_context_watcher_event(int which_watcher, PyContextEvent event, PyContext } static int -first_context_watcher_callback(PyContextEvent event, PyContext *ctx) { +first_context_watcher_callback(PyContextEvent event, PyObject *ctx) { return handle_context_watcher_event(0, event, ctx); } static int -second_context_watcher_callback(PyContextEvent event, PyContext *ctx) { +second_context_watcher_callback(PyContextEvent event, PyObject *ctx) { return handle_context_watcher_event(1, event, ctx); } static int -noop_context_event_handler(PyContextEvent event, PyContext *ctx) { +noop_context_event_handler(PyContextEvent event, PyObject *ctx) { return 0; } static int -error_context_event_handler(PyContextEvent event, PyContext *ctx) { +error_context_event_handler(PyContextEvent event, PyObject *ctx) { PyErr_SetString(PyExc_RuntimeError, "boom!"); return -1; } diff --git a/Python/context.c b/Python/context.c index 9b742136b0726d8..8bc487a33c890bf 100644 --- a/Python/context.c +++ b/Python/context.c @@ -113,7 +113,7 @@ context_event_name(PyContextEvent event) { } static void -notify_context_watchers(PyThreadState *ts, PyContextEvent event, PyContext *ctx) +notify_context_watchers(PyThreadState *ts, PyContextEvent event, PyObject *ctx) { assert(Py_REFCNT(ctx) > 0); PyInterpreterState *interp = ts->interp; @@ -193,7 +193,7 @@ _PyContext_Enter(PyThreadState *ts, PyObject *octx) ts->context = Py_NewRef(ctx); ts->context_ver++; - notify_context_watchers(ts, Py_CONTEXT_EVENT_ENTER, ctx); + notify_context_watchers(ts, Py_CONTEXT_EVENT_ENTER, octx); return 0; } @@ -227,7 +227,7 @@ _PyContext_Exit(PyThreadState *ts, PyObject *octx) return -1; } - notify_context_watchers(ts, Py_CONTEXT_EVENT_EXIT, ctx); + notify_context_watchers(ts, Py_CONTEXT_EVENT_EXIT, octx); Py_SETREF(ts->context, (PyObject *)ctx->ctx_prev); ts->context_ver++; From a8fa4ad9e9f7aa0cba8b23af2c583d17bb1d1847 Mon Sep 17 00:00:00 2001 From: Ruoyu Zhong Date: Sun, 13 Oct 2024 08:53:28 +0800 Subject: [PATCH 54/80] Doc: Fix suggested usage of `-X gil=0` in the glossary (#125366) Currently, the "global interpreter lock" entry in the glossary mentions that `-X gil 0` can be used to disable the GIL [1]. However, this is invalid; the correct usage should be `-X gil=0`. $ python -X gil 0 -c 'print("Hello, world")' Fatal Python error: config_read_gil: PYTHON_GIL / -X gil must be "0" or "1" Python runtime state: preinitialized $ python -X gil=0 -c 'print("Hello, world")' Hello, world [1]: https://docs.python.org/3/using/cmdline.html#cmdoption-X Signed-off-by: Ruoyu Zhong --- Doc/glossary.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/glossary.rst b/Doc/glossary.rst index 1d407732eef5768..f67f3ecad0bc406 100644 --- a/Doc/glossary.rst +++ b/Doc/glossary.rst @@ -617,7 +617,7 @@ Glossary As of Python 3.13, the GIL can be disabled using the :option:`--disable-gil` build configuration. After building Python with this option, code must be - run with :option:`-X gil 0 <-X>` or after setting the :envvar:`PYTHON_GIL=0 ` + run with :option:`-X gil=0 <-X>` or after setting the :envvar:`PYTHON_GIL=0 ` environment variable. This feature enables improved performance for multi-threaded applications and makes it easier to use multi-core CPUs efficiently. For more details, see :pep:`703`. From 82bcaf15890cf85b76b4f62d2dd1710bb49c3ed1 Mon Sep 17 00:00:00 2001 From: Andrew Athan <24279435+aathan@users.noreply.github.com> Date: Sun, 13 Oct 2024 00:22:05 -0700 Subject: [PATCH 55/80] Trivial change: Update comments in activate about what running hash -r does (GH-125385) Update comments about what running hash -r does The old comment said "hash -r" forgets "past commands." However, the documentation for "hash" states that it forgets past locations. The old comment was, in my opinion, confusing. This is because it could be interpreted to mean it does something to the command history (HISTORY/HISTFILE etc) vs the cache of locations. --- Lib/venv/scripts/common/activate | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Lib/venv/scripts/common/activate b/Lib/venv/scripts/common/activate index cbd4873f0122464..4593799b7e9b0ea 100644 --- a/Lib/venv/scripts/common/activate +++ b/Lib/venv/scripts/common/activate @@ -14,8 +14,9 @@ deactivate () { unset _OLD_VIRTUAL_PYTHONHOME fi - # Call hash to forget past commands. Without forgetting - # past commands the $PATH changes we made may not be respected + # Call hash to forget past locations. Without forgetting + # past locations the $PATH changes we made may not be respected. + # See "man bash" for more details. hash is usually a builtin of your shell hash -r 2> /dev/null if [ -n "${_OLD_VIRTUAL_PS1:-}" ] ; then From 4197a796ecf3a751ad7245b8d4f980d6d444b614 Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" Date: Sun, 13 Oct 2024 09:38:47 +0200 Subject: [PATCH 56/80] gh-86673: Loosen test_ttk.test_identify() requirements (#125335) In aeca373b3 (PR gh-12011, issue gh-71500), test_identify() was changed to expect different results on Darwin. Ned's fix was later adjusted by e52f9bee8. This workaround is only needed for some variants of Tk/Tcl on macOS, so we now allow both the workaround and the generic results for these tests. --- Lib/test/test_ttk/test_widgets.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Lib/test/test_ttk/test_widgets.py b/Lib/test/test_ttk/test_widgets.py index 88740b18864006b..10bec33be617a1d 100644 --- a/Lib/test/test_ttk/test_widgets.py +++ b/Lib/test/test_ttk/test_widgets.py @@ -336,7 +336,8 @@ class EntryTest(AbstractWidgetTest, unittest.TestCase): 'show', 'state', 'style', 'takefocus', 'textvariable', 'validate', 'validatecommand', 'width', 'xscrollcommand', ) - IDENTIFY_AS = 'Entry.field' if sys.platform == 'darwin' else 'textarea' + # bpo-27313: macOS Tk/Tcl may or may not report 'Entry.field'. + IDENTIFY_AS = {'Entry.field', 'textarea'} def setUp(self): super().setUp() @@ -373,8 +374,7 @@ def test_identify(self): self.entry.pack() self.entry.update() - # bpo-27313: macOS Cocoa widget differs from X, allow either - self.assertEqual(self.entry.identify(5, 5), self.IDENTIFY_AS) + self.assertIn(self.entry.identify(5, 5), self.IDENTIFY_AS) self.assertEqual(self.entry.identify(-1, -1), "") self.assertRaises(tkinter.TclError, self.entry.identify, None, 5) @@ -461,7 +461,7 @@ class ComboboxTest(EntryTest, unittest.TestCase): 'validate', 'validatecommand', 'values', 'width', 'xscrollcommand', ) - IDENTIFY_AS = 'Combobox.button' if sys.platform == 'darwin' else 'textarea' + IDENTIFY_AS = {'Combobox.button', 'textarea'} def setUp(self): super().setUp() @@ -1204,7 +1204,7 @@ class SpinboxTest(EntryTest, unittest.TestCase): 'takefocus', 'textvariable', 'to', 'validate', 'validatecommand', 'values', 'width', 'wrap', 'xscrollcommand', ) - IDENTIFY_AS = 'Spinbox.field' if sys.platform == 'darwin' else 'textarea' + IDENTIFY_AS = {'Spinbox.field', 'textarea'} def setUp(self): super().setUp() From ce740d46246b28bb675ba9d62214b59be9b8411e Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" Date: Sun, 13 Oct 2024 10:22:31 +0200 Subject: [PATCH 57/80] gh-61698: Use launchctl to detect macOS window manager in tests (#118390) --- Lib/test/support/__init__.py | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/Lib/test/support/__init__.py b/Lib/test/support/__init__.py index 72ce5dacd1be4ce..d768bead7120c72 100644 --- a/Lib/test/support/__init__.py +++ b/Lib/test/support/__init__.py @@ -253,22 +253,16 @@ class USEROBJECTFLAGS(ctypes.Structure): # process not running under the same user id as the current console # user. To avoid that, raise an exception if the window manager # connection is not available. - from ctypes import cdll, c_int, pointer, Structure - from ctypes.util import find_library - - app_services = cdll.LoadLibrary(find_library("ApplicationServices")) - - if app_services.CGMainDisplayID() == 0: - reason = "gui tests cannot run without OS X window manager" + import subprocess + try: + rc = subprocess.run(["launchctl", "managername"], + capture_output=True, check=True) + managername = rc.stdout.decode("utf-8").strip() + except subprocess.CalledProcessError: + reason = "unable to detect macOS launchd job manager" else: - class ProcessSerialNumber(Structure): - _fields_ = [("highLongOfPSN", c_int), - ("lowLongOfPSN", c_int)] - psn = ProcessSerialNumber() - psn_p = pointer(psn) - if ( (app_services.GetCurrentProcess(psn_p) < 0) or - (app_services.SetFrontProcess(psn_p) < 0) ): - reason = "cannot run without OS X gui process" + if managername != "Aqua": + reason = f"{managername=} -- can only run in a macOS GUI session" # check on every platform whether tkinter can actually do anything if not reason: From 283ea5f3b2b6a18605b8598a979afe263b0f21ce Mon Sep 17 00:00:00 2001 From: Wulian Date: Sun, 13 Oct 2024 19:10:59 +0800 Subject: [PATCH 58/80] gh-125383: Update `fib` function comment for accuracy (#125386) `Doc/tutorial/controlflow.rst`: fix comment for `fib` function --- Doc/tutorial/controlflow.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Doc/tutorial/controlflow.rst b/Doc/tutorial/controlflow.rst index 9b73ac475c78d5d..b830ce94ba4f475 100644 --- a/Doc/tutorial/controlflow.rst +++ b/Doc/tutorial/controlflow.rst @@ -461,8 +461,8 @@ Defining Functions We can create a function that writes the Fibonacci series to an arbitrary boundary:: - >>> def fib(n): # write Fibonacci series up to n - ... """Print a Fibonacci series up to n.""" + >>> def fib(n): # write Fibonacci series less than n + ... """Print a Fibonacci series less than n.""" ... a, b = 0, 1 ... while a < n: ... print(a, end=' ') From 022c50d190e14affb952a244c4eb6e4a644ad0c9 Mon Sep 17 00:00:00 2001 From: Kumar Aditya Date: Sun, 13 Oct 2024 20:59:07 +0530 Subject: [PATCH 59/80] fix comment in _PyMutex_TryUnlock (#125319) --- Include/internal/pycore_lock.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Include/internal/pycore_lock.h b/Include/internal/pycore_lock.h index e6da083b807ce5b..cd7deda00c7beec 100644 --- a/Include/internal/pycore_lock.h +++ b/Include/internal/pycore_lock.h @@ -64,8 +64,8 @@ PyMutex_LockFlags(PyMutex *m, _PyLockFlags flags) } } -// Unlock a mutex, returns 0 if the mutex is not locked (used for improved -// error messages). +// Unlock a mutex, returns -1 if the mutex is not locked (used for improved +// error messages) otherwise returns 0. extern int _PyMutex_TryUnlock(PyMutex *m); From 08489325d1cd94eba97c5f5f8cac49521fd0b0d7 Mon Sep 17 00:00:00 2001 From: Kumar Aditya Date: Sun, 13 Oct 2024 20:59:41 +0530 Subject: [PATCH 60/80] gh-125161: return non zero value in pthread_self on wasi (#125303) --- Python/thread_pthread_stubs.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Python/thread_pthread_stubs.h b/Python/thread_pthread_stubs.h index 4741e594e52e656..458f8fc5951720f 100644 --- a/Python/thread_pthread_stubs.h +++ b/Python/thread_pthread_stubs.h @@ -1,5 +1,9 @@ #include "cpython/pthread_stubs.h" +typedef struct py_stub_tls_entry py_tls_entry; + +#define py_tls_entries (_PyRuntime.threads.stubs.tls_entries) + // mutex int pthread_mutex_init(pthread_mutex_t *restrict mutex, @@ -105,7 +109,7 @@ pthread_join(pthread_t thread, void** value_ptr) PyAPI_FUNC(pthread_t) pthread_self(void) { - return 0; + return (pthread_t)(uintptr_t)&py_tls_entries; } int @@ -134,10 +138,6 @@ pthread_attr_destroy(pthread_attr_t *attr) } -typedef struct py_stub_tls_entry py_tls_entry; - -#define py_tls_entries (_PyRuntime.threads.stubs.tls_entries) - int pthread_key_create(pthread_key_t *key, void (*destr_function)(void *)) { From cd0f9d111a040ad863c680e9f464419640c8c3fd Mon Sep 17 00:00:00 2001 From: Kumar Aditya Date: Sun, 13 Oct 2024 21:05:05 +0530 Subject: [PATCH 61/80] gh-89967: make WeakKeyDictionary and WeakValueDictionary thread safe (#125325) Make `WeakKeyDictionary` and `WeakValueDictionary` thread safe by copying the underlying the dict before iterating over it. --- Lib/_weakrefset.py | 25 --- Lib/weakref.py | 198 +++++------------- ...4-10-11-16-19-46.gh-issue-89967.vhWUOR.rst | 1 + 3 files changed, 50 insertions(+), 174 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-10-11-16-19-46.gh-issue-89967.vhWUOR.rst diff --git a/Lib/_weakrefset.py b/Lib/_weakrefset.py index 2071755d71dfc84..d1c7fcaeec9821c 100644 --- a/Lib/_weakrefset.py +++ b/Lib/_weakrefset.py @@ -8,31 +8,6 @@ __all__ = ['WeakSet'] -class _IterationGuard: - # This context manager registers itself in the current iterators of the - # weak container, such as to delay all removals until the context manager - # exits. - # This technique should be relatively thread-safe (since sets are). - - def __init__(self, weakcontainer): - # Don't create cycles - self.weakcontainer = ref(weakcontainer) - - def __enter__(self): - w = self.weakcontainer() - if w is not None: - w._iterating.add(self) - return self - - def __exit__(self, e, t, b): - w = self.weakcontainer() - if w is not None: - s = w._iterating - s.remove(self) - if not s: - w._commit_removals() - - class WeakSet: def __init__(self, data=None): self.data = set() diff --git a/Lib/weakref.py b/Lib/weakref.py index 25b70927e29c31d..94e4278143c9878 100644 --- a/Lib/weakref.py +++ b/Lib/weakref.py @@ -19,7 +19,7 @@ ReferenceType, _remove_dead_weakref) -from _weakrefset import WeakSet, _IterationGuard +from _weakrefset import WeakSet import _collections_abc # Import after _weakref to avoid circular import. import sys @@ -105,34 +105,14 @@ def __init__(self, other=(), /, **kw): def remove(wr, selfref=ref(self), _atomic_removal=_remove_dead_weakref): self = selfref() if self is not None: - if self._iterating: - self._pending_removals.append(wr.key) - else: - # Atomic removal is necessary since this function - # can be called asynchronously by the GC - _atomic_removal(self.data, wr.key) + # Atomic removal is necessary since this function + # can be called asynchronously by the GC + _atomic_removal(self.data, wr.key) self._remove = remove - # A list of keys to be removed - self._pending_removals = [] - self._iterating = set() self.data = {} self.update(other, **kw) - def _commit_removals(self, _atomic_removal=_remove_dead_weakref): - pop = self._pending_removals.pop - d = self.data - # We shouldn't encounter any KeyError, because this method should - # always be called *before* mutating the dict. - while True: - try: - key = pop() - except IndexError: - return - _atomic_removal(d, key) - def __getitem__(self, key): - if self._pending_removals: - self._commit_removals() o = self.data[key]() if o is None: raise KeyError(key) @@ -140,18 +120,12 @@ def __getitem__(self, key): return o def __delitem__(self, key): - if self._pending_removals: - self._commit_removals() del self.data[key] def __len__(self): - if self._pending_removals: - self._commit_removals() return len(self.data) def __contains__(self, key): - if self._pending_removals: - self._commit_removals() try: o = self.data[key]() except KeyError: @@ -162,38 +136,28 @@ def __repr__(self): return "<%s at %#x>" % (self.__class__.__name__, id(self)) def __setitem__(self, key, value): - if self._pending_removals: - self._commit_removals() self.data[key] = KeyedRef(value, self._remove, key) def copy(self): - if self._pending_removals: - self._commit_removals() new = WeakValueDictionary() - with _IterationGuard(self): - for key, wr in self.data.items(): - o = wr() - if o is not None: - new[key] = o + for key, wr in self.data.copy().items(): + o = wr() + if o is not None: + new[key] = o return new __copy__ = copy def __deepcopy__(self, memo): from copy import deepcopy - if self._pending_removals: - self._commit_removals() new = self.__class__() - with _IterationGuard(self): - for key, wr in self.data.items(): - o = wr() - if o is not None: - new[deepcopy(key, memo)] = o + for key, wr in self.data.copy().items(): + o = wr() + if o is not None: + new[deepcopy(key, memo)] = o return new def get(self, key, default=None): - if self._pending_removals: - self._commit_removals() try: wr = self.data[key] except KeyError: @@ -207,21 +171,15 @@ def get(self, key, default=None): return o def items(self): - if self._pending_removals: - self._commit_removals() - with _IterationGuard(self): - for k, wr in self.data.items(): - v = wr() - if v is not None: - yield k, v + for k, wr in self.data.copy().items(): + v = wr() + if v is not None: + yield k, v def keys(self): - if self._pending_removals: - self._commit_removals() - with _IterationGuard(self): - for k, wr in self.data.items(): - if wr() is not None: - yield k + for k, wr in self.data.copy().items(): + if wr() is not None: + yield k __iter__ = keys @@ -235,23 +193,15 @@ def itervaluerefs(self): keep the values around longer than needed. """ - if self._pending_removals: - self._commit_removals() - with _IterationGuard(self): - yield from self.data.values() + yield from self.data.copy().values() def values(self): - if self._pending_removals: - self._commit_removals() - with _IterationGuard(self): - for wr in self.data.values(): - obj = wr() - if obj is not None: - yield obj + for wr in self.data.copy().values(): + obj = wr() + if obj is not None: + yield obj def popitem(self): - if self._pending_removals: - self._commit_removals() while True: key, wr = self.data.popitem() o = wr() @@ -259,8 +209,6 @@ def popitem(self): return key, o def pop(self, key, *args): - if self._pending_removals: - self._commit_removals() try: o = self.data.pop(key)() except KeyError: @@ -279,16 +227,12 @@ def setdefault(self, key, default=None): except KeyError: o = None if o is None: - if self._pending_removals: - self._commit_removals() self.data[key] = KeyedRef(default, self._remove, key) return default else: return o def update(self, other=None, /, **kwargs): - if self._pending_removals: - self._commit_removals() d = self.data if other is not None: if not hasattr(other, "items"): @@ -308,9 +252,7 @@ def valuerefs(self): keep the values around longer than needed. """ - if self._pending_removals: - self._commit_removals() - return list(self.data.values()) + return list(self.data.copy().values()) def __ior__(self, other): self.update(other) @@ -369,57 +311,22 @@ def __init__(self, dict=None): def remove(k, selfref=ref(self)): self = selfref() if self is not None: - if self._iterating: - self._pending_removals.append(k) - else: - try: - del self.data[k] - except KeyError: - pass + try: + del self.data[k] + except KeyError: + pass self._remove = remove - # A list of dead weakrefs (keys to be removed) - self._pending_removals = [] - self._iterating = set() - self._dirty_len = False if dict is not None: self.update(dict) - def _commit_removals(self): - # NOTE: We don't need to call this method before mutating the dict, - # because a dead weakref never compares equal to a live weakref, - # even if they happened to refer to equal objects. - # However, it means keys may already have been removed. - pop = self._pending_removals.pop - d = self.data - while True: - try: - key = pop() - except IndexError: - return - - try: - del d[key] - except KeyError: - pass - - def _scrub_removals(self): - d = self.data - self._pending_removals = [k for k in self._pending_removals if k in d] - self._dirty_len = False - def __delitem__(self, key): - self._dirty_len = True del self.data[ref(key)] def __getitem__(self, key): return self.data[ref(key)] def __len__(self): - if self._dirty_len and self._pending_removals: - # self._pending_removals may still contain keys which were - # explicitly removed, we have to scrub them (see issue #21173). - self._scrub_removals() - return len(self.data) - len(self._pending_removals) + return len(self.data) def __repr__(self): return "<%s at %#x>" % (self.__class__.__name__, id(self)) @@ -429,11 +336,10 @@ def __setitem__(self, key, value): def copy(self): new = WeakKeyDictionary() - with _IterationGuard(self): - for key, value in self.data.items(): - o = key() - if o is not None: - new[o] = value + for key, value in self.data.copy().items(): + o = key() + if o is not None: + new[o] = value return new __copy__ = copy @@ -441,11 +347,10 @@ def copy(self): def __deepcopy__(self, memo): from copy import deepcopy new = self.__class__() - with _IterationGuard(self): - for key, value in self.data.items(): - o = key() - if o is not None: - new[o] = deepcopy(value, memo) + for key, value in self.data.copy().items(): + o = key() + if o is not None: + new[o] = deepcopy(value, memo) return new def get(self, key, default=None): @@ -459,26 +364,23 @@ def __contains__(self, key): return wr in self.data def items(self): - with _IterationGuard(self): - for wr, value in self.data.items(): - key = wr() - if key is not None: - yield key, value + for wr, value in self.data.copy().items(): + key = wr() + if key is not None: + yield key, value def keys(self): - with _IterationGuard(self): - for wr in self.data: - obj = wr() - if obj is not None: - yield obj + for wr in self.data.copy(): + obj = wr() + if obj is not None: + yield obj __iter__ = keys def values(self): - with _IterationGuard(self): - for wr, value in self.data.items(): - if wr() is not None: - yield value + for wr, value in self.data.copy().items(): + if wr() is not None: + yield value def keyrefs(self): """Return a list of weak references to the keys. @@ -493,7 +395,6 @@ def keyrefs(self): return list(self.data) def popitem(self): - self._dirty_len = True while True: key, value = self.data.popitem() o = key() @@ -501,7 +402,6 @@ def popitem(self): return o, value def pop(self, key, *args): - self._dirty_len = True return self.data.pop(ref(key), *args) def setdefault(self, key, default=None): diff --git a/Misc/NEWS.d/next/Library/2024-10-11-16-19-46.gh-issue-89967.vhWUOR.rst b/Misc/NEWS.d/next/Library/2024-10-11-16-19-46.gh-issue-89967.vhWUOR.rst new file mode 100644 index 000000000000000..d0860457c8e813b --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-11-16-19-46.gh-issue-89967.vhWUOR.rst @@ -0,0 +1 @@ +Make :class:`~weakref.WeakKeyDictionary` and :class:`~weakref.WeakValueDictionary` safe against concurrent mutations from other threads. Patch by Kumar Aditya. From 6c386b703d19aaec9a34fd1e843a4d0a144ad14b Mon Sep 17 00:00:00 2001 From: partev Date: Sun, 13 Oct 2024 12:46:15 -0400 Subject: [PATCH 62/80] gh-125403: fix console formatting in Chapter 12 of the tutorial (#125404) --- Doc/tutorial/venv.rst | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/Doc/tutorial/venv.rst b/Doc/tutorial/venv.rst index 91e4ce18acef1dd..f362e1943b666f7 100644 --- a/Doc/tutorial/venv.rst +++ b/Doc/tutorial/venv.rst @@ -76,7 +76,7 @@ virtual environment you're using, and modify the environment so that running ``python`` will get you that particular version and installation of Python. For example: -.. code-block:: bash +.. code-block:: console $ source ~/envs/tutorial-env/bin/activate (tutorial-env) $ python @@ -108,7 +108,7 @@ complete documentation for ``pip``.) You can install the latest version of a package by specifying a package's name: -.. code-block:: bash +.. code-block:: console (tutorial-env) $ python -m pip install novas Collecting novas @@ -120,7 +120,7 @@ You can install the latest version of a package by specifying a package's name: You can also install a specific version of a package by giving the package name followed by ``==`` and the version number: -.. code-block:: bash +.. code-block:: console (tutorial-env) $ python -m pip install requests==2.6.0 Collecting requests==2.6.0 @@ -133,7 +133,7 @@ version is already installed and do nothing. You can supply a different version number to get that version, or you can run ``python -m pip install --upgrade`` to upgrade the package to the latest version: -.. code-block:: bash +.. code-block:: console (tutorial-env) $ python -m pip install --upgrade requests Collecting requests @@ -148,7 +148,7 @@ remove the packages from the virtual environment. ``python -m pip show`` will display information about a particular package: -.. code-block:: bash +.. code-block:: console (tutorial-env) $ python -m pip show requests --- @@ -166,7 +166,7 @@ remove the packages from the virtual environment. ``python -m pip list`` will display all of the packages installed in the virtual environment: -.. code-block:: bash +.. code-block:: console (tutorial-env) $ python -m pip list novas (3.1.1.3) @@ -179,7 +179,7 @@ the virtual environment: but the output uses the format that ``python -m pip install`` expects. A common convention is to put this list in a ``requirements.txt`` file: -.. code-block:: bash +.. code-block:: console (tutorial-env) $ python -m pip freeze > requirements.txt (tutorial-env) $ cat requirements.txt @@ -191,7 +191,7 @@ The ``requirements.txt`` can then be committed to version control and shipped as part of an application. Users can then install all the necessary packages with ``install -r``: -.. code-block:: bash +.. code-block:: console (tutorial-env) $ python -m pip install -r requirements.txt Collecting novas==3.1.1.3 (from -r requirements.txt (line 1)) From e79bbd147fd58e825572f1aa93c5398953289fb2 Mon Sep 17 00:00:00 2001 From: Kumar Aditya Date: Sun, 13 Oct 2024 22:29:27 +0530 Subject: [PATCH 63/80] add Kumar Aditya as codeowner for weakref (#125405) --- .github/CODEOWNERS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 7e9c3caf23f079f..221008717b29b1c 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -280,3 +280,5 @@ Lib/test/test_configparser.py @jaraco # Doc sections Doc/reference/ @willingc + +**/*weakref* @kumaraditya303 \ No newline at end of file From c6d7b644c2425b397cfb641f336bea70eb8a329a Mon Sep 17 00:00:00 2001 From: Sergey B Kirpichev Date: Sun, 13 Oct 2024 20:38:42 +0300 Subject: [PATCH 64/80] gh-101291: Add versionadded directives for PyUnstable_Long_* (#125384) --- Doc/c-api/long.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Doc/c-api/long.rst b/Doc/c-api/long.rst index e0ae0f77a01db97..02ef8aa78464681 100644 --- a/Doc/c-api/long.rst +++ b/Doc/c-api/long.rst @@ -608,6 +608,9 @@ distinguished from a number. Use :c:func:`PyErr_Occurred` to disambiguate. Exactly what values are considered compact is an implementation detail and is subject to change. + .. versionadded:: 3.12 + + .. c:function:: Py_ssize_t PyUnstable_Long_CompactValue(const PyLongObject* op) If *op* is compact, as determined by :c:func:`PyUnstable_Long_IsCompact`, @@ -615,3 +618,5 @@ distinguished from a number. Use :c:func:`PyErr_Occurred` to disambiguate. Otherwise, the return value is undefined. + .. versionadded:: 3.12 + From cb8e5995d89d9b90e83cf43310ec50e177484e70 Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Sun, 13 Oct 2024 18:46:10 +0100 Subject: [PATCH 65/80] GH-125069: Fix inconsistent joining in `WindowsPath(PosixPath(...))` (#125156) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `PurePath.__init__()` incorrectly uses the `_raw_paths` of a given `PurePath` object with a different flavour, even though the procedure to join path segments can differ between flavours. This change makes the `_raw_paths`-enabled deferred joining apply _only_ when the path flavours match. Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> --- Lib/pathlib/_local.py | 4 ++-- Lib/test/test_pathlib/test_pathlib.py | 9 +++++++++ .../2024-10-08-21-17-16.gh-issue-125069.0RP0Mx.rst | 4 ++++ 3 files changed, 15 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-10-08-21-17-16.gh-issue-125069.0RP0Mx.rst diff --git a/Lib/pathlib/_local.py b/Lib/pathlib/_local.py index 1c02e4168d3a9e7..a78997179820b1b 100644 --- a/Lib/pathlib/_local.py +++ b/Lib/pathlib/_local.py @@ -119,9 +119,9 @@ def __init__(self, *args): paths = [] for arg in args: if isinstance(arg, PurePath): - if arg.parser is ntpath and self.parser is posixpath: + if arg.parser is not self.parser: # GH-103631: Convert separators for backwards compatibility. - paths.extend(path.replace('\\', '/') for path in arg._raw_paths) + paths.append(arg.as_posix()) else: paths.extend(arg._raw_paths) else: diff --git a/Lib/test/test_pathlib/test_pathlib.py b/Lib/test/test_pathlib/test_pathlib.py index b47b4a194cfaa91..c7104bfda90f6cb 100644 --- a/Lib/test/test_pathlib/test_pathlib.py +++ b/Lib/test/test_pathlib/test_pathlib.py @@ -131,6 +131,15 @@ def test_constructor_nested(self): self.assertEqual(P(P('a'), P('b'), P('c')), P(FakePath("a/b/c"))) self.assertEqual(P(P('./a:b')), P('./a:b')) + @needs_windows + def test_constructor_nested_foreign_flavour(self): + # See GH-125069. + p1 = pathlib.PurePosixPath('b/c:\\d') + p2 = pathlib.PurePosixPath('b/', 'c:\\d') + self.assertEqual(p1, p2) + self.assertEqual(self.cls(p1), self.cls('b/c:/d')) + self.assertEqual(self.cls(p2), self.cls('b/c:/d')) + def _check_parse_path(self, raw_path, *expected): sep = self.parser.sep actual = self.cls._parse_path(raw_path.replace('/', sep)) diff --git a/Misc/NEWS.d/next/Library/2024-10-08-21-17-16.gh-issue-125069.0RP0Mx.rst b/Misc/NEWS.d/next/Library/2024-10-08-21-17-16.gh-issue-125069.0RP0Mx.rst new file mode 100644 index 000000000000000..9f1fd871e1d0b50 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-08-21-17-16.gh-issue-125069.0RP0Mx.rst @@ -0,0 +1,4 @@ +Fix an issue where providing a :class:`pathlib.PurePath` object as an +initializer argument to a second :class:`~pathlib.PurePath` object with a +different :attr:`~pathlib.PurePath.parser` resulted in arguments to the +former object's initializer being joined by the latter object's parser. From f1d33dbddd3496b062e1fbe024fb6d7b023a35f5 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Sun, 13 Oct 2024 16:17:51 -0400 Subject: [PATCH 66/80] gh-125243: Fix ZoneInfo data race in free threading build (#125281) Lock `ZoneInfoType` to protect accesses to `ZONEINFO_STRONG_CACHE`. Refactor the `tp_new` handler to use Argument Clinic so that we can just use `@critical_section` annotations on the relevant functions. Also use `PyDict_SetDefaultRef` instead of `PyDict_SetDefault` when inserting into the `TIMEDELTA_CACHE`. --- ...-10-10-20-39-57.gh-issue-125243.eUbbtu.rst | 2 + Modules/_zoneinfo.c | 44 +++++++------ Modules/clinic/_zoneinfo.c.h | 61 ++++++++++++++++++- 3 files changed, 87 insertions(+), 20 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-10-10-20-39-57.gh-issue-125243.eUbbtu.rst diff --git a/Misc/NEWS.d/next/Library/2024-10-10-20-39-57.gh-issue-125243.eUbbtu.rst b/Misc/NEWS.d/next/Library/2024-10-10-20-39-57.gh-issue-125243.eUbbtu.rst new file mode 100644 index 000000000000000..49f84d9711819f6 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-10-20-39-57.gh-issue-125243.eUbbtu.rst @@ -0,0 +1,2 @@ +Fix data race when creating :class:`zoneinfo.ZoneInfo` objects in the free +threading build. diff --git a/Modules/_zoneinfo.c b/Modules/_zoneinfo.c index 902ece795b575b4..c5292575c22f233 100644 --- a/Modules/_zoneinfo.c +++ b/Modules/_zoneinfo.c @@ -3,6 +3,7 @@ #endif #include "Python.h" +#include "pycore_critical_section.h" // _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED() #include "pycore_long.h" // _PyLong_GetOne() #include "pycore_pyerrors.h" // _PyErr_ChainExceptions1() @@ -298,15 +299,20 @@ get_weak_cache(zoneinfo_state *state, PyTypeObject *type) } } +/*[clinic input] +@critical_section +@classmethod +zoneinfo.ZoneInfo.__new__ + + key: object + +Create a new ZoneInfo instance. +[clinic start generated code]*/ + static PyObject * -zoneinfo_new(PyTypeObject *type, PyObject *args, PyObject *kw) +zoneinfo_ZoneInfo_impl(PyTypeObject *type, PyObject *key) +/*[clinic end generated code: output=95e61dab86bb95c3 input=ef73d7a83bf8790e]*/ { - PyObject *key = NULL; - static char *kwlist[] = {"key", NULL}; - if (PyArg_ParseTupleAndKeywords(args, kw, "O", kwlist, &key) == 0) { - return NULL; - } - zoneinfo_state *state = zoneinfo_get_state_by_self(type); PyObject *instance = zone_from_strong_cache(state, type, key); if (instance != NULL || PyErr_Occurred()) { @@ -467,6 +473,7 @@ zoneinfo_ZoneInfo_no_cache_impl(PyTypeObject *type, PyTypeObject *cls, } /*[clinic input] +@critical_section @classmethod zoneinfo.ZoneInfo.clear_cache @@ -481,7 +488,7 @@ Clear the ZoneInfo cache. static PyObject * zoneinfo_ZoneInfo_clear_cache_impl(PyTypeObject *type, PyTypeObject *cls, PyObject *only_keys) -/*[clinic end generated code: output=114d9b7c8a22e660 input=e32ca3bb396788ba]*/ +/*[clinic end generated code: output=114d9b7c8a22e660 input=35944715df26d24e]*/ { zoneinfo_state *state = zoneinfo_get_state_by_cls(cls); PyObject *weak_cache = get_weak_cache(state, type); @@ -816,14 +823,10 @@ zoneinfo_ZoneInfo__unpickle_impl(PyTypeObject *type, PyTypeObject *cls, /*[clinic end generated code: output=556712fc709deecb input=6ac8c73eed3de316]*/ { if (from_cache) { - PyObject *val_args = PyTuple_Pack(1, key); - if (val_args == NULL) { - return NULL; - } - - PyObject *rv = zoneinfo_new(type, val_args, NULL); - - Py_DECREF(val_args); + PyObject *rv; + Py_BEGIN_CRITICAL_SECTION(type); + rv = zoneinfo_ZoneInfo_impl(type, key); + Py_END_CRITICAL_SECTION(); return rv; } else { @@ -858,8 +861,7 @@ load_timedelta(zoneinfo_state *state, long seconds) 0, seconds, 0, 1, PyDateTimeAPI->DeltaType); if (tmp != NULL) { - rv = PyDict_SetDefault(state->TIMEDELTA_CACHE, pyoffset, tmp); - Py_XINCREF(rv); + PyDict_SetDefaultRef(state->TIMEDELTA_CACHE, pyoffset, tmp, &rv); Py_DECREF(tmp); } } @@ -2368,6 +2370,7 @@ strong_cache_free(StrongCacheNode *root) static void remove_from_strong_cache(zoneinfo_state *state, StrongCacheNode *node) { + _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(state->ZoneInfoType); if (state->ZONEINFO_STRONG_CACHE == node) { state->ZONEINFO_STRONG_CACHE = node->next; } @@ -2422,6 +2425,7 @@ eject_from_strong_cache(zoneinfo_state *state, const PyTypeObject *const type, return 0; } + _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(state->ZoneInfoType); StrongCacheNode *cache = state->ZONEINFO_STRONG_CACHE; StrongCacheNode *node = find_in_strong_cache(cache, key); if (node != NULL) { @@ -2478,6 +2482,7 @@ zone_from_strong_cache(zoneinfo_state *state, const PyTypeObject *const type, return NULL; // Strong cache currently only implemented for base class } + _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(state->ZoneInfoType); StrongCacheNode *cache = state->ZONEINFO_STRONG_CACHE; StrongCacheNode *node = find_in_strong_cache(cache, key); @@ -2504,6 +2509,7 @@ update_strong_cache(zoneinfo_state *state, const PyTypeObject *const type, return; } + _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(state->ZoneInfoType); StrongCacheNode *new_node = strong_cache_node_new(key, zone); if (new_node == NULL) { return; @@ -2631,7 +2637,7 @@ static PyType_Slot zoneinfo_slots[] = { {Py_tp_getattro, PyObject_GenericGetAttr}, {Py_tp_methods, zoneinfo_methods}, {Py_tp_members, zoneinfo_members}, - {Py_tp_new, zoneinfo_new}, + {Py_tp_new, zoneinfo_ZoneInfo}, {Py_tp_dealloc, zoneinfo_dealloc}, {Py_tp_traverse, zoneinfo_traverse}, {Py_tp_clear, zoneinfo_clear}, diff --git a/Modules/clinic/_zoneinfo.c.h b/Modules/clinic/_zoneinfo.c.h index 9905b6425e2f799..bde88b5c4fa65bf 100644 --- a/Modules/clinic/_zoneinfo.c.h +++ b/Modules/clinic/_zoneinfo.c.h @@ -6,8 +6,65 @@ preserve # include "pycore_gc.h" // PyGC_Head # include "pycore_runtime.h" // _Py_ID() #endif +#include "pycore_critical_section.h"// Py_BEGIN_CRITICAL_SECTION() #include "pycore_modsupport.h" // _PyArg_UnpackKeywords() +PyDoc_STRVAR(zoneinfo_ZoneInfo__doc__, +"ZoneInfo(key)\n" +"--\n" +"\n" +"Create a new ZoneInfo instance."); + +static PyObject * +zoneinfo_ZoneInfo_impl(PyTypeObject *type, PyObject *key); + +static PyObject * +zoneinfo_ZoneInfo(PyTypeObject *type, PyObject *args, PyObject *kwargs) +{ + PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 1 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_item = { &_Py_ID(key), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"key", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "ZoneInfo", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[1]; + PyObject * const *fastargs; + Py_ssize_t nargs = PyTuple_GET_SIZE(args); + PyObject *key; + + fastargs = _PyArg_UnpackKeywords(_PyTuple_CAST(args)->ob_item, nargs, kwargs, NULL, &_parser, 1, 1, 0, argsbuf); + if (!fastargs) { + goto exit; + } + key = fastargs[0]; + Py_BEGIN_CRITICAL_SECTION(type); + return_value = zoneinfo_ZoneInfo_impl(type, key); + Py_END_CRITICAL_SECTION(); + +exit: + return return_value; +} + PyDoc_STRVAR(zoneinfo_ZoneInfo_from_file__doc__, "from_file($type, file_obj, /, key=None)\n" "--\n" @@ -182,7 +239,9 @@ zoneinfo_ZoneInfo_clear_cache(PyTypeObject *type, PyTypeObject *cls, PyObject *c } only_keys = args[0]; skip_optional_kwonly: + Py_BEGIN_CRITICAL_SECTION(type); return_value = zoneinfo_ZoneInfo_clear_cache_impl(type, cls, only_keys); + Py_END_CRITICAL_SECTION(); exit: return return_value; @@ -372,4 +431,4 @@ zoneinfo_ZoneInfo__unpickle(PyTypeObject *type, PyTypeObject *cls, PyObject *con exit: return return_value; } -/*[clinic end generated code: output=2a15f32fdd2ab6cd input=a9049054013a1b77]*/ +/*[clinic end generated code: output=b4fdc0b30247110a input=a9049054013a1b77]*/ From cfc27bc50fe165330f2295f9ac0ad56ca5b0f31c Mon Sep 17 00:00:00 2001 From: Sergey B Kirpichev Date: Mon, 14 Oct 2024 07:44:48 +0300 Subject: [PATCH 67/80] gh-123133: clarify p=0 case for "f" and "e" formatting types (GH-125426) Co-authored-by: Serhiy Storchaka --- Doc/library/string.rst | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/Doc/library/string.rst b/Doc/library/string.rst index 57a1f9205230353..49aeb28d57c8d17 100644 --- a/Doc/library/string.rst +++ b/Doc/library/string.rst @@ -509,9 +509,8 @@ The available presentation types for :class:`float` and | | significant digits. With no precision given, uses a | | | precision of ``6`` digits after the decimal point for | | | :class:`float`, and shows all coefficient digits | - | | for :class:`~decimal.Decimal`. If no digits follow the | - | | decimal point, the decimal point is also removed unless | - | | the ``#`` option is used. | + | | for :class:`~decimal.Decimal`. If ``p=0``, the decimal | + | | point is omitted unless the ``#`` option is used. | +---------+----------------------------------------------------------+ | ``'E'`` | Scientific notation. Same as ``'e'`` except it uses | | | an upper case 'E' as the separator character. | @@ -522,9 +521,8 @@ The available presentation types for :class:`float` and | | precision given, uses a precision of ``6`` digits after | | | the decimal point for :class:`float`, and uses a | | | precision large enough to show all coefficient digits | - | | for :class:`~decimal.Decimal`. If no digits follow the | - | | decimal point, the decimal point is also removed unless | - | | the ``#`` option is used. | + | | for :class:`~decimal.Decimal`. If ``p=0``, the decimal | + | | point is omitted unless the ``#`` option is used. | +---------+----------------------------------------------------------+ | ``'F'`` | Fixed-point notation. Same as ``'f'``, but converts | | | ``nan`` to ``NAN`` and ``inf`` to ``INF``. | From 66b3922b97388c328c9bd8df050eef11c0261fae Mon Sep 17 00:00:00 2001 From: rindeal Date: Mon, 14 Oct 2024 06:36:53 +0000 Subject: [PATCH 68/80] gh-86357: argparse: use str() consistently and explicitly to print choices (GH-117766) Signed-off-by: Jan Chren ~rindeal --- Lib/argparse.py | 12 +++---- Lib/test/test_argparse.py | 31 ++++++++++++++++++- ...-04-19-05-58-50.gh-issue-117766.J3xepp.rst | 1 + 3 files changed, 36 insertions(+), 8 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-04-19-05-58-50.gh-issue-117766.J3xepp.rst diff --git a/Lib/argparse.py b/Lib/argparse.py index 550415dc93478b4..fa9f5211257e964 100644 --- a/Lib/argparse.py +++ b/Lib/argparse.py @@ -547,8 +547,7 @@ def _metavar_formatter(self, action, default_metavar): if action.metavar is not None: result = action.metavar elif action.choices is not None: - choice_strs = [str(choice) for choice in action.choices] - result = '{%s}' % ','.join(choice_strs) + result = '{%s}' % ','.join(map(str, action.choices)) else: result = default_metavar @@ -599,8 +598,7 @@ def _expand_help(self, action): elif hasattr(value, '__name__'): params[name] = value.__name__ if params.get('choices') is not None: - choices_str = ', '.join([str(c) for c in params['choices']]) - params['choices'] = choices_str + params['choices'] = ', '.join(map(str, params['choices'])) return help_string % params def _iter_indented_subactions(self, action): @@ -717,7 +715,7 @@ def _get_action_name(argument): elif argument.dest not in (None, SUPPRESS): return argument.dest elif argument.choices: - return '{' + ','.join(argument.choices) + '}' + return '{%s}' % ','.join(map(str, argument.choices)) else: return None @@ -2607,8 +2605,8 @@ def _check_value(self, action, value): if isinstance(choices, str): choices = iter(choices) if value not in choices: - args = {'value': value, - 'choices': ', '.join(map(repr, action.choices))} + args = {'value': str(value), + 'choices': ', '.join(map(str, action.choices))} msg = _('invalid choice: %(value)r (choose from %(choices)s)') raise ArgumentError(action, msg % args) diff --git a/Lib/test/test_argparse.py b/Lib/test/test_argparse.py index f52a4b6bdd8acac..78692fd34747829 100644 --- a/Lib/test/test_argparse.py +++ b/Lib/test/test_argparse.py @@ -16,6 +16,7 @@ import argparse import warnings +from enum import StrEnum from test.support import captured_stderr from test.support import import_helper from test.support import os_helper @@ -985,6 +986,34 @@ class TestDisallowLongAbbreviationAllowsShortGroupingPrefix(ParserTestCase): ] +class TestStrEnumChoices(TestCase): + class Color(StrEnum): + RED = "red" + GREEN = "green" + BLUE = "blue" + + def test_parse_enum_value(self): + parser = argparse.ArgumentParser() + parser.add_argument('--color', choices=self.Color) + args = parser.parse_args(['--color', 'red']) + self.assertEqual(args.color, self.Color.RED) + + def test_help_message_contains_enum_choices(self): + parser = argparse.ArgumentParser() + parser.add_argument('--color', choices=self.Color, help='Choose a color') + self.assertIn('[--color {red,green,blue}]', parser.format_usage()) + self.assertIn(' --color {red,green,blue}', parser.format_help()) + + def test_invalid_enum_value_raises_error(self): + parser = argparse.ArgumentParser(exit_on_error=False) + parser.add_argument('--color', choices=self.Color) + self.assertRaisesRegex( + argparse.ArgumentError, + r"invalid choice: 'yellow' \(choose from red, green, blue\)", + parser.parse_args, + ['--color', 'yellow'], + ) + # ================ # Positional tests # ================ @@ -2485,7 +2514,7 @@ def test_wrong_argument_subparsers_no_destination_error(self): parser.parse_args(('baz',)) self.assertRegex( excinfo.exception.stderr, - r"error: argument {foo,bar}: invalid choice: 'baz' \(choose from 'foo', 'bar'\)\n$" + r"error: argument {foo,bar}: invalid choice: 'baz' \(choose from foo, bar\)\n$" ) def test_optional_subparsers(self): diff --git a/Misc/NEWS.d/next/Library/2024-04-19-05-58-50.gh-issue-117766.J3xepp.rst b/Misc/NEWS.d/next/Library/2024-04-19-05-58-50.gh-issue-117766.J3xepp.rst new file mode 100644 index 000000000000000..d090f931f0238d9 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-04-19-05-58-50.gh-issue-117766.J3xepp.rst @@ -0,0 +1 @@ +Always use :func:`str` to print ``choices`` in :mod:`argparse`. From b52c7306ea4470f9d7548655c2a1b89a07ff5504 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Mon, 14 Oct 2024 10:54:59 +0300 Subject: [PATCH 69/80] gh-121797: Add class method Fraction.from_number() (GH-121800) It is an alternative constructor which only accepts a single numeric argument. Unlike to Fraction.from_float() and Fraction.from_decimal() it accepts any real numbers supported by the standard constructor (int, float, Decimal, Rational numbers, objects with as_integer_ratio()). Unlike to the standard constructor, it does not accept strings. --- Doc/library/fractions.rst | 10 ++++ Doc/whatsnew/3.14.rst | 4 ++ Lib/fractions.py | 25 +++++++++- Lib/test/test_fractions.py | 49 ++++++++++++++++--- ...-07-15-19-34-56.gh-issue-121797.qDqj59.rst | 2 + 5 files changed, 82 insertions(+), 8 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-07-15-19-34-56.gh-issue-121797.qDqj59.rst diff --git a/Doc/library/fractions.rst b/Doc/library/fractions.rst index 2ee154952549aca..fc7f9a6301a9153 100644 --- a/Doc/library/fractions.rst +++ b/Doc/library/fractions.rst @@ -166,6 +166,16 @@ another rational number, or from a string. instance. + .. classmethod:: from_number(number) + + Alternative constructor which only accepts instances of + :class:`numbers.Integral`, :class:`numbers.Rational`, + :class:`float` or :class:`decimal.Decimal`, and objects with + the :meth:`!as_integer_ratio` method, but not strings. + + .. versionadded:: 3.14 + + .. method:: limit_denominator(max_denominator=1000000) Finds and returns the closest :class:`Fraction` to ``self`` that has diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index c62a3ca5872eefd..b22d1bd1e99d4e5 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -263,6 +263,10 @@ fractions :meth:`!as_integer_ratio` method to a :class:`~fractions.Fraction`. (Contributed by Serhiy Storchaka in :gh:`82017`.) +* Add alternative :class:`~fractions.Fraction` constructor + :meth:`Fraction.from_number() `. + (Contributed by Serhiy Storchaka in :gh:`121797`.) + functools --------- diff --git a/Lib/fractions.py b/Lib/fractions.py index 34fd0803d1b1ab6..f0cbc8c2e6c012a 100644 --- a/Lib/fractions.py +++ b/Lib/fractions.py @@ -279,7 +279,8 @@ def __new__(cls, numerator=0, denominator=None): numerator = -numerator else: - raise TypeError("argument should be a string or a number") + raise TypeError("argument should be a string or a Rational " + "instance or have the as_integer_ratio() method") elif type(numerator) is int is type(denominator): pass # *very* normal case @@ -305,6 +306,28 @@ def __new__(cls, numerator=0, denominator=None): self._denominator = denominator return self + @classmethod + def from_number(cls, number): + """Converts a finite real number to a rational number, exactly. + + Beware that Fraction.from_number(0.3) != Fraction(3, 10). + + """ + if type(number) is int: + return cls._from_coprime_ints(number, 1) + + elif isinstance(number, numbers.Rational): + return cls._from_coprime_ints(number.numerator, number.denominator) + + elif (isinstance(number, float) or + (not isinstance(number, type) and + hasattr(number, 'as_integer_ratio'))): + return cls._from_coprime_ints(*number.as_integer_ratio()) + + else: + raise TypeError("argument should be a Rational instance or " + "have the as_integer_ratio() method") + @classmethod def from_float(cls, f): """Converts a finite float to a rational number, exactly. diff --git a/Lib/test/test_fractions.py b/Lib/test/test_fractions.py index 4907f4093f52c9e..98dccbec9566acc 100644 --- a/Lib/test/test_fractions.py +++ b/Lib/test/test_fractions.py @@ -283,6 +283,13 @@ def __repr__(self): class RectComplex(Rect, complex): pass +class Ratio: + def __init__(self, ratio): + self._ratio = ratio + def as_integer_ratio(self): + return self._ratio + + class FractionTest(unittest.TestCase): def assertTypedEquals(self, expected, actual): @@ -355,14 +362,9 @@ def testInitFromDecimal(self): self.assertRaises(OverflowError, F, Decimal('-inf')) def testInitFromIntegerRatio(self): - class Ratio: - def __init__(self, ratio): - self._ratio = ratio - def as_integer_ratio(self): - return self._ratio - self.assertEqual((7, 3), _components(F(Ratio((7, 3))))) - errmsg = "argument should be a string or a number" + errmsg = (r"argument should be a string or a Rational instance or " + r"have the as_integer_ratio\(\) method") # the type also has an "as_integer_ratio" attribute. self.assertRaisesRegex(TypeError, errmsg, F, Ratio) # bad ratio @@ -388,6 +390,8 @@ class B(metaclass=M): pass self.assertRaisesRegex(TypeError, errmsg, F, B) self.assertRaisesRegex(TypeError, errmsg, F, B()) + self.assertRaises(TypeError, F.from_number, B) + self.assertRaises(TypeError, F.from_number, B()) def testFromString(self): self.assertEqual((5, 1), _components(F("5"))) @@ -594,6 +598,37 @@ def testFromDecimal(self): ValueError, "cannot convert NaN to integer ratio", F.from_decimal, Decimal("snan")) + def testFromNumber(self, cls=F): + def check(arg, numerator, denominator): + f = cls.from_number(arg) + self.assertIs(type(f), cls) + self.assertEqual(f.numerator, numerator) + self.assertEqual(f.denominator, denominator) + + check(10, 10, 1) + check(2.5, 5, 2) + check(Decimal('2.5'), 5, 2) + check(F(22, 7), 22, 7) + check(DummyFraction(22, 7), 22, 7) + check(Rat(22, 7), 22, 7) + check(Ratio((22, 7)), 22, 7) + self.assertRaises(TypeError, cls.from_number, 3+4j) + self.assertRaises(TypeError, cls.from_number, '5/2') + self.assertRaises(TypeError, cls.from_number, []) + self.assertRaises(OverflowError, cls.from_number, float('inf')) + self.assertRaises(OverflowError, cls.from_number, Decimal('inf')) + + # as_integer_ratio not defined in a class + class A: + pass + a = A() + a.as_integer_ratio = lambda: (9, 5) + check(a, 9, 5) + + def testFromNumber_subclass(self): + self.testFromNumber(DummyFraction) + + def test_is_integer(self): self.assertTrue(F(1, 1).is_integer()) self.assertTrue(F(-1, 1).is_integer()) diff --git a/Misc/NEWS.d/next/Library/2024-07-15-19-34-56.gh-issue-121797.qDqj59.rst b/Misc/NEWS.d/next/Library/2024-07-15-19-34-56.gh-issue-121797.qDqj59.rst new file mode 100644 index 000000000000000..9525379587f6cd1 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-07-15-19-34-56.gh-issue-121797.qDqj59.rst @@ -0,0 +1,2 @@ +Add alternative :class:`~fractions.Fraction` constructor +:meth:`Fraction.from_number() `. From 4b358ee647809019813f106eb901f466a3846d98 Mon Sep 17 00:00:00 2001 From: Ken Jin Date: Mon, 14 Oct 2024 16:17:51 +0800 Subject: [PATCH 70/80] gh-125323: Remove some unsafe Py_DECREFs in bytecodes.c, replacing them with PyStackRef_CLOSEs (GH-125324) --- Include/internal/pycore_stackref.h | 3 ++ Python/bytecodes.c | 40 +++++++++++----------- Python/executor_cases.c.h | 40 +++++++++++----------- Python/generated_cases.c.h | 40 +++++++++++----------- Tools/cases_generator/analyzer.py | 1 + Tools/cases_generator/generators_common.py | 1 + 6 files changed, 65 insertions(+), 60 deletions(-) diff --git a/Include/internal/pycore_stackref.h b/Include/internal/pycore_stackref.h index 7d1eb11aa5ecb83..0e6410466b924b7 100644 --- a/Include/internal/pycore_stackref.h +++ b/Include/internal/pycore_stackref.h @@ -153,6 +153,8 @@ PyStackRef_AsStrongReference(_PyStackRef stackref) return PyStackRef_FromPyObjectSteal(PyStackRef_AsPyObjectSteal(stackref)); } +#define PyStackRef_CLOSE_SPECIALIZED(stackref, dealloc) PyStackRef_CLOSE(stackref) + #else // Py_GIL_DISABLED @@ -177,6 +179,7 @@ static const _PyStackRef PyStackRef_NULL = { .bits = 0 }; #define PyStackRef_DUP(stackref) PyStackRef_FromPyObjectSteal(Py_NewRef(PyStackRef_AsPyObjectBorrow(stackref))) +#define PyStackRef_CLOSE_SPECIALIZED(stackref, dealloc) _Py_DECREF_SPECIALIZED(PyStackRef_AsPyObjectBorrow(stackref), dealloc) #endif // Py_GIL_DISABLED diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 34fdfcb05e3c187..299608f252c546c 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -474,8 +474,8 @@ dummy_func( STAT_INC(BINARY_OP, hit); PyObject *res_o = _PyLong_Multiply((PyLongObject *)left_o, (PyLongObject *)right_o); - _Py_DECREF_SPECIALIZED(right_o, (destructor)PyObject_Free); - _Py_DECREF_SPECIALIZED(left_o, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(right, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(left, (destructor)PyObject_Free); INPUTS_DEAD(); ERROR_IF(res_o == NULL, error); res = PyStackRef_FromPyObjectSteal(res_o); @@ -487,8 +487,8 @@ dummy_func( STAT_INC(BINARY_OP, hit); PyObject *res_o = _PyLong_Add((PyLongObject *)left_o, (PyLongObject *)right_o); - _Py_DECREF_SPECIALIZED(right_o, (destructor)PyObject_Free); - _Py_DECREF_SPECIALIZED(left_o, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(right, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(left, (destructor)PyObject_Free); INPUTS_DEAD(); ERROR_IF(res_o == NULL, error); res = PyStackRef_FromPyObjectSteal(res_o); @@ -500,8 +500,8 @@ dummy_func( STAT_INC(BINARY_OP, hit); PyObject *res_o = _PyLong_Subtract((PyLongObject *)left_o, (PyLongObject *)right_o); - _Py_DECREF_SPECIALIZED(right_o, (destructor)PyObject_Free); - _Py_DECREF_SPECIALIZED(left_o, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(right, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(left, (destructor)PyObject_Free); INPUTS_DEAD(); ERROR_IF(res_o == NULL, error); res = PyStackRef_FromPyObjectSteal(res_o); @@ -594,8 +594,8 @@ dummy_func( STAT_INC(BINARY_OP, hit); PyObject *res_o = PyUnicode_Concat(left_o, right_o); - _Py_DECREF_SPECIALIZED(left_o, _PyUnicode_ExactDealloc); - _Py_DECREF_SPECIALIZED(right_o, _PyUnicode_ExactDealloc); + PyStackRef_CLOSE_SPECIALIZED(left, _PyUnicode_ExactDealloc); + PyStackRef_CLOSE_SPECIALIZED(right, _PyUnicode_ExactDealloc); INPUTS_DEAD(); ERROR_IF(res_o == NULL, error); res = PyStackRef_FromPyObjectSteal(res_o); @@ -636,12 +636,12 @@ dummy_func( * that the string is safe to mutate. */ assert(Py_REFCNT(left_o) >= 2); - _Py_DECREF_NO_DEALLOC(left_o); + PyStackRef_CLOSE(left); DEAD(left); PyObject *temp = PyStackRef_AsPyObjectBorrow(*target_local); PyUnicode_Append(&temp, right_o); *target_local = PyStackRef_FromPyObjectSteal(temp); - _Py_DECREF_SPECIALIZED(right_o, _PyUnicode_ExactDealloc); + PyStackRef_CLOSE_SPECIALIZED(right, _PyUnicode_ExactDealloc); DEAD(right); ERROR_IF(PyStackRef_IsNull(*target_local), error); #if TIER_ONE @@ -755,7 +755,7 @@ dummy_func( PyObject *res_o = PyList_GET_ITEM(list, index); assert(res_o != NULL); Py_INCREF(res_o); - _Py_DECREF_SPECIALIZED(sub, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(sub_st, (destructor)PyObject_Free); DEAD(sub_st); PyStackRef_CLOSE(list_st); res = PyStackRef_FromPyObjectSteal(res_o); @@ -775,7 +775,7 @@ dummy_func( DEOPT_IF(Py_ARRAY_LENGTH(_Py_SINGLETON(strings).ascii) <= c); STAT_INC(BINARY_SUBSCR, hit); PyObject *res_o = (PyObject*)&_Py_SINGLETON(strings).ascii[c]; - _Py_DECREF_SPECIALIZED(sub, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(sub_st, (destructor)PyObject_Free); DEAD(sub_st); PyStackRef_CLOSE(str_st); res = PyStackRef_FromPyObjectSteal(res_o); @@ -796,7 +796,7 @@ dummy_func( PyObject *res_o = PyTuple_GET_ITEM(tuple, index); assert(res_o != NULL); Py_INCREF(res_o); - _Py_DECREF_SPECIALIZED(sub, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(sub_st, (destructor)PyObject_Free); DEAD(sub_st); PyStackRef_CLOSE(tuple_st); res = PyStackRef_FromPyObjectSteal(res_o); @@ -908,7 +908,7 @@ dummy_func( PyList_SET_ITEM(list, index, PyStackRef_AsPyObjectSteal(value)); assert(old_value != NULL); Py_DECREF(old_value); - _Py_DECREF_SPECIALIZED(sub, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(sub_st, (destructor)PyObject_Free); DEAD(sub_st); PyStackRef_CLOSE(list_st); } @@ -2398,9 +2398,9 @@ dummy_func( double dright = PyFloat_AS_DOUBLE(right_o); // 1 if NaN, 2 if <, 4 if >, 8 if ==; this matches low four bits of the oparg int sign_ish = COMPARISON_BIT(dleft, dright); - _Py_DECREF_SPECIALIZED(left_o, _PyFloat_ExactDealloc); + PyStackRef_CLOSE_SPECIALIZED(left, _PyFloat_ExactDealloc); DEAD(left); - _Py_DECREF_SPECIALIZED(right_o, _PyFloat_ExactDealloc); + PyStackRef_CLOSE_SPECIALIZED(right, _PyFloat_ExactDealloc); DEAD(right); res = (sign_ish & oparg) ? PyStackRef_True : PyStackRef_False; // It's always a bool, so we don't care about oparg & 16. @@ -2420,9 +2420,9 @@ dummy_func( Py_ssize_t iright = _PyLong_CompactValue((PyLongObject *)right_o); // 2 if <, 4 if >, 8 if ==; this matches the low 4 bits of the oparg int sign_ish = COMPARISON_BIT(ileft, iright); - _Py_DECREF_SPECIALIZED(left_o, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(left, (destructor)PyObject_Free); DEAD(left); - _Py_DECREF_SPECIALIZED(right_o, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(right, (destructor)PyObject_Free); DEAD(right); res = (sign_ish & oparg) ? PyStackRef_True : PyStackRef_False; // It's always a bool, so we don't care about oparg & 16. @@ -2436,9 +2436,9 @@ dummy_func( STAT_INC(COMPARE_OP, hit); int eq = _PyUnicode_Equal(left_o, right_o); assert((oparg >> 5) == Py_EQ || (oparg >> 5) == Py_NE); - _Py_DECREF_SPECIALIZED(left_o, _PyUnicode_ExactDealloc); + PyStackRef_CLOSE_SPECIALIZED(left, _PyUnicode_ExactDealloc); DEAD(left); - _Py_DECREF_SPECIALIZED(right_o, _PyUnicode_ExactDealloc); + PyStackRef_CLOSE_SPECIALIZED(right, _PyUnicode_ExactDealloc); DEAD(right); assert(eq == 0 || eq == 1); assert((oparg & 0xf) == COMPARISON_NOT_EQUALS || (oparg & 0xf) == COMPARISON_EQUALS); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index ef110e2e2a794a0..5532c04e497a750 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -541,8 +541,8 @@ PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); STAT_INC(BINARY_OP, hit); PyObject *res_o = _PyLong_Multiply((PyLongObject *)left_o, (PyLongObject *)right_o); - _Py_DECREF_SPECIALIZED(right_o, (destructor)PyObject_Free); - _Py_DECREF_SPECIALIZED(left_o, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(right, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(left, (destructor)PyObject_Free); if (res_o == NULL) JUMP_TO_ERROR(); res = PyStackRef_FromPyObjectSteal(res_o); stack_pointer[-2] = res; @@ -561,8 +561,8 @@ PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); STAT_INC(BINARY_OP, hit); PyObject *res_o = _PyLong_Add((PyLongObject *)left_o, (PyLongObject *)right_o); - _Py_DECREF_SPECIALIZED(right_o, (destructor)PyObject_Free); - _Py_DECREF_SPECIALIZED(left_o, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(right, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(left, (destructor)PyObject_Free); if (res_o == NULL) JUMP_TO_ERROR(); res = PyStackRef_FromPyObjectSteal(res_o); stack_pointer[-2] = res; @@ -581,8 +581,8 @@ PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); STAT_INC(BINARY_OP, hit); PyObject *res_o = _PyLong_Subtract((PyLongObject *)left_o, (PyLongObject *)right_o); - _Py_DECREF_SPECIALIZED(right_o, (destructor)PyObject_Free); - _Py_DECREF_SPECIALIZED(left_o, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(right, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(left, (destructor)PyObject_Free); if (res_o == NULL) JUMP_TO_ERROR(); res = PyStackRef_FromPyObjectSteal(res_o); stack_pointer[-2] = res; @@ -722,8 +722,8 @@ PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); STAT_INC(BINARY_OP, hit); PyObject *res_o = PyUnicode_Concat(left_o, right_o); - _Py_DECREF_SPECIALIZED(left_o, _PyUnicode_ExactDealloc); - _Py_DECREF_SPECIALIZED(right_o, _PyUnicode_ExactDealloc); + PyStackRef_CLOSE_SPECIALIZED(left, _PyUnicode_ExactDealloc); + PyStackRef_CLOSE_SPECIALIZED(right, _PyUnicode_ExactDealloc); if (res_o == NULL) JUMP_TO_ERROR(); res = PyStackRef_FromPyObjectSteal(res_o); stack_pointer[-2] = res; @@ -764,11 +764,11 @@ * that the string is safe to mutate. */ assert(Py_REFCNT(left_o) >= 2); - _Py_DECREF_NO_DEALLOC(left_o); + PyStackRef_CLOSE(left); PyObject *temp = PyStackRef_AsPyObjectBorrow(*target_local); PyUnicode_Append(&temp, right_o); *target_local = PyStackRef_FromPyObjectSteal(temp); - _Py_DECREF_SPECIALIZED(right_o, _PyUnicode_ExactDealloc); + PyStackRef_CLOSE_SPECIALIZED(right, _PyUnicode_ExactDealloc); if (PyStackRef_IsNull(*target_local)) JUMP_TO_ERROR(); #if TIER_ONE // The STORE_FAST is already done. This is done here in tier one, @@ -904,7 +904,7 @@ PyObject *res_o = PyList_GET_ITEM(list, index); assert(res_o != NULL); Py_INCREF(res_o); - _Py_DECREF_SPECIALIZED(sub, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(sub_st, (destructor)PyObject_Free); PyStackRef_CLOSE(list_st); res = PyStackRef_FromPyObjectSteal(res_o); stack_pointer[-2] = res; @@ -946,7 +946,7 @@ } STAT_INC(BINARY_SUBSCR, hit); PyObject *res_o = (PyObject*)&_Py_SINGLETON(strings).ascii[c]; - _Py_DECREF_SPECIALIZED(sub, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(sub_st, (destructor)PyObject_Free); PyStackRef_CLOSE(str_st); res = PyStackRef_FromPyObjectSteal(res_o); stack_pointer[-2] = res; @@ -985,7 +985,7 @@ PyObject *res_o = PyTuple_GET_ITEM(tuple, index); assert(res_o != NULL); Py_INCREF(res_o); - _Py_DECREF_SPECIALIZED(sub, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(sub_st, (destructor)PyObject_Free); PyStackRef_CLOSE(tuple_st); res = PyStackRef_FromPyObjectSteal(res_o); stack_pointer[-2] = res; @@ -1160,7 +1160,7 @@ PyList_SET_ITEM(list, index, PyStackRef_AsPyObjectSteal(value)); assert(old_value != NULL); Py_DECREF(old_value); - _Py_DECREF_SPECIALIZED(sub, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(sub_st, (destructor)PyObject_Free); PyStackRef_CLOSE(list_st); stack_pointer += -3; assert(WITHIN_STACK_BOUNDS()); @@ -2912,8 +2912,8 @@ double dright = PyFloat_AS_DOUBLE(right_o); // 1 if NaN, 2 if <, 4 if >, 8 if ==; this matches low four bits of the oparg int sign_ish = COMPARISON_BIT(dleft, dright); - _Py_DECREF_SPECIALIZED(left_o, _PyFloat_ExactDealloc); - _Py_DECREF_SPECIALIZED(right_o, _PyFloat_ExactDealloc); + PyStackRef_CLOSE_SPECIALIZED(left, _PyFloat_ExactDealloc); + PyStackRef_CLOSE_SPECIALIZED(right, _PyFloat_ExactDealloc); res = (sign_ish & oparg) ? PyStackRef_True : PyStackRef_False; // It's always a bool, so we don't care about oparg & 16. stack_pointer[-2] = res; @@ -2946,8 +2946,8 @@ Py_ssize_t iright = _PyLong_CompactValue((PyLongObject *)right_o); // 2 if <, 4 if >, 8 if ==; this matches the low 4 bits of the oparg int sign_ish = COMPARISON_BIT(ileft, iright); - _Py_DECREF_SPECIALIZED(left_o, (destructor)PyObject_Free); - _Py_DECREF_SPECIALIZED(right_o, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(left, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(right, (destructor)PyObject_Free); res = (sign_ish & oparg) ? PyStackRef_True : PyStackRef_False; // It's always a bool, so we don't care about oparg & 16. stack_pointer[-2] = res; @@ -2968,8 +2968,8 @@ STAT_INC(COMPARE_OP, hit); int eq = _PyUnicode_Equal(left_o, right_o); assert((oparg >> 5) == Py_EQ || (oparg >> 5) == Py_NE); - _Py_DECREF_SPECIALIZED(left_o, _PyUnicode_ExactDealloc); - _Py_DECREF_SPECIALIZED(right_o, _PyUnicode_ExactDealloc); + PyStackRef_CLOSE_SPECIALIZED(left, _PyUnicode_ExactDealloc); + PyStackRef_CLOSE_SPECIALIZED(right, _PyUnicode_ExactDealloc); assert(eq == 0 || eq == 1); assert((oparg & 0xf) == COMPARISON_NOT_EQUALS || (oparg & 0xf) == COMPARISON_EQUALS); assert(COMPARISON_NOT_EQUALS + 1 == COMPARISON_EQUALS); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 7023aea369db497..0eeb566a0adadc6 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -118,8 +118,8 @@ PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); STAT_INC(BINARY_OP, hit); PyObject *res_o = _PyLong_Add((PyLongObject *)left_o, (PyLongObject *)right_o); - _Py_DECREF_SPECIALIZED(right_o, (destructor)PyObject_Free); - _Py_DECREF_SPECIALIZED(left_o, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(right, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(left, (destructor)PyObject_Free); if (res_o == NULL) goto pop_2_error; res = PyStackRef_FromPyObjectSteal(res_o); } @@ -153,8 +153,8 @@ PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); STAT_INC(BINARY_OP, hit); PyObject *res_o = PyUnicode_Concat(left_o, right_o); - _Py_DECREF_SPECIALIZED(left_o, _PyUnicode_ExactDealloc); - _Py_DECREF_SPECIALIZED(right_o, _PyUnicode_ExactDealloc); + PyStackRef_CLOSE_SPECIALIZED(left, _PyUnicode_ExactDealloc); + PyStackRef_CLOSE_SPECIALIZED(right, _PyUnicode_ExactDealloc); if (res_o == NULL) goto pop_2_error; res = PyStackRef_FromPyObjectSteal(res_o); } @@ -207,11 +207,11 @@ * that the string is safe to mutate. */ assert(Py_REFCNT(left_o) >= 2); - _Py_DECREF_NO_DEALLOC(left_o); + PyStackRef_CLOSE(left); PyObject *temp = PyStackRef_AsPyObjectBorrow(*target_local); PyUnicode_Append(&temp, right_o); *target_local = PyStackRef_FromPyObjectSteal(temp); - _Py_DECREF_SPECIALIZED(right_o, _PyUnicode_ExactDealloc); + PyStackRef_CLOSE_SPECIALIZED(right, _PyUnicode_ExactDealloc); if (PyStackRef_IsNull(*target_local)) goto pop_2_error; #if TIER_ONE // The STORE_FAST is already done. This is done here in tier one, @@ -285,8 +285,8 @@ PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); STAT_INC(BINARY_OP, hit); PyObject *res_o = _PyLong_Multiply((PyLongObject *)left_o, (PyLongObject *)right_o); - _Py_DECREF_SPECIALIZED(right_o, (destructor)PyObject_Free); - _Py_DECREF_SPECIALIZED(left_o, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(right, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(left, (destructor)PyObject_Free); if (res_o == NULL) goto pop_2_error; res = PyStackRef_FromPyObjectSteal(res_o); } @@ -356,8 +356,8 @@ PyObject *right_o = PyStackRef_AsPyObjectBorrow(right); STAT_INC(BINARY_OP, hit); PyObject *res_o = _PyLong_Subtract((PyLongObject *)left_o, (PyLongObject *)right_o); - _Py_DECREF_SPECIALIZED(right_o, (destructor)PyObject_Free); - _Py_DECREF_SPECIALIZED(left_o, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(right, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(left, (destructor)PyObject_Free); if (res_o == NULL) goto pop_2_error; res = PyStackRef_FromPyObjectSteal(res_o); } @@ -582,7 +582,7 @@ PyObject *res_o = PyList_GET_ITEM(list, index); assert(res_o != NULL); Py_INCREF(res_o); - _Py_DECREF_SPECIALIZED(sub, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(sub_st, (destructor)PyObject_Free); PyStackRef_CLOSE(list_st); res = PyStackRef_FromPyObjectSteal(res_o); stack_pointer[-2] = res; @@ -614,7 +614,7 @@ DEOPT_IF(Py_ARRAY_LENGTH(_Py_SINGLETON(strings).ascii) <= c, BINARY_SUBSCR); STAT_INC(BINARY_SUBSCR, hit); PyObject *res_o = (PyObject*)&_Py_SINGLETON(strings).ascii[c]; - _Py_DECREF_SPECIALIZED(sub, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(sub_st, (destructor)PyObject_Free); PyStackRef_CLOSE(str_st); res = PyStackRef_FromPyObjectSteal(res_o); stack_pointer[-2] = res; @@ -646,7 +646,7 @@ PyObject *res_o = PyTuple_GET_ITEM(tuple, index); assert(res_o != NULL); Py_INCREF(res_o); - _Py_DECREF_SPECIALIZED(sub, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(sub_st, (destructor)PyObject_Free); PyStackRef_CLOSE(tuple_st); res = PyStackRef_FromPyObjectSteal(res_o); stack_pointer[-2] = res; @@ -3284,8 +3284,8 @@ double dright = PyFloat_AS_DOUBLE(right_o); // 1 if NaN, 2 if <, 4 if >, 8 if ==; this matches low four bits of the oparg int sign_ish = COMPARISON_BIT(dleft, dright); - _Py_DECREF_SPECIALIZED(left_o, _PyFloat_ExactDealloc); - _Py_DECREF_SPECIALIZED(right_o, _PyFloat_ExactDealloc); + PyStackRef_CLOSE_SPECIALIZED(left, _PyFloat_ExactDealloc); + PyStackRef_CLOSE_SPECIALIZED(right, _PyFloat_ExactDealloc); res = (sign_ish & oparg) ? PyStackRef_True : PyStackRef_False; // It's always a bool, so we don't care about oparg & 16. } @@ -3326,8 +3326,8 @@ Py_ssize_t iright = _PyLong_CompactValue((PyLongObject *)right_o); // 2 if <, 4 if >, 8 if ==; this matches the low 4 bits of the oparg int sign_ish = COMPARISON_BIT(ileft, iright); - _Py_DECREF_SPECIALIZED(left_o, (destructor)PyObject_Free); - _Py_DECREF_SPECIALIZED(right_o, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(left, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(right, (destructor)PyObject_Free); res = (sign_ish & oparg) ? PyStackRef_True : PyStackRef_False; // It's always a bool, so we don't care about oparg & 16. } @@ -3362,8 +3362,8 @@ STAT_INC(COMPARE_OP, hit); int eq = _PyUnicode_Equal(left_o, right_o); assert((oparg >> 5) == Py_EQ || (oparg >> 5) == Py_NE); - _Py_DECREF_SPECIALIZED(left_o, _PyUnicode_ExactDealloc); - _Py_DECREF_SPECIALIZED(right_o, _PyUnicode_ExactDealloc); + PyStackRef_CLOSE_SPECIALIZED(left, _PyUnicode_ExactDealloc); + PyStackRef_CLOSE_SPECIALIZED(right, _PyUnicode_ExactDealloc); assert(eq == 0 || eq == 1); assert((oparg & 0xf) == COMPARISON_NOT_EQUALS || (oparg & 0xf) == COMPARISON_EQUALS); assert(COMPARISON_NOT_EQUALS + 1 == COMPARISON_EQUALS); @@ -7720,7 +7720,7 @@ PyList_SET_ITEM(list, index, PyStackRef_AsPyObjectSteal(value)); assert(old_value != NULL); Py_DECREF(old_value); - _Py_DECREF_SPECIALIZED(sub, (destructor)PyObject_Free); + PyStackRef_CLOSE_SPECIALIZED(sub_st, (destructor)PyObject_Free); PyStackRef_CLOSE(list_st); stack_pointer += -3; assert(WITHIN_STACK_BOUNDS()); diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index 60f5d010a7a083b..19fdeac65cf2df9 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -540,6 +540,7 @@ def has_error_without_pop(op: parser.InstDef) -> bool: "PyStackRef_AsPyObjectSteal", "PyStackRef_CLEAR", "PyStackRef_CLOSE", + "PyStackRef_CLOSE_SPECIALIZED", "PyStackRef_DUP", "PyStackRef_False", "PyStackRef_FromPyObjectImmortal", diff --git a/Tools/cases_generator/generators_common.py b/Tools/cases_generator/generators_common.py index 0bfa1a3b56fbc2d..7e032c21d2485cb 100644 --- a/Tools/cases_generator/generators_common.py +++ b/Tools/cases_generator/generators_common.py @@ -116,6 +116,7 @@ def __init__(self, out: CWriter): "SAVE_STACK": self.save_stack, "RELOAD_STACK": self.reload_stack, "PyStackRef_CLOSE": self.stackref_close, + "PyStackRef_CLOSE_SPECIALIZED": self.stackref_close, "PyStackRef_AsPyObjectSteal": self.stackref_steal, "DISPATCH": self.dispatch } From 5217328f93f599755bd70418952392c54f705a71 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Mon, 14 Oct 2024 11:24:01 +0300 Subject: [PATCH 71/80] gh-121798: Add class method Decimal.from_number() (GH-121801) It is an alternate constructor which only accepts a single numeric argument. Unlike to Decimal.from_float() it accepts also Decimal. Unlike to the standard constructor, it does not accept strings and tuples. --- Doc/library/decimal.rst | 17 +++++++ Doc/whatsnew/3.14.rst | 6 +++ Lib/_pydecimal.py | 15 ++++++ Lib/test/test_decimal.py | 23 ++++++++++ ...-07-15-19-25-25.gh-issue-121798.GmuBDu.rst | 2 + Modules/_decimal/_decimal.c | 46 +++++++++++++++++++ Modules/_decimal/docstrings.h | 13 ++++++ 7 files changed, 122 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2024-07-15-19-25-25.gh-issue-121798.GmuBDu.rst diff --git a/Doc/library/decimal.rst b/Doc/library/decimal.rst index 916f17cadfaa7ed..c9a3e448cad0630 100644 --- a/Doc/library/decimal.rst +++ b/Doc/library/decimal.rst @@ -598,6 +598,23 @@ Decimal objects .. versionadded:: 3.1 + .. classmethod:: from_number(number) + + Alternative constructor that only accepts instances of + :class:`float`, :class:`int` or :class:`Decimal`, but not strings + or tuples. + + .. doctest:: + + >>> Decimal.from_number(314) + Decimal('314') + >>> Decimal.from_number(0.1) + Decimal('0.1000000000000000055511151231257827021181583404541015625') + >>> Decimal.from_number(Decimal('3.14')) + Decimal('3.14') + + .. versionadded:: 3.14 + .. method:: fma(other, third, context=None) Fused multiply-add. Return self*other+third with no rounding of the diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index b22d1bd1e99d4e5..25e69a59bdec62f 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -239,6 +239,12 @@ ctypes to help match a non-default ABI. (Contributed by Petr Viktorin in :gh:`97702`.) +decimal +------- + +* Add alternative :class:`~decimal.Decimal` constructor + :meth:`Decimal.from_number() `. + (Contributed by Serhiy Storchaka in :gh:`121798`.) dis --- diff --git a/Lib/_pydecimal.py b/Lib/_pydecimal.py index 75df3db262470b7..5b60570c6c592aa 100644 --- a/Lib/_pydecimal.py +++ b/Lib/_pydecimal.py @@ -582,6 +582,21 @@ def __new__(cls, value="0", context=None): raise TypeError("Cannot convert %r to Decimal" % value) + @classmethod + def from_number(cls, number): + """Converts a real number to a decimal number, exactly. + + >>> Decimal.from_number(314) # int + Decimal('314') + >>> Decimal.from_number(0.1) # float + Decimal('0.1000000000000000055511151231257827021181583404541015625') + >>> Decimal.from_number(Decimal('3.14')) # another decimal instance + Decimal('3.14') + """ + if isinstance(number, (int, Decimal, float)): + return cls(number) + raise TypeError("Cannot convert %r to Decimal" % number) + @classmethod def from_float(cls, f): """Converts a float to a decimal number, exactly. diff --git a/Lib/test/test_decimal.py b/Lib/test/test_decimal.py index d1e7e69e7e951b3..bc6c64277409497 100644 --- a/Lib/test/test_decimal.py +++ b/Lib/test/test_decimal.py @@ -812,6 +812,29 @@ def test_explicit_context_create_from_float(self): x = random.expovariate(0.01) * (random.random() * 2.0 - 1.0) self.assertEqual(x, float(nc.create_decimal(x))) # roundtrip + def test_from_number(self, cls=None): + Decimal = self.decimal.Decimal + if cls is None: + cls = Decimal + + def check(arg, expected): + d = cls.from_number(arg) + self.assertIs(type(d), cls) + self.assertEqual(d, expected) + + check(314, Decimal(314)) + check(3.14, Decimal.from_float(3.14)) + check(Decimal('3.14'), Decimal('3.14')) + self.assertRaises(TypeError, cls.from_number, 3+4j) + self.assertRaises(TypeError, cls.from_number, '314') + self.assertRaises(TypeError, cls.from_number, (0, (3, 1, 4), 0)) + self.assertRaises(TypeError, cls.from_number, object()) + + def test_from_number_subclass(self, cls=None): + class DecimalSubclass(self.decimal.Decimal): + pass + self.test_from_number(DecimalSubclass) + def test_unicode_digits(self): Decimal = self.decimal.Decimal diff --git a/Misc/NEWS.d/next/Library/2024-07-15-19-25-25.gh-issue-121798.GmuBDu.rst b/Misc/NEWS.d/next/Library/2024-07-15-19-25-25.gh-issue-121798.GmuBDu.rst new file mode 100644 index 000000000000000..5706e4bffeb4a1f --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-07-15-19-25-25.gh-issue-121798.GmuBDu.rst @@ -0,0 +1,2 @@ +Add alternative :class:`~decimal.Decimal` constructor +:meth:`Decimal.from_number() `. diff --git a/Modules/_decimal/_decimal.c b/Modules/_decimal/_decimal.c index a33c9793b5ad17d..c564813036e5040 100644 --- a/Modules/_decimal/_decimal.c +++ b/Modules/_decimal/_decimal.c @@ -2857,6 +2857,51 @@ dec_from_float(PyObject *type, PyObject *pyfloat) return result; } +/* 'v' can have any numeric type accepted by the Decimal constructor. Attempt + an exact conversion. If the result does not meet the restrictions + for an mpd_t, fail with InvalidOperation. */ +static PyObject * +PyDecType_FromNumberExact(PyTypeObject *type, PyObject *v, PyObject *context) +{ + decimal_state *state = get_module_state_by_def(type); + assert(v != NULL); + if (PyDec_Check(state, v)) { + return PyDecType_FromDecimalExact(type, v, context); + } + else if (PyLong_Check(v)) { + return PyDecType_FromLongExact(type, v, context); + } + else if (PyFloat_Check(v)) { + if (dec_addstatus(context, MPD_Float_operation)) { + return NULL; + } + return PyDecType_FromFloatExact(type, v, context); + } + else { + PyErr_Format(PyExc_TypeError, + "conversion from %s to Decimal is not supported", + Py_TYPE(v)->tp_name); + return NULL; + } +} + +/* class method */ +static PyObject * +dec_from_number(PyObject *type, PyObject *number) +{ + PyObject *context; + PyObject *result; + + decimal_state *state = get_module_state_by_def((PyTypeObject *)type); + CURRENT_CONTEXT(state, context); + result = PyDecType_FromNumberExact(state->PyDec_Type, number, context); + if (type != (PyObject *)state->PyDec_Type && result != NULL) { + Py_SETREF(result, PyObject_CallFunctionObjArgs(type, result, NULL)); + } + + return result; +} + /* create_decimal_from_float */ static PyObject * ctx_from_float(PyObject *context, PyObject *v) @@ -5052,6 +5097,7 @@ static PyMethodDef dec_methods [] = /* Miscellaneous */ { "from_float", dec_from_float, METH_O|METH_CLASS, doc_from_float }, + { "from_number", dec_from_number, METH_O|METH_CLASS, doc_from_number }, { "as_tuple", PyDec_AsTuple, METH_NOARGS, doc_as_tuple }, { "as_integer_ratio", dec_as_integer_ratio, METH_NOARGS, doc_as_integer_ratio }, diff --git a/Modules/_decimal/docstrings.h b/Modules/_decimal/docstrings.h index a1823cdd32b74c1..b34bff83d3f4e95 100644 --- a/Modules/_decimal/docstrings.h +++ b/Modules/_decimal/docstrings.h @@ -189,6 +189,19 @@ Decimal.from_float(0.1) is not the same as Decimal('0.1').\n\ \n\ \n"); +PyDoc_STRVAR(doc_from_number, +"from_number($type, number, /)\n--\n\n\ +Class method that converts a real number to a decimal number, exactly.\n\ +\n\ + >>> Decimal.from_number(314) # int\n\ + Decimal('314')\n\ + >>> Decimal.from_number(0.1) # float\n\ + Decimal('0.1000000000000000055511151231257827021181583404541015625')\n\ + >>> Decimal.from_number(Decimal('3.14')) # another decimal instance\n\ + Decimal('3.14')\n\ +\n\ +\n"); + PyDoc_STRVAR(doc_fma, "fma($self, /, other, third, context=None)\n--\n\n\ Fused multiply-add. Return self*other+third with no rounding of the\n\ From 67f6e08147bc005e460d82fcce85bf5d56009cf5 Mon Sep 17 00:00:00 2001 From: Kumar Aditya Date: Mon, 14 Oct 2024 14:06:31 +0530 Subject: [PATCH 72/80] gh-125139: use `_PyRecursiveMutex` in `_thread.RLock` (#125144) --- Include/internal/pycore_lock.h | 3 +- Modules/_threadmodule.c | 151 +++++++-------------------------- Python/lock.c | 31 ++++++- 3 files changed, 63 insertions(+), 122 deletions(-) diff --git a/Include/internal/pycore_lock.h b/Include/internal/pycore_lock.h index cd7deda00c7beec..57cbce8f126acab 100644 --- a/Include/internal/pycore_lock.h +++ b/Include/internal/pycore_lock.h @@ -160,8 +160,9 @@ typedef struct { PyAPI_FUNC(int) _PyRecursiveMutex_IsLockedByCurrentThread(_PyRecursiveMutex *m); PyAPI_FUNC(void) _PyRecursiveMutex_Lock(_PyRecursiveMutex *m); +extern PyLockStatus _PyRecursiveMutex_LockTimed(_PyRecursiveMutex *m, PyTime_t timeout, _PyLockFlags flags); PyAPI_FUNC(void) _PyRecursiveMutex_Unlock(_PyRecursiveMutex *m); - +extern int _PyRecursiveMutex_TryUnlock(_PyRecursiveMutex *m); // A readers-writer (RW) lock. The lock supports multiple concurrent readers or // a single writer. The lock is write-preferring: if a writer is waiting while diff --git a/Modules/_threadmodule.c b/Modules/_threadmodule.c index 9617f9cafe76ffd..d4408aa9e42d9de 100644 --- a/Modules/_threadmodule.c +++ b/Modules/_threadmodule.c @@ -726,11 +726,6 @@ lock_dealloc(PyObject *op) Py_DECREF(tp); } -static inline PyLockStatus -acquire_timed(PyThread_type_lock lock, PyTime_t timeout) -{ - return PyThread_acquire_lock_timed_with_retries(lock, timeout); -} static int lock_acquire_parse_args(PyObject *args, PyObject *kwds, @@ -973,10 +968,7 @@ static PyType_Spec lock_type_spec = { typedef struct { PyObject_HEAD - PyThread_type_lock rlock_lock; - PyThread_ident_t rlock_owner; - unsigned long rlock_count; - PyObject *in_weakreflist; + _PyRecursiveMutex lock; } rlockobject; static int @@ -992,59 +984,26 @@ rlock_dealloc(PyObject *op) { rlockobject *self = (rlockobject*)op; PyObject_GC_UnTrack(self); - if (self->in_weakreflist != NULL) - PyObject_ClearWeakRefs((PyObject *) self); - /* self->rlock_lock can be NULL if PyThread_allocate_lock() failed - in rlock_new() */ - if (self->rlock_lock != NULL) { - /* Unlock the lock so it's safe to free it */ - if (self->rlock_count > 0) - PyThread_release_lock(self->rlock_lock); - - PyThread_free_lock(self->rlock_lock); - } + PyObject_ClearWeakRefs((PyObject *) self); PyTypeObject *tp = Py_TYPE(self); tp->tp_free(self); Py_DECREF(tp); } -static bool -rlock_is_owned_by(rlockobject *self, PyThread_ident_t tid) -{ - PyThread_ident_t owner_tid = - _Py_atomic_load_ullong_relaxed(&self->rlock_owner); - return owner_tid == tid && self->rlock_count > 0; -} static PyObject * rlock_acquire(PyObject *op, PyObject *args, PyObject *kwds) { rlockobject *self = (rlockobject*)op; PyTime_t timeout; - PyThread_ident_t tid; - PyLockStatus r = PY_LOCK_ACQUIRED; - if (lock_acquire_parse_args(args, kwds, &timeout) < 0) + if (lock_acquire_parse_args(args, kwds, &timeout) < 0) { return NULL; - - tid = PyThread_get_thread_ident_ex(); - if (rlock_is_owned_by(self, tid)) { - unsigned long count = self->rlock_count + 1; - if (count <= self->rlock_count) { - PyErr_SetString(PyExc_OverflowError, - "Internal lock count overflowed"); - return NULL; - } - self->rlock_count = count; - Py_RETURN_TRUE; - } - r = acquire_timed(self->rlock_lock, timeout); - if (r == PY_LOCK_ACQUIRED) { - assert(self->rlock_count == 0); - _Py_atomic_store_ullong_relaxed(&self->rlock_owner, tid); - self->rlock_count = 1; } - else if (r == PY_LOCK_INTR) { + + PyLockStatus r = _PyRecursiveMutex_LockTimed(&self->lock, timeout, + _PY_LOCK_HANDLE_SIGNALS | _PY_LOCK_DETACH); + if (r == PY_LOCK_INTR) { return NULL; } @@ -1078,17 +1037,12 @@ static PyObject * rlock_release(PyObject *op, PyObject *Py_UNUSED(ignored)) { rlockobject *self = (rlockobject*)op; - PyThread_ident_t tid = PyThread_get_thread_ident_ex(); - if (!rlock_is_owned_by(self, tid)) { + if (_PyRecursiveMutex_TryUnlock(&self->lock) < 0) { PyErr_SetString(PyExc_RuntimeError, "cannot release un-acquired lock"); return NULL; } - if (--self->rlock_count == 0) { - _Py_atomic_store_ullong_relaxed(&self->rlock_owner, 0); - PyThread_release_lock(self->rlock_lock); - } Py_RETURN_NONE; } @@ -1116,25 +1070,15 @@ rlock_acquire_restore(PyObject *op, PyObject *args) { rlockobject *self = (rlockobject*)op; PyThread_ident_t owner; - unsigned long count; - int r = 1; + Py_ssize_t count; - if (!PyArg_ParseTuple(args, "(k" Py_PARSE_THREAD_IDENT_T "):_acquire_restore", + if (!PyArg_ParseTuple(args, "(n" Py_PARSE_THREAD_IDENT_T "):_acquire_restore", &count, &owner)) return NULL; - if (!PyThread_acquire_lock(self->rlock_lock, 0)) { - Py_BEGIN_ALLOW_THREADS - r = PyThread_acquire_lock(self->rlock_lock, 1); - Py_END_ALLOW_THREADS - } - if (!r) { - PyErr_SetString(ThreadError, "couldn't acquire lock"); - return NULL; - } - assert(self->rlock_count == 0); - _Py_atomic_store_ullong_relaxed(&self->rlock_owner, owner); - self->rlock_count = count; + _PyRecursiveMutex_Lock(&self->lock); + _Py_atomic_store_ullong_relaxed(&self->lock.thread, owner); + self->lock.level = (size_t)count - 1; Py_RETURN_NONE; } @@ -1148,21 +1092,18 @@ static PyObject * rlock_release_save(PyObject *op, PyObject *Py_UNUSED(ignored)) { rlockobject *self = (rlockobject*)op; - PyThread_ident_t owner; - unsigned long count; - if (self->rlock_count == 0) { + if (!_PyRecursiveMutex_IsLockedByCurrentThread(&self->lock)) { PyErr_SetString(PyExc_RuntimeError, "cannot release un-acquired lock"); return NULL; } - owner = self->rlock_owner; - count = self->rlock_count; - self->rlock_count = 0; - _Py_atomic_store_ullong_relaxed(&self->rlock_owner, 0); - PyThread_release_lock(self->rlock_lock); - return Py_BuildValue("k" Py_PARSE_THREAD_IDENT_T, count, owner); + PyThread_ident_t owner = self->lock.thread; + Py_ssize_t count = self->lock.level + 1; + self->lock.level = 0; // ensure the unlock releases the lock + _PyRecursiveMutex_Unlock(&self->lock); + return Py_BuildValue("n" Py_PARSE_THREAD_IDENT_T, count, owner); } PyDoc_STRVAR(rlock_release_save_doc, @@ -1175,10 +1116,10 @@ static PyObject * rlock_recursion_count(PyObject *op, PyObject *Py_UNUSED(ignored)) { rlockobject *self = (rlockobject*)op; - PyThread_ident_t tid = PyThread_get_thread_ident_ex(); - PyThread_ident_t owner = - _Py_atomic_load_ullong_relaxed(&self->rlock_owner); - return PyLong_FromUnsignedLong(owner == tid ? self->rlock_count : 0UL); + if (_PyRecursiveMutex_IsLockedByCurrentThread(&self->lock)) { + return PyLong_FromSize_t(self->lock.level + 1); + } + return PyLong_FromLong(0); } PyDoc_STRVAR(rlock_recursion_count_doc, @@ -1191,12 +1132,8 @@ static PyObject * rlock_is_owned(PyObject *op, PyObject *Py_UNUSED(ignored)) { rlockobject *self = (rlockobject*)op; - PyThread_ident_t tid = PyThread_get_thread_ident_ex(); - - if (rlock_is_owned_by(self, tid)) { - Py_RETURN_TRUE; - } - Py_RETURN_FALSE; + long owned = _PyRecursiveMutex_IsLockedByCurrentThread(&self->lock); + return PyBool_FromLong(owned); } PyDoc_STRVAR(rlock_is_owned_doc, @@ -1212,16 +1149,7 @@ rlock_new(PyTypeObject *type, PyObject *args, PyObject *kwds) if (self == NULL) { return NULL; } - self->in_weakreflist = NULL; - self->rlock_owner = 0; - self->rlock_count = 0; - - self->rlock_lock = PyThread_allocate_lock(); - if (self->rlock_lock == NULL) { - Py_DECREF(self); - PyErr_SetString(ThreadError, "can't allocate lock"); - return NULL; - } + self->lock = (_PyRecursiveMutex){0}; return (PyObject *) self; } @@ -1229,13 +1157,13 @@ static PyObject * rlock_repr(PyObject *op) { rlockobject *self = (rlockobject*)op; - PyThread_ident_t owner = - _Py_atomic_load_ullong_relaxed(&self->rlock_owner); + PyThread_ident_t owner = self->lock.thread; + size_t count = self->lock.level + 1; return PyUnicode_FromFormat( - "<%s %s object owner=%" PY_FORMAT_THREAD_IDENT_T " count=%lu at %p>", - self->rlock_count ? "locked" : "unlocked", + "<%s %s object owner=%" PY_FORMAT_THREAD_IDENT_T " count=%zu at %p>", + owner ? "locked" : "unlocked", Py_TYPE(self)->tp_name, owner, - self->rlock_count, self); + count, self); } @@ -1243,14 +1171,7 @@ rlock_repr(PyObject *op) static PyObject * rlock__at_fork_reinit(rlockobject *self, PyObject *Py_UNUSED(args)) { - if (_PyThread_at_fork_reinit(&self->rlock_lock) < 0) { - PyErr_SetString(ThreadError, "failed to reinitialize lock at fork"); - return NULL; - } - - self->rlock_owner = 0; - self->rlock_count = 0; - + self->lock = (_PyRecursiveMutex){0}; Py_RETURN_NONE; } #endif /* HAVE_FORK */ @@ -1281,18 +1202,12 @@ static PyMethodDef rlock_methods[] = { }; -static PyMemberDef rlock_type_members[] = { - {"__weaklistoffset__", Py_T_PYSSIZET, offsetof(rlockobject, in_weakreflist), Py_READONLY}, - {NULL}, -}; - static PyType_Slot rlock_type_slots[] = { {Py_tp_dealloc, rlock_dealloc}, {Py_tp_repr, rlock_repr}, {Py_tp_methods, rlock_methods}, {Py_tp_alloc, PyType_GenericAlloc}, {Py_tp_new, rlock_new}, - {Py_tp_members, rlock_type_members}, {Py_tp_traverse, rlock_traverse}, {0, 0}, }; @@ -1301,7 +1216,7 @@ static PyType_Spec rlock_type_spec = { .name = "_thread.RLock", .basicsize = sizeof(rlockobject), .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | - Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_IMMUTABLETYPE), + Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_IMMUTABLETYPE | Py_TPFLAGS_MANAGED_WEAKREF), .slots = rlock_type_slots, }; diff --git a/Python/lock.c b/Python/lock.c index 57675fe1873fa28..554c51d77803224 100644 --- a/Python/lock.c +++ b/Python/lock.c @@ -377,21 +377,46 @@ _PyRecursiveMutex_Lock(_PyRecursiveMutex *m) assert(m->level == 0); } +PyLockStatus +_PyRecursiveMutex_LockTimed(_PyRecursiveMutex *m, PyTime_t timeout, _PyLockFlags flags) +{ + PyThread_ident_t thread = PyThread_get_thread_ident_ex(); + if (recursive_mutex_is_owned_by(m, thread)) { + m->level++; + return PY_LOCK_ACQUIRED; + } + PyLockStatus s = _PyMutex_LockTimed(&m->mutex, timeout, flags); + if (s == PY_LOCK_ACQUIRED) { + _Py_atomic_store_ullong_relaxed(&m->thread, thread); + assert(m->level == 0); + } + return s; +} + void _PyRecursiveMutex_Unlock(_PyRecursiveMutex *m) +{ + if (_PyRecursiveMutex_TryUnlock(m) < 0) { + Py_FatalError("unlocking a recursive mutex that is not " + "owned by the current thread"); + } +} + +int +_PyRecursiveMutex_TryUnlock(_PyRecursiveMutex *m) { PyThread_ident_t thread = PyThread_get_thread_ident_ex(); if (!recursive_mutex_is_owned_by(m, thread)) { - Py_FatalError("unlocking a recursive mutex that is not owned by the" - " current thread"); + return -1; } if (m->level > 0) { m->level--; - return; + return 0; } assert(m->level == 0); _Py_atomic_store_ullong_relaxed(&m->thread, 0); PyMutex_Unlock(&m->mutex); + return 0; } #define _Py_WRITE_LOCKED 1 From 06ca33020e1168459fc6c3e0df93664daf801339 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Mon, 14 Oct 2024 14:18:57 +0100 Subject: [PATCH 73/80] GH-125323: Convert DECREF_INPUTS_AND_REUSE_FLOAT into a function that takes PyStackRefs. (GH-125439) --- Include/internal/pycore_ceval.h | 2 ++ Include/internal/pycore_opcode_metadata.h | 6 ++-- Include/internal/pycore_stackref.h | 7 +++++ Include/internal/pycore_uop_metadata.h | 6 ++-- Objects/floatobject.c | 35 +++++++++++++++++++++++ Python/bytecodes.c | 12 ++++---- Python/ceval_macros.h | 20 ------------- Python/executor_cases.c.h | 12 ++++---- Python/generated_cases.c.h | 12 ++++---- Tools/cases_generator/analyzer.py | 1 + 10 files changed, 69 insertions(+), 44 deletions(-) diff --git a/Include/internal/pycore_ceval.h b/Include/internal/pycore_ceval.h index 594fbb1c8e443bc..cff2b1f71147933 100644 --- a/Include/internal/pycore_ceval.h +++ b/Include/internal/pycore_ceval.h @@ -316,6 +316,8 @@ _Py_eval_breaker_bit_is_set(PyThreadState *tstate, uintptr_t bit) void _Py_set_eval_breaker_bit_all(PyInterpreterState *interp, uintptr_t bit); void _Py_unset_eval_breaker_bit_all(PyInterpreterState *interp, uintptr_t bit); +PyAPI_FUNC(PyObject *) _PyFloat_FromDouble_ConsumeInputs(_PyStackRef left, _PyStackRef right, double value); + #ifdef __cplusplus } diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 8fec45b1e8d5c31..c18423476d39621 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -1015,13 +1015,13 @@ extern const struct opcode_metadata _PyOpcode_opcode_metadata[266]; #ifdef NEED_OPCODE_METADATA const struct opcode_metadata _PyOpcode_opcode_metadata[266] = { [BINARY_OP] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, - [BINARY_OP_ADD_FLOAT] = { true, INSTR_FMT_IXC, HAS_EXIT_FLAG }, + [BINARY_OP_ADD_FLOAT] = { true, INSTR_FMT_IXC, HAS_EXIT_FLAG | HAS_ERROR_FLAG }, [BINARY_OP_ADD_INT] = { true, INSTR_FMT_IXC, HAS_EXIT_FLAG | HAS_ERROR_FLAG }, [BINARY_OP_ADD_UNICODE] = { true, INSTR_FMT_IXC, HAS_EXIT_FLAG | HAS_ERROR_FLAG }, [BINARY_OP_INPLACE_ADD_UNICODE] = { true, INSTR_FMT_IXC, HAS_LOCAL_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG }, - [BINARY_OP_MULTIPLY_FLOAT] = { true, INSTR_FMT_IXC, HAS_EXIT_FLAG }, + [BINARY_OP_MULTIPLY_FLOAT] = { true, INSTR_FMT_IXC, HAS_EXIT_FLAG | HAS_ERROR_FLAG }, [BINARY_OP_MULTIPLY_INT] = { true, INSTR_FMT_IXC, HAS_EXIT_FLAG | HAS_ERROR_FLAG }, - [BINARY_OP_SUBTRACT_FLOAT] = { true, INSTR_FMT_IXC, HAS_EXIT_FLAG }, + [BINARY_OP_SUBTRACT_FLOAT] = { true, INSTR_FMT_IXC, HAS_EXIT_FLAG | HAS_ERROR_FLAG }, [BINARY_OP_SUBTRACT_INT] = { true, INSTR_FMT_IXC, HAS_EXIT_FLAG | HAS_ERROR_FLAG }, [BINARY_SLICE] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [BINARY_SUBSCR] = { true, INSTR_FMT_IXC, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, diff --git a/Include/internal/pycore_stackref.h b/Include/internal/pycore_stackref.h index 0e6410466b924b7..588e57f6cd97e04 100644 --- a/Include/internal/pycore_stackref.h +++ b/Include/internal/pycore_stackref.h @@ -76,6 +76,13 @@ PyStackRef_AsPyObjectBorrow(_PyStackRef stackref) #define PyStackRef_IsDeferred(ref) (((ref).bits & Py_TAG_BITS) == Py_TAG_DEFERRED) +static inline PyObject * +PyStackRef_NotDeferred_AsPyObject(_PyStackRef stackref) +{ + assert(!PyStackRef_IsDeferred(stackref)); + return (PyObject *)stackref.bits; +} + static inline PyObject * PyStackRef_AsPyObjectSteal(_PyStackRef stackref) { diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index fd41e9a5fe862bd..2f0a7fb2f6e549d 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -69,9 +69,9 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_GUARD_BOTH_FLOAT] = HAS_EXIT_FLAG, [_GUARD_NOS_FLOAT] = HAS_EXIT_FLAG, [_GUARD_TOS_FLOAT] = HAS_EXIT_FLAG, - [_BINARY_OP_MULTIPLY_FLOAT] = HAS_PURE_FLAG, - [_BINARY_OP_ADD_FLOAT] = HAS_PURE_FLAG, - [_BINARY_OP_SUBTRACT_FLOAT] = HAS_PURE_FLAG, + [_BINARY_OP_MULTIPLY_FLOAT] = HAS_ERROR_FLAG | HAS_PURE_FLAG, + [_BINARY_OP_ADD_FLOAT] = HAS_ERROR_FLAG | HAS_PURE_FLAG, + [_BINARY_OP_SUBTRACT_FLOAT] = HAS_ERROR_FLAG | HAS_PURE_FLAG, [_GUARD_BOTH_UNICODE] = HAS_EXIT_FLAG, [_BINARY_OP_ADD_UNICODE] = HAS_ERROR_FLAG | HAS_PURE_FLAG, [_BINARY_OP_INPLACE_ADD_UNICODE] = HAS_LOCAL_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG, diff --git a/Objects/floatobject.c b/Objects/floatobject.c index a48a210adee3b9a..d66863febe8c863 100644 --- a/Objects/floatobject.c +++ b/Objects/floatobject.c @@ -134,6 +134,41 @@ PyFloat_FromDouble(double fval) return (PyObject *) op; } +#ifdef Py_GIL_DISABLED + +PyObject *_PyFloat_FromDouble_ConsumeInputs(_PyStackRef left, _PyStackRef right, double value) +{ + PyStackRef_CLOSE(left); + PyStackRef_CLOSE(right); + return PyFloat_FromDouble(value); +} + +#else // Py_GIL_DISABLED + +PyObject *_PyFloat_FromDouble_ConsumeInputs(_PyStackRef left, _PyStackRef right, double value) +{ + PyObject *left_o = PyStackRef_AsPyObjectSteal(left); + PyObject *right_o = PyStackRef_AsPyObjectSteal(right); + if (Py_REFCNT(left_o) == 1) { + ((PyFloatObject *)left_o)->ob_fval = value; + _Py_DECREF_SPECIALIZED(right_o, _PyFloat_ExactDealloc); + return left_o; + } + else if (Py_REFCNT(right_o) == 1) { + ((PyFloatObject *)right_o)->ob_fval = value; + _Py_DECREF_NO_DEALLOC(left_o); + return right_o; + } + else { + PyObject *result = PyFloat_FromDouble(value); + _Py_DECREF_NO_DEALLOC(left_o); + _Py_DECREF_NO_DEALLOC(right_o); + return result; + } +} + +#endif // Py_GIL_DISABLED + static PyObject * float_from_string_inner(const char *s, Py_ssize_t len, void *obj) { diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 299608f252c546c..b22916aeaa248b0 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -539,9 +539,9 @@ dummy_func( double dres = ((PyFloatObject *)left_o)->ob_fval * ((PyFloatObject *)right_o)->ob_fval; - PyObject *res_o; - DECREF_INPUTS_AND_REUSE_FLOAT(left_o, right_o, dres, res_o); + PyObject *res_o = _PyFloat_FromDouble_ConsumeInputs(left, right, dres); INPUTS_DEAD(); + ERROR_IF(res_o == NULL, error); res = PyStackRef_FromPyObjectSteal(res_o); } @@ -553,9 +553,9 @@ dummy_func( double dres = ((PyFloatObject *)left_o)->ob_fval + ((PyFloatObject *)right_o)->ob_fval; - PyObject *res_o; - DECREF_INPUTS_AND_REUSE_FLOAT(left_o, right_o, dres, res_o); + PyObject *res_o = _PyFloat_FromDouble_ConsumeInputs(left, right, dres); INPUTS_DEAD(); + ERROR_IF(res_o == NULL, error); res = PyStackRef_FromPyObjectSteal(res_o); } @@ -567,9 +567,9 @@ dummy_func( double dres = ((PyFloatObject *)left_o)->ob_fval - ((PyFloatObject *)right_o)->ob_fval; - PyObject *res_o; - DECREF_INPUTS_AND_REUSE_FLOAT(left_o, right_o, dres, res_o); + PyObject *res_o = _PyFloat_FromDouble_ConsumeInputs(left, right, dres); INPUTS_DEAD(); + ERROR_IF(res_o == NULL, error); res = PyStackRef_FromPyObjectSteal(res_o); } diff --git a/Python/ceval_macros.h b/Python/ceval_macros.h index e0e9cc156ed62f5..6674c4ccf9f6930 100644 --- a/Python/ceval_macros.h +++ b/Python/ceval_macros.h @@ -327,26 +327,6 @@ GETITEM(PyObject *v, Py_ssize_t i) { " in enclosing scope" #define NAME_ERROR_MSG "name '%.200s' is not defined" -#define DECREF_INPUTS_AND_REUSE_FLOAT(left, right, dval, result) \ -do { \ - if (Py_REFCNT(left) == 1) { \ - ((PyFloatObject *)left)->ob_fval = (dval); \ - _Py_DECREF_SPECIALIZED(right, _PyFloat_ExactDealloc);\ - result = (left); \ - } \ - else if (Py_REFCNT(right) == 1) {\ - ((PyFloatObject *)right)->ob_fval = (dval); \ - _Py_DECREF_NO_DEALLOC(left); \ - result = (right); \ - }\ - else { \ - result = PyFloat_FromDouble(dval); \ - if ((result) == NULL) GOTO_ERROR(error); \ - _Py_DECREF_NO_DEALLOC(left); \ - _Py_DECREF_NO_DEALLOC(right); \ - } \ -} while (0) - // If a trace function sets a new f_lineno and // *then* raises, we use the destination when searching // for an exception handler, displaying the traceback, and so on diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 5532c04e497a750..0ed361a2ee7fb01 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -643,8 +643,8 @@ double dres = ((PyFloatObject *)left_o)->ob_fval * ((PyFloatObject *)right_o)->ob_fval; - PyObject *res_o; - DECREF_INPUTS_AND_REUSE_FLOAT(left_o, right_o, dres, res_o); + PyObject *res_o = _PyFloat_FromDouble_ConsumeInputs(left, right, dres); + if (res_o == NULL) JUMP_TO_ERROR(); res = PyStackRef_FromPyObjectSteal(res_o); stack_pointer[-2] = res; stack_pointer += -1; @@ -664,8 +664,8 @@ double dres = ((PyFloatObject *)left_o)->ob_fval + ((PyFloatObject *)right_o)->ob_fval; - PyObject *res_o; - DECREF_INPUTS_AND_REUSE_FLOAT(left_o, right_o, dres, res_o); + PyObject *res_o = _PyFloat_FromDouble_ConsumeInputs(left, right, dres); + if (res_o == NULL) JUMP_TO_ERROR(); res = PyStackRef_FromPyObjectSteal(res_o); stack_pointer[-2] = res; stack_pointer += -1; @@ -685,8 +685,8 @@ double dres = ((PyFloatObject *)left_o)->ob_fval - ((PyFloatObject *)right_o)->ob_fval; - PyObject *res_o; - DECREF_INPUTS_AND_REUSE_FLOAT(left_o, right_o, dres, res_o); + PyObject *res_o = _PyFloat_FromDouble_ConsumeInputs(left, right, dres); + if (res_o == NULL) JUMP_TO_ERROR(); res = PyStackRef_FromPyObjectSteal(res_o); stack_pointer[-2] = res; stack_pointer += -1; diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 0eeb566a0adadc6..7bd1b7dd5aba272 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -84,8 +84,8 @@ double dres = ((PyFloatObject *)left_o)->ob_fval + ((PyFloatObject *)right_o)->ob_fval; - PyObject *res_o; - DECREF_INPUTS_AND_REUSE_FLOAT(left_o, right_o, dres, res_o); + PyObject *res_o = _PyFloat_FromDouble_ConsumeInputs(left, right, dres); + if (res_o == NULL) goto pop_2_error; res = PyStackRef_FromPyObjectSteal(res_o); } stack_pointer[-2] = res; @@ -251,8 +251,8 @@ double dres = ((PyFloatObject *)left_o)->ob_fval * ((PyFloatObject *)right_o)->ob_fval; - PyObject *res_o; - DECREF_INPUTS_AND_REUSE_FLOAT(left_o, right_o, dres, res_o); + PyObject *res_o = _PyFloat_FromDouble_ConsumeInputs(left, right, dres); + if (res_o == NULL) goto pop_2_error; res = PyStackRef_FromPyObjectSteal(res_o); } stack_pointer[-2] = res; @@ -322,8 +322,8 @@ double dres = ((PyFloatObject *)left_o)->ob_fval - ((PyFloatObject *)right_o)->ob_fval; - PyObject *res_o; - DECREF_INPUTS_AND_REUSE_FLOAT(left_o, right_o, dres, res_o); + PyObject *res_o = _PyFloat_FromDouble_ConsumeInputs(left, right, dres); + if (res_o == NULL) goto pop_2_error; res = PyStackRef_FromPyObjectSteal(res_o); } stack_pointer[-2] = res; diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index 19fdeac65cf2df9..381ad3a4e2082c6 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -575,6 +575,7 @@ def has_error_without_pop(op: parser.InstDef) -> bool: "_PyDictValues_AddToInsertionOrder", "_PyErr_Occurred", "_PyEval_FrameClearAndPop", + "_PyFloat_FromDouble_ConsumeInputs", "_PyFrame_GetCode", "_PyFrame_IsIncomplete", "_PyFrame_PushUnchecked", From 5f4e5b598cab86d5fd5727d423c9728221889ed0 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Mon, 14 Oct 2024 16:29:20 +0300 Subject: [PATCH 74/80] gh-53203: Fix strptime() for %c, %x and %X formats on many locales (GH-125406) Fixed most locales that use non-ASCII digits, like Persian, Burmese, Odia and Shan. --- Lib/_strptime.py | 68 ++++++++++++------- Lib/test/test_strptime.py | 34 ++++++---- Lib/test/test_time.py | 2 +- ...4-10-13-20-21-35.gh-issue-53203.Rz1c8A.rst | 2 + 4 files changed, 66 insertions(+), 40 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-10-13-20-21-35.gh-issue-53203.Rz1c8A.rst diff --git a/Lib/_strptime.py b/Lib/_strptime.py index 89adc174e5ad30d..5f4d2475c0169bc 100644 --- a/Lib/_strptime.py +++ b/Lib/_strptime.py @@ -15,6 +15,7 @@ import locale import calendar from re import compile as re_compile +from re import sub as re_sub from re import IGNORECASE from re import escape as re_escape from datetime import (date as datetime_date, @@ -129,11 +130,23 @@ def __calc_date_time(self): time_tuple = time.struct_time((1999,3,17,22,44,55,2,76,0)) time_tuple2 = time.struct_time((1999,1,3,1,1,1,6,3,0)) replacement_pairs = [ - ('1999', '%Y'), ('99', '%y'), ('22', '%H'), - ('44', '%M'), ('55', '%S'), ('76', '%j'), - ('17', '%d'), ('03', '%m'), ('3', '%m'), - # '3' needed for when no leading zero. - ('2', '%w'), ('10', '%I')] + ('1999', '%Y'), ('99', '%y'), ('22', '%H'), + ('44', '%M'), ('55', '%S'), ('76', '%j'), + ('17', '%d'), ('03', '%m'), ('3', '%m'), + # '3' needed for when no leading zero. + ('2', '%w'), ('10', '%I'), + # Non-ASCII digits + ('\u0661\u0669\u0669\u0669', '%Y'), + ('\u0669\u0669', '%Oy'), + ('\u0662\u0662', '%OH'), + ('\u0664\u0664', '%OM'), + ('\u0665\u0665', '%OS'), + ('\u0661\u0667', '%Od'), + ('\u0660\u0663', '%Om'), + ('\u0663', '%Om'), + ('\u0662', '%Ow'), + ('\u0661\u0660', '%OI'), + ] date_time = [] for directive in ('%c', '%x', '%X'): current_format = time.strftime(directive, time_tuple).lower() @@ -158,6 +171,10 @@ def __calc_date_time(self): for tz in tz_values: if tz: current_format = current_format.replace(tz, "%Z") + # Transform all non-ASCII digits to digits in range U+0660 to U+0669. + current_format = re_sub(r'\d(?3[0-1]|[1-2]\d|0[1-9]|[1-9]| [1-9])", 'f': r"(?P[0-9]{1,6})", @@ -296,11 +313,15 @@ def __init__(self, locale_time=None): 'Z': self.__seqToRE((tz for tz_names in self.locale_time.timezone for tz in tz_names), 'Z'), - '%': '%'}) - base.__setitem__('W', base.__getitem__('U').replace('U', 'W')) - base.__setitem__('c', self.pattern(self.locale_time.LC_date_time)) - base.__setitem__('x', self.pattern(self.locale_time.LC_date)) + '%': '%'} + for d in 'dmyHIMS': + mapping['O' + d] = r'(?P<%s>\d\d|\d| \d)' % d + mapping['Ow'] = r'(?P\d)' + mapping['W'] = mapping['U'].replace('U', 'W') + base.__init__(mapping) base.__setitem__('X', self.pattern(self.locale_time.LC_time)) + base.__setitem__('x', self.pattern(self.locale_time.LC_date)) + base.__setitem__('c', self.pattern(self.locale_time.LC_date_time)) def __seqToRE(self, to_convert, directive): """Convert a list to a regex string for matching a directive. @@ -328,28 +349,25 @@ def pattern(self, format): regex syntax are escaped. """ - processed_format = '' # The sub() call escapes all characters that might be misconstrued # as regex syntax. Cannot use re.escape since we have to deal with # format directives (%m, etc.). - regex_chars = re_compile(r"([\\.^$*+?\(\){}\[\]|])") - format = regex_chars.sub(r"\\\1", format) - whitespace_replacement = re_compile(r'\s+') - format = whitespace_replacement.sub(r'\\s+', format) + format = re_sub(r"([\\.^$*+?\(\){}\[\]|])", r"\\\1", format) + format = re_sub(r'\s+', r'\\s+', format) + format = re_sub(r"'", "['\u02bc]", format) # needed for br_FR year_in_format = False day_of_month_in_format = False - while '%' in format: - directive_index = format.index('%')+1 - format_char = format[directive_index] - processed_format = "%s%s%s" % (processed_format, - format[:directive_index-1], - self[format_char]) - format = format[directive_index+1:] + def repl(m): + format_char = m[1] match format_char: case 'Y' | 'y' | 'G': + nonlocal year_in_format year_in_format = True case 'd': + nonlocal day_of_month_in_format day_of_month_in_format = True + return self[format_char] + format = re_sub(r'%(O?.)', repl, format) if day_of_month_in_format and not year_in_format: import warnings warnings.warn("""\ @@ -360,7 +378,7 @@ def pattern(self, format): See https://github.com/python/cpython/issues/70647.""", DeprecationWarning, skip_file_prefixes=(os.path.dirname(__file__),)) - return "%s%s" % (processed_format, format) + return format def compile(self, format): """Return a compiled re object for the format string.""" @@ -434,8 +452,8 @@ def _strptime(data_string, format="%a %b %d %H:%M:%S %Y"): _regex_cache[format] = format_regex found = format_regex.match(data_string) if not found: - raise ValueError("time data %r does not match format %r :: /%s/" % - (data_string, format, format_regex.pattern)) + raise ValueError("time data %r does not match format %r" % + (data_string, format)) if len(data_string) != found.end(): raise ValueError("unconverted data remains: %s" % data_string[found.end():]) diff --git a/Lib/test/test_strptime.py b/Lib/test/test_strptime.py index 79f48dfe44abde2..12366b053a2fc10 100644 --- a/Lib/test/test_strptime.py +++ b/Lib/test/test_strptime.py @@ -292,7 +292,7 @@ def test_strptime_exception_context(self): # additional check for IndexError branch (issue #19545) with self.assertRaises(ValueError) as e: _strptime._strptime_time('19', '%Y %') - self.assertIs(e.exception.__suppress_context__, True) + self.assertIsNone(e.exception.__context__) def test_unconverteddata(self): # Check ValueError is raised when there is unconverted data @@ -485,12 +485,14 @@ def test_bad_timezone(self): # id_ID, ms_MY. # * Year is not included: ha_NG. # * Use non-Gregorian calendar: lo_LA, thai, th_TH. + # On Windows: ar_IN, ar_SA, fa_IR, ps_AF. # # BUG: Generates regexp that does not match the current date and time - # for az_IR, fa_IR, lzh_TW, my_MM, or_IN, shn_MM. + # for lzh_TW. @run_with_locales('LC_TIME', 'C', 'en_US', 'fr_FR', 'de_DE', 'ja_JP', 'he_IL', 'eu_ES', 'ar_AE', 'mfe_MU', 'yo_NG', - 'csb_PL', 'br_FR', 'gez_ET', 'brx_IN') + 'csb_PL', 'br_FR', 'gez_ET', 'brx_IN', + 'my_MM', 'or_IN', 'shn_MM', 'az_IR') def test_date_time_locale(self): # Test %c directive loc = locale.getlocale(locale.LC_TIME)[0] @@ -512,20 +514,23 @@ def test_date_time_locale(self): self.roundtrip('%c', slice(0, 6), time.localtime(now - 366*24*3600)) # NB: Dates before 1969 do not roundtrip on some locales: - # bo_CN, bo_IN, dz_BT, eu_ES, eu_FR. + # az_IR, bo_CN, bo_IN, dz_BT, eu_ES, eu_FR, fa_IR, or_IN. @run_with_locales('LC_TIME', 'C', 'en_US', 'fr_FR', 'de_DE', 'ja_JP', 'he_IL', 'ar_AE', 'mfe_MU', 'yo_NG', - 'csb_PL', 'br_FR', 'gez_ET', 'brx_IN') + 'csb_PL', 'br_FR', 'gez_ET', 'brx_IN', + 'my_MM', 'shn_MM') def test_date_time_locale2(self): # Test %c directive self.roundtrip('%c', slice(0, 6), (1900, 1, 1, 0, 0, 0, 0, 1, 0)) + self.roundtrip('%c', slice(0, 6), (1800, 1, 1, 0, 0, 0, 0, 1, 0)) # NB: Does not roundtrip because use non-Gregorian calendar: - # lo_LA, thai, th_TH. + # lo_LA, thai, th_TH. On Windows: ar_IN, ar_SA, fa_IR, ps_AF. # BUG: Generates regexp that does not match the current date - # for az_IR, fa_IR, lzh_TW, my_MM, or_IN, shn_MM. + # for lzh_TW. @run_with_locales('LC_TIME', 'C', 'en_US', 'fr_FR', 'de_DE', 'ja_JP', - 'he_IL', 'eu_ES', 'ar_AE') + 'he_IL', 'eu_ES', 'ar_AE', + 'az_IR', 'my_MM', 'or_IN', 'shn_MM') def test_date_locale(self): # Test %x directive now = time.time() @@ -545,10 +550,11 @@ def test_date_locale(self): "musl libc issue on Emscripten, bpo-46390" ) @run_with_locales('LC_TIME', 'en_US', 'fr_FR', 'de_DE', 'ja_JP', - 'eu_ES', 'ar_AE') + 'eu_ES', 'ar_AE', 'my_MM', 'shn_MM') def test_date_locale2(self): # Test %x directive self.roundtrip('%x', slice(0, 3), (1900, 1, 1, 0, 0, 0, 0, 1, 0)) + self.roundtrip('%x', slice(0, 3), (1800, 1, 1, 0, 0, 0, 0, 1, 0)) # NB: Does not roundtrip in some locales due to the ambiguity of # the time representation (bugs in locales?): @@ -556,11 +562,11 @@ def test_date_locale2(self): # norwegian, nynorsk. # * Hours are in 12-hour notation without AM/PM indication: hy_AM, # ms_MY, sm_WS. - # BUG: Generates regexp that does not match the current time for - # aa_DJ, aa_ER, aa_ET, am_ET, az_IR, byn_ER, fa_IR, gez_ER, gez_ET, - # lzh_TW, my_MM, om_ET, om_KE, or_IN, shn_MM, sid_ET, so_DJ, so_ET, - # so_SO, ti_ER, ti_ET, tig_ER, wal_ET. - @run_with_locales('LC_TIME', 'C', 'en_US', 'fr_FR', 'de_DE', 'ja_JP') + # BUG: Generates regexp that does not match the current time for lzh_TW. + @run_with_locales('LC_TIME', 'C', 'en_US', 'fr_FR', 'de_DE', 'ja_JP', + 'aa_ET', 'am_ET', 'az_IR', 'byn_ER', 'fa_IR', 'gez_ET', + 'my_MM', 'om_ET', 'or_IN', 'shn_MM', 'sid_ET', 'so_SO', + 'ti_ET', 'tig_ER', 'wal_ET') def test_time_locale(self): # Test %X directive now = time.time() diff --git a/Lib/test/test_time.py b/Lib/test/test_time.py index 5b5779231f06ce5..27c0f51acc58ab0 100644 --- a/Lib/test/test_time.py +++ b/Lib/test/test_time.py @@ -298,7 +298,7 @@ def test_strptime_exception_context(self): # additional check for IndexError branch (issue #19545) with self.assertRaises(ValueError) as e: time.strptime('19', '%Y %') - self.assertIs(e.exception.__suppress_context__, True) + self.assertIsNone(e.exception.__context__) def test_strptime_leap_year(self): # GH-70647: warns if parsing a format with a day and no year. diff --git a/Misc/NEWS.d/next/Library/2024-10-13-20-21-35.gh-issue-53203.Rz1c8A.rst b/Misc/NEWS.d/next/Library/2024-10-13-20-21-35.gh-issue-53203.Rz1c8A.rst new file mode 100644 index 000000000000000..cdfa8c191e82425 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-13-20-21-35.gh-issue-53203.Rz1c8A.rst @@ -0,0 +1,2 @@ +Fix :func:`time.strptime` for ``%c``, ``%x`` and ``%X`` formats in many +locales that use non-ASCII digits, like Persian, Burmese, Odia and Shan. From 6a08a753b702ac63c9b6ac58dd204d1fe9662e9d Mon Sep 17 00:00:00 2001 From: Wulian Date: Mon, 14 Oct 2024 21:53:50 +0800 Subject: [PATCH 75/80] gh-124960: Fixed `barry_as_FLUFL` future flag does not work in new REPL (#124999) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Nice Zombies Co-authored-by: Łukasz Langa --- Lib/_pyrepl/console.py | 10 +++++-- Lib/codeop.py | 7 +++-- Lib/test/test_pyrepl/test_interact.py | 27 ++++++++++++++++++- ...-10-05-15-49-53.gh-issue-124960.Bol9hT.rst | 1 + 4 files changed, 40 insertions(+), 5 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-10-05-15-49-53.gh-issue-124960.Bol9hT.rst diff --git a/Lib/_pyrepl/console.py b/Lib/_pyrepl/console.py index 3e72a56807f6fb1..03266c4dfc2dd85 100644 --- a/Lib/_pyrepl/console.py +++ b/Lib/_pyrepl/console.py @@ -174,7 +174,13 @@ def _excepthook(self, typ, value, tb): def runsource(self, source, filename="", symbol="single"): try: - tree = ast.parse(source) + tree = self.compile.compiler( + source, + filename, + "exec", + ast.PyCF_ONLY_AST, + incomplete_input=False, + ) except (SyntaxError, OverflowError, ValueError): self.showsyntaxerror(filename, source=source) return False @@ -185,7 +191,7 @@ def runsource(self, source, filename="", symbol="single"): the_symbol = symbol if stmt is last_stmt else "exec" item = wrapper([stmt]) try: - code = self.compile.compiler(item, filename, the_symbol, dont_inherit=True) + code = self.compile.compiler(item, filename, the_symbol) except SyntaxError as e: if e.args[0] == "'await' outside function": python = os.path.basename(sys.executable) diff --git a/Lib/codeop.py b/Lib/codeop.py index a0276b52d484e35..adf000ba29f88c9 100644 --- a/Lib/codeop.py +++ b/Lib/codeop.py @@ -44,6 +44,7 @@ # Caveat emptor: These flags are undocumented on purpose and depending # on their effect outside the standard library is **unsupported**. PyCF_DONT_IMPLY_DEDENT = 0x200 +PyCF_ONLY_AST = 0x400 PyCF_ALLOW_INCOMPLETE_INPUT = 0x4000 def _maybe_compile(compiler, source, filename, symbol): @@ -109,12 +110,14 @@ class Compile: def __init__(self): self.flags = PyCF_DONT_IMPLY_DEDENT | PyCF_ALLOW_INCOMPLETE_INPUT - def __call__(self, source, filename, symbol, **kwargs): - flags = self.flags + def __call__(self, source, filename, symbol, flags=0, **kwargs): + flags |= self.flags if kwargs.get('incomplete_input', True) is False: flags &= ~PyCF_DONT_IMPLY_DEDENT flags &= ~PyCF_ALLOW_INCOMPLETE_INPUT codeob = compile(source, filename, symbol, flags, True) + if flags & PyCF_ONLY_AST: + return codeob # this is an ast.Module in this case for feature in _features: if codeob.co_flags & feature.compiler_flag: self.flags |= feature.compiler_flag diff --git a/Lib/test/test_pyrepl/test_interact.py b/Lib/test/test_pyrepl/test_interact.py index b7adaffbac0e228..0c6df4e5dae8698 100644 --- a/Lib/test/test_pyrepl/test_interact.py +++ b/Lib/test/test_pyrepl/test_interact.py @@ -119,13 +119,38 @@ def test_runsource_shows_syntax_error_for_failed_compilation(self): def test_no_active_future(self): console = InteractiveColoredConsole() - source = "x: int = 1; print(__annotate__(1))" + source = dedent("""\ + x: int = 1 + print(__annotate__(1)) + """) f = io.StringIO() with contextlib.redirect_stdout(f): result = console.runsource(source) self.assertFalse(result) self.assertEqual(f.getvalue(), "{'x': }\n") + def test_future_annotations(self): + console = InteractiveColoredConsole() + source = dedent("""\ + from __future__ import annotations + def g(x: int): ... + print(g.__annotations__) + """) + f = io.StringIO() + with contextlib.redirect_stdout(f): + result = console.runsource(source) + self.assertFalse(result) + self.assertEqual(f.getvalue(), "{'x': 'int'}\n") + + def test_future_barry_as_flufl(self): + console = InteractiveColoredConsole() + f = io.StringIO() + with contextlib.redirect_stdout(f): + result = console.runsource("from __future__ import barry_as_FLUFL\n") + result = console.runsource("""print("black" <> 'blue')\n""") + self.assertFalse(result) + self.assertEqual(f.getvalue(), "True\n") + class TestMoreLines(unittest.TestCase): def test_invalid_syntax_single_line(self): diff --git a/Misc/NEWS.d/next/Library/2024-10-05-15-49-53.gh-issue-124960.Bol9hT.rst b/Misc/NEWS.d/next/Library/2024-10-05-15-49-53.gh-issue-124960.Bol9hT.rst new file mode 100644 index 000000000000000..332d6bb54d80c74 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-05-15-49-53.gh-issue-124960.Bol9hT.rst @@ -0,0 +1 @@ +Fix support for the ``barry_as_FLUFL`` future flag in the new REPL. From c77121e9f19702b1ab280299394e38e8f15c0fd3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 14 Oct 2024 16:23:05 +0200 Subject: [PATCH 76/80] gh-111178: fix USAN failures for `partialobject` (#124733) --- Modules/_functoolsmodule.c | 59 ++++++++++++++++++++++---------------- 1 file changed, 34 insertions(+), 25 deletions(-) diff --git a/Modules/_functoolsmodule.c b/Modules/_functoolsmodule.c index 4ab3adc0fe44cc0..802b1cf792c5550 100644 --- a/Modules/_functoolsmodule.c +++ b/Modules/_functoolsmodule.c @@ -144,10 +144,13 @@ typedef struct { vectorcallfunc vectorcall; } partialobject; +// cast a PyObject pointer PTR to a partialobject pointer (no type checks) +#define _PyPartialObject_CAST(PTR) ((partialobject *)(PTR)) + static void partial_setvectorcall(partialobject *pto); static struct PyModuleDef _functools_module; static PyObject * -partial_call(partialobject *pto, PyObject *args, PyObject *kwargs); +partial_call(PyObject *pto, PyObject *args, PyObject *kwargs); static inline _functools_state * get_functools_state_by_type(PyTypeObject *type) @@ -307,8 +310,9 @@ partial_new(PyTypeObject *type, PyObject *args, PyObject *kw) } static int -partial_clear(partialobject *pto) +partial_clear(PyObject *self) { + partialobject *pto = _PyPartialObject_CAST(self); Py_CLEAR(pto->fn); Py_CLEAR(pto->args); Py_CLEAR(pto->kw); @@ -317,8 +321,9 @@ partial_clear(partialobject *pto) } static int -partial_traverse(partialobject *pto, visitproc visit, void *arg) +partial_traverse(PyObject *self, visitproc visit, void *arg) { + partialobject *pto = _PyPartialObject_CAST(self); Py_VISIT(Py_TYPE(pto)); Py_VISIT(pto->fn); Py_VISIT(pto->args); @@ -328,16 +333,16 @@ partial_traverse(partialobject *pto, visitproc visit, void *arg) } static void -partial_dealloc(partialobject *pto) +partial_dealloc(PyObject *self) { - PyTypeObject *tp = Py_TYPE(pto); + PyTypeObject *tp = Py_TYPE(self); /* bpo-31095: UnTrack is needed before calling any callbacks */ - PyObject_GC_UnTrack(pto); - if (pto->weakreflist != NULL) { - PyObject_ClearWeakRefs((PyObject *) pto); + PyObject_GC_UnTrack(self); + if (_PyPartialObject_CAST(self)->weakreflist != NULL) { + PyObject_ClearWeakRefs(self); } - (void)partial_clear(pto); - tp->tp_free(pto); + (void)partial_clear(self); + tp->tp_free(self); Py_DECREF(tp); } @@ -360,14 +365,14 @@ partial_vectorcall_fallback(PyThreadState *tstate, partialobject *pto, { pto->vectorcall = NULL; Py_ssize_t nargs = PyVectorcall_NARGS(nargsf); - return _PyObject_MakeTpCall(tstate, (PyObject *)pto, - args, nargs, kwnames); + return _PyObject_MakeTpCall(tstate, (PyObject *)pto, args, nargs, kwnames); } static PyObject * -partial_vectorcall(partialobject *pto, PyObject *const *args, +partial_vectorcall(PyObject *self, PyObject *const *args, size_t nargsf, PyObject *kwnames) { + partialobject *pto = _PyPartialObject_CAST(self);; PyThreadState *tstate = _PyThreadState_GET(); Py_ssize_t nargs = PyVectorcall_NARGS(nargsf); @@ -468,15 +473,16 @@ partial_setvectorcall(partialobject *pto) * but that is unlikely (why use partial without arguments?), * so we don't optimize that */ else { - pto->vectorcall = (vectorcallfunc)partial_vectorcall; + pto->vectorcall = partial_vectorcall; } } // Not converted to argument clinic, because of `*args, **kwargs` arguments. static PyObject * -partial_call(partialobject *pto, PyObject *args, PyObject *kwargs) +partial_call(PyObject *self, PyObject *args, PyObject *kwargs) { + partialobject *pto = _PyPartialObject_CAST(self); assert(PyCallable_Check(pto->fn)); assert(PyTuple_Check(pto->args)); assert(PyDict_Check(pto->kw)); @@ -587,8 +593,9 @@ static PyGetSetDef partial_getsetlist[] = { }; static PyObject * -partial_repr(partialobject *pto) +partial_repr(PyObject *self) { + partialobject *pto = _PyPartialObject_CAST(self); PyObject *result = NULL; PyObject *arglist; PyObject *mod; @@ -597,7 +604,7 @@ partial_repr(partialobject *pto) PyObject *key, *value; int status; - status = Py_ReprEnter((PyObject *)pto); + status = Py_ReprEnter(self); if (status != 0) { if (status < 0) return NULL; @@ -608,7 +615,7 @@ partial_repr(partialobject *pto) if (arglist == NULL) goto done; /* Pack positional arguments */ - assert (PyTuple_Check(pto->args)); + assert(PyTuple_Check(pto->args)); n = PyTuple_GET_SIZE(pto->args); for (i = 0; i < n; i++) { Py_SETREF(arglist, PyUnicode_FromFormat("%U, %R", arglist, @@ -643,11 +650,11 @@ partial_repr(partialobject *pto) Py_DECREF(arglist); done: - Py_ReprLeave((PyObject *)pto); + Py_ReprLeave(self); return result; error: Py_DECREF(arglist); - Py_ReprLeave((PyObject *)pto); + Py_ReprLeave(self); return NULL; } @@ -659,16 +666,18 @@ partial_repr(partialobject *pto) */ static PyObject * -partial_reduce(partialobject *pto, PyObject *unused) +partial_reduce(PyObject *self, PyObject *Py_UNUSED(args)) { + partialobject *pto = _PyPartialObject_CAST(self); return Py_BuildValue("O(O)(OOOO)", Py_TYPE(pto), pto->fn, pto->fn, pto->args, pto->kw, pto->dict ? pto->dict : Py_None); } static PyObject * -partial_setstate(partialobject *pto, PyObject *state) +partial_setstate(PyObject *self, PyObject *state) { + partialobject *pto = _PyPartialObject_CAST(self); PyObject *fn, *fnargs, *kw, *dict; if (!PyTuple_Check(state)) { @@ -730,8 +739,8 @@ partial_setstate(partialobject *pto, PyObject *state) } static PyMethodDef partial_methods[] = { - {"__reduce__", (PyCFunction)partial_reduce, METH_NOARGS}, - {"__setstate__", (PyCFunction)partial_setstate, METH_O}, + {"__reduce__", partial_reduce, METH_NOARGS}, + {"__setstate__", partial_setstate, METH_O}, {"__class_getitem__", Py_GenericAlias, METH_O|METH_CLASS, PyDoc_STR("See PEP 585")}, {NULL, NULL} /* sentinel */ @@ -749,7 +758,7 @@ static PyType_Slot partial_type_slots[] = { {Py_tp_methods, partial_methods}, {Py_tp_members, partial_memberlist}, {Py_tp_getset, partial_getsetlist}, - {Py_tp_descr_get, (descrgetfunc)partial_descr_get}, + {Py_tp_descr_get, partial_descr_get}, {Py_tp_new, partial_new}, {Py_tp_free, PyObject_GC_Del}, {0, 0} From 45df264f3ffbc0893cbfd257131d3abe21043786 Mon Sep 17 00:00:00 2001 From: Mikhail Efimov Date: Mon, 14 Oct 2024 17:53:08 +0300 Subject: [PATCH 77/80] gh-112088: aclocal version is updated to 1.16.5 in docs (#125457) --- Doc/using/configure.rst | 4 ++-- Doc/whatsnew/3.13.rst | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Doc/using/configure.rst b/Doc/using/configure.rst index 4976418ba33cf86..10cdf2376229ff4 100644 --- a/Doc/using/configure.rst +++ b/Doc/using/configure.rst @@ -29,7 +29,7 @@ Features and minimum versions required to build CPython: * Tcl/Tk 8.5.12 for the :mod:`tkinter` module. -* Autoconf 2.71 and aclocal 1.16.4 are required to regenerate the +* Autoconf 2.71 and aclocal 1.16.5 are required to regenerate the :file:`configure` script. .. versionchanged:: 3.1 @@ -56,7 +56,7 @@ Features and minimum versions required to build CPython: Tcl/Tk version 8.5.12 is now required for the :mod:`tkinter` module. .. versionchanged:: 3.13 - Autoconf 2.71, aclocal 1.16.4 and SQLite 3.15.2 are now required. + Autoconf 2.71, aclocal 1.16.5 and SQLite 3.15.2 are now required. See also :pep:`7` "Style Guide for C Code" and :pep:`11` "CPython platform support". diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index a2897097aaba57d..f9e74a9b8ff9c66 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -2495,9 +2495,9 @@ Build Changes * Building CPython now requires a compiler with support for the C11 atomic library, GCC built-in atomic functions, or MSVC interlocked intrinsics. -* Autoconf 2.71 and aclocal 1.16.4 are now required to regenerate +* Autoconf 2.71 and aclocal 1.16.5 are now required to regenerate the :file:`configure` script. - (Contributed by Christian Heimes in :gh:`89886`.) + (Contributed by Christian Heimes in :gh:`89886` and by Victor Stinner in :gh:`112090`.) * SQLite 3.15.2 or newer is required to build the :mod:`sqlite3` extension module. From d5dbbf4372cd3dbf3eead1cc70ddc4261c061fd9 Mon Sep 17 00:00:00 2001 From: Thomas Grainger Date: Mon, 14 Oct 2024 16:19:56 +0100 Subject: [PATCH 78/80] gh-124958: fix asyncio.TaskGroup and _PyFuture refcycles (#124959) --- Lib/asyncio/futures.py | 6 +- Lib/asyncio/taskgroups.py | 41 +++++++-- Lib/test/test_asyncio/test_futures.py | 22 +++++ Lib/test/test_asyncio/test_taskgroups.py | 92 ++++++++++++++++++- ...-10-04-08-46-00.gh-issue-124958.rea9-x.rst | 1 + 5 files changed, 147 insertions(+), 15 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-10-04-08-46-00.gh-issue-124958.rea9-x.rst diff --git a/Lib/asyncio/futures.py b/Lib/asyncio/futures.py index 5f6fa2348726cfb..c95fce035cd548d 100644 --- a/Lib/asyncio/futures.py +++ b/Lib/asyncio/futures.py @@ -190,8 +190,7 @@ def result(self): the future is done and has an exception set, this exception is raised. """ if self._state == _CANCELLED: - exc = self._make_cancelled_error() - raise exc + raise self._make_cancelled_error() if self._state != _FINISHED: raise exceptions.InvalidStateError('Result is not ready.') self.__log_traceback = False @@ -208,8 +207,7 @@ def exception(self): InvalidStateError. """ if self._state == _CANCELLED: - exc = self._make_cancelled_error() - raise exc + raise self._make_cancelled_error() if self._state != _FINISHED: raise exceptions.InvalidStateError('Exception is not set.') self.__log_traceback = False diff --git a/Lib/asyncio/taskgroups.py b/Lib/asyncio/taskgroups.py index f2ee9648c43876d..9fa772ca9d02cc3 100644 --- a/Lib/asyncio/taskgroups.py +++ b/Lib/asyncio/taskgroups.py @@ -66,6 +66,20 @@ async def __aenter__(self): return self async def __aexit__(self, et, exc, tb): + tb = None + try: + return await self._aexit(et, exc) + finally: + # Exceptions are heavy objects that can have object + # cycles (bad for GC); let's not keep a reference to + # a bunch of them. It would be nicer to use a try/finally + # in __aexit__ directly but that introduced some diff noise + self._parent_task = None + self._errors = None + self._base_error = None + exc = None + + async def _aexit(self, et, exc): self._exiting = True if (exc is not None and @@ -122,7 +136,10 @@ async def __aexit__(self, et, exc, tb): assert not self._tasks if self._base_error is not None: - raise self._base_error + try: + raise self._base_error + finally: + exc = None if self._parent_cancel_requested: # If this flag is set we *must* call uncancel(). @@ -133,8 +150,14 @@ async def __aexit__(self, et, exc, tb): # Propagate CancelledError if there is one, except if there # are other errors -- those have priority. - if propagate_cancellation_error is not None and not self._errors: - raise propagate_cancellation_error + try: + if propagate_cancellation_error is not None and not self._errors: + try: + raise propagate_cancellation_error + finally: + exc = None + finally: + propagate_cancellation_error = None if et is not None and not issubclass(et, exceptions.CancelledError): self._errors.append(exc) @@ -146,14 +169,14 @@ async def __aexit__(self, et, exc, tb): if self._parent_task.cancelling(): self._parent_task.uncancel() self._parent_task.cancel() - # Exceptions are heavy objects that can have object - # cycles (bad for GC); let's not keep a reference to - # a bunch of them. try: - me = BaseExceptionGroup('unhandled errors in a TaskGroup', self._errors) - raise me from None + raise BaseExceptionGroup( + 'unhandled errors in a TaskGroup', + self._errors, + ) from None finally: - self._errors = None + exc = None + def create_task(self, coro, *, name=None, context=None): """Create a new task in this group and return it. diff --git a/Lib/test/test_asyncio/test_futures.py b/Lib/test/test_asyncio/test_futures.py index 458b70451a306a8..c566b28adb24082 100644 --- a/Lib/test/test_asyncio/test_futures.py +++ b/Lib/test/test_asyncio/test_futures.py @@ -659,6 +659,28 @@ def __del__(self): fut = self._new_future(loop=self.loop) fut.set_result(Evil()) + def test_future_cancelled_result_refcycles(self): + f = self._new_future(loop=self.loop) + f.cancel() + exc = None + try: + f.result() + except asyncio.CancelledError as e: + exc = e + self.assertIsNotNone(exc) + self.assertListEqual(gc.get_referrers(exc), []) + + def test_future_cancelled_exception_refcycles(self): + f = self._new_future(loop=self.loop) + f.cancel() + exc = None + try: + f.exception() + except asyncio.CancelledError as e: + exc = e + self.assertIsNotNone(exc) + self.assertListEqual(gc.get_referrers(exc), []) + @unittest.skipUnless(hasattr(futures, '_CFuture'), 'requires the C _asyncio module') diff --git a/Lib/test/test_asyncio/test_taskgroups.py b/Lib/test/test_asyncio/test_taskgroups.py index 4852536defc93d2..138f59ebf57ef71 100644 --- a/Lib/test/test_asyncio/test_taskgroups.py +++ b/Lib/test/test_asyncio/test_taskgroups.py @@ -1,7 +1,7 @@ # Adapted with permission from the EdgeDB project; # license: PSFL. - +import gc import asyncio import contextvars import contextlib @@ -11,7 +11,6 @@ from test.test_asyncio.utils import await_without_task - # To prevent a warning "test altered the execution environment" def tearDownModule(): asyncio.set_event_loop_policy(None) @@ -899,6 +898,95 @@ async def outer(): await outer() + async def test_exception_refcycles_direct(self): + """Test that TaskGroup doesn't keep a reference to the raised ExceptionGroup""" + tg = asyncio.TaskGroup() + exc = None + + class _Done(Exception): + pass + + try: + async with tg: + raise _Done + except ExceptionGroup as e: + exc = e + + self.assertIsNotNone(exc) + self.assertListEqual(gc.get_referrers(exc), []) + + + async def test_exception_refcycles_errors(self): + """Test that TaskGroup deletes self._errors, and __aexit__ args""" + tg = asyncio.TaskGroup() + exc = None + + class _Done(Exception): + pass + + try: + async with tg: + raise _Done + except* _Done as excs: + exc = excs.exceptions[0] + + self.assertIsInstance(exc, _Done) + self.assertListEqual(gc.get_referrers(exc), []) + + + async def test_exception_refcycles_parent_task(self): + """Test that TaskGroup deletes self._parent_task""" + tg = asyncio.TaskGroup() + exc = None + + class _Done(Exception): + pass + + async def coro_fn(): + async with tg: + raise _Done + + try: + async with asyncio.TaskGroup() as tg2: + tg2.create_task(coro_fn()) + except* _Done as excs: + exc = excs.exceptions[0].exceptions[0] + + self.assertIsInstance(exc, _Done) + self.assertListEqual(gc.get_referrers(exc), []) + + async def test_exception_refcycles_propagate_cancellation_error(self): + """Test that TaskGroup deletes propagate_cancellation_error""" + tg = asyncio.TaskGroup() + exc = None + + try: + async with asyncio.timeout(-1): + async with tg: + await asyncio.sleep(0) + except TimeoutError as e: + exc = e.__cause__ + + self.assertIsInstance(exc, asyncio.CancelledError) + self.assertListEqual(gc.get_referrers(exc), []) + + async def test_exception_refcycles_base_error(self): + """Test that TaskGroup deletes self._base_error""" + class MyKeyboardInterrupt(KeyboardInterrupt): + pass + + tg = asyncio.TaskGroup() + exc = None + + try: + async with tg: + raise MyKeyboardInterrupt + except MyKeyboardInterrupt as e: + exc = e + + self.assertIsNotNone(exc) + self.assertListEqual(gc.get_referrers(exc), []) + if __name__ == "__main__": unittest.main() diff --git a/Misc/NEWS.d/next/Library/2024-10-04-08-46-00.gh-issue-124958.rea9-x.rst b/Misc/NEWS.d/next/Library/2024-10-04-08-46-00.gh-issue-124958.rea9-x.rst new file mode 100644 index 000000000000000..534d5bb8c898da8 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-04-08-46-00.gh-issue-124958.rea9-x.rst @@ -0,0 +1 @@ +Fix refcycles in exceptions raised from :class:`asyncio.TaskGroup` and the python implementation of :class:`asyncio.Future` From 5dac0dceda9097d46a0b5a6ad7c927e002c6c7a5 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Mon, 14 Oct 2024 08:26:57 -0700 Subject: [PATCH 79/80] gh-125461: Remove Python 2 from identifiers in doc (GH-125462) Remove Python 2 from identifiers in doc --- Doc/reference/lexical_analysis.rst | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Doc/reference/lexical_analysis.rst b/Doc/reference/lexical_analysis.rst index ae5408ee386bbd3..f7167032ad7df98 100644 --- a/Doc/reference/lexical_analysis.rst +++ b/Doc/reference/lexical_analysis.rst @@ -284,11 +284,10 @@ UAX-31, with elaboration and changes as defined below; see also :pep:`3131` for further details. Within the ASCII range (U+0001..U+007F), the valid characters for identifiers -are the same as in Python 2.x: the uppercase and lowercase letters ``A`` through +include the uppercase and lowercase letters ``A`` through ``Z``, the underscore ``_`` and, except for the first character, the digits ``0`` through ``9``. - -Python 3.0 introduces additional characters from outside the ASCII range (see +Python 3.0 introduced additional characters from outside the ASCII range (see :pep:`3131`). For these characters, the classification uses the version of the Unicode Character Database as included in the :mod:`unicodedata` module. From 187580d95c8339a3b6e2b012f98d86101c346cfa Mon Sep 17 00:00:00 2001 From: Mikhail Efimov Date: Mon, 14 Oct 2024 20:24:54 +0300 Subject: [PATCH 80/80] gh-119786: [doc] broken link and typo fix in interpreter_definition.md (#125455) --- InternalDocs/README.md | 2 ++ Tools/cases_generator/interpreter_definition.md | 5 +++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/InternalDocs/README.md b/InternalDocs/README.md index 805e2f97937e1eb..0a6ecf899458ed5 100644 --- a/InternalDocs/README.md +++ b/InternalDocs/README.md @@ -11,6 +11,8 @@ The core dev team attempts to keep this documentation up to date. If it is not, please report that through the [issue tracker](https://github.com/python/cpython/issues). +Index: +----- [Guide to the parser](parser.md) diff --git a/Tools/cases_generator/interpreter_definition.md b/Tools/cases_generator/interpreter_definition.md index ba09931c5416460..6cf36f343d5fa72 100644 --- a/Tools/cases_generator/interpreter_definition.md +++ b/Tools/cases_generator/interpreter_definition.md @@ -74,7 +74,7 @@ We update it as the need arises. ### Syntax Each op definition has a kind, a name, a stack and instruction stream effect, -and a piece of C code describing its semantics:: +and a piece of C code describing its semantics: ``` file: @@ -245,7 +245,8 @@ The same is true for all members of a pseudo instruction ## Examples -(Another source of examples can be found in the [tests](test_generator.py).) +(Another source of examples can be found in the +[tests](https://github.com/python/cpython/blob/main/Lib/test/test_generated_cases.py).) Some examples: