From c806cd5af677c385470001efc68da38a32919196 Mon Sep 17 00:00:00 2001 From: sobolevn Date: Mon, 4 Nov 2024 19:18:21 +0300 Subject: [PATCH 01/54] gh-126220: Adapt `_lsprof` to Argument Clinic (#126233) Co-authored-by: Erlend E. Aasland --- .../pycore_global_objects_fini_generated.h | 3 + Include/internal/pycore_global_strings.h | 3 + .../internal/pycore_runtime_init_generated.h | 3 + .../internal/pycore_unicodeobject_generated.h | 12 + Modules/_lsprof.c | 251 ++++++------ Modules/clinic/_lsprof.c.h | 362 +++++++++++++++++- 6 files changed, 517 insertions(+), 117 deletions(-) diff --git a/Include/internal/pycore_global_objects_fini_generated.h b/Include/internal/pycore_global_objects_fini_generated.h index 2fd7d5d13a98b29..e4f0138e17edfa3 100644 --- a/Include/internal/pycore_global_objects_fini_generated.h +++ b/Include/internal/pycore_global_objects_fini_generated.h @@ -1231,6 +1231,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(strict_mode)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(string)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(sub_key)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(subcalls)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(symmetric_difference_update)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(tabsize)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(tag)); @@ -1248,8 +1249,10 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(threading)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(throw)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(timeout)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(timer)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(times)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(timetuple)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(timeunit)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(top)); _PyStaticObject_CheckRefcnt((PyObject 
*)&_Py_ID(trace_callback)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(traceback)); diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h index fc3871570cc49d9..e70f11e2a26cd52 100644 --- a/Include/internal/pycore_global_strings.h +++ b/Include/internal/pycore_global_strings.h @@ -720,6 +720,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(strict_mode) STRUCT_FOR_ID(string) STRUCT_FOR_ID(sub_key) + STRUCT_FOR_ID(subcalls) STRUCT_FOR_ID(symmetric_difference_update) STRUCT_FOR_ID(tabsize) STRUCT_FOR_ID(tag) @@ -737,8 +738,10 @@ struct _Py_global_strings { STRUCT_FOR_ID(threading) STRUCT_FOR_ID(throw) STRUCT_FOR_ID(timeout) + STRUCT_FOR_ID(timer) STRUCT_FOR_ID(times) STRUCT_FOR_ID(timetuple) + STRUCT_FOR_ID(timeunit) STRUCT_FOR_ID(top) STRUCT_FOR_ID(trace_callback) STRUCT_FOR_ID(traceback) diff --git a/Include/internal/pycore_runtime_init_generated.h b/Include/internal/pycore_runtime_init_generated.h index 3b80e265b0ca50a..5d404c8fd91ca6f 100644 --- a/Include/internal/pycore_runtime_init_generated.h +++ b/Include/internal/pycore_runtime_init_generated.h @@ -1229,6 +1229,7 @@ extern "C" { INIT_ID(strict_mode), \ INIT_ID(string), \ INIT_ID(sub_key), \ + INIT_ID(subcalls), \ INIT_ID(symmetric_difference_update), \ INIT_ID(tabsize), \ INIT_ID(tag), \ @@ -1246,8 +1247,10 @@ extern "C" { INIT_ID(threading), \ INIT_ID(throw), \ INIT_ID(timeout), \ + INIT_ID(timer), \ INIT_ID(times), \ INIT_ID(timetuple), \ + INIT_ID(timeunit), \ INIT_ID(top), \ INIT_ID(trace_callback), \ INIT_ID(traceback), \ diff --git a/Include/internal/pycore_unicodeobject_generated.h b/Include/internal/pycore_unicodeobject_generated.h index eb2eca06ec4d4f6..d0bc8d7186c0532 100644 --- a/Include/internal/pycore_unicodeobject_generated.h +++ b/Include/internal/pycore_unicodeobject_generated.h @@ -2676,6 +2676,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); 
assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(subcalls); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_ID(symmetric_difference_update); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); @@ -2744,6 +2748,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(timer); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_ID(times); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); @@ -2752,6 +2760,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(timeunit); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_ID(top); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); diff --git a/Modules/_lsprof.c b/Modules/_lsprof.c index 06958a078509d91..4f996c7230e16d5 100644 --- a/Modules/_lsprof.c +++ b/Modules/_lsprof.c @@ -606,29 +606,42 @@ setBuiltins(ProfilerObject *pObj, int nvalue) return 0; } -PyObject* pystart_callback(ProfilerObject* self, PyObject *const *args, Py_ssize_t size) +/*[clinic input] +_lsprof.Profiler._pystart_callback + + code: object + instruction_offset: object + / + +[clinic start generated code]*/ + +static PyObject * +_lsprof_Profiler__pystart_callback_impl(ProfilerObject *self, PyObject *code, + PyObject *instruction_offset) +/*[clinic end generated code: output=5fec8b7ad5ed25e8 
input=b166e6953c579cda]*/ { - if (size < 2) { - PyErr_Format(PyExc_TypeError, - "_pystart_callback expected 2 arguments, got %zd", - size); - return NULL; - } - PyObject* code = args[0]; - ptrace_enter_call((PyObject*)self, (void *)code, (PyObject *)code); + ptrace_enter_call((PyObject*)self, (void *)code, code); Py_RETURN_NONE; } -PyObject* pyreturn_callback(ProfilerObject* self, PyObject *const *args, Py_ssize_t size) +/*[clinic input] +_lsprof.Profiler._pyreturn_callback + + code: object + instruction_offset: object + retval: object + / + +[clinic start generated code]*/ + +static PyObject * +_lsprof_Profiler__pyreturn_callback_impl(ProfilerObject *self, + PyObject *code, + PyObject *instruction_offset, + PyObject *retval) +/*[clinic end generated code: output=9e2f6fc1b882c51e input=667ffaeb2fa6fd1f]*/ { - if (size < 3) { - PyErr_Format(PyExc_TypeError, - "_pyreturn_callback expected 3 arguments, got %zd", - size); - return NULL; - } - PyObject* code = args[0]; ptrace_leave_call((PyObject*)self, (void *)code); Py_RETURN_NONE; @@ -661,18 +674,24 @@ PyObject* get_cfunc_from_callable(PyObject* callable, PyObject* self_arg, PyObje return NULL; } -PyObject* ccall_callback(ProfilerObject* self, PyObject *const *args, Py_ssize_t size) +/*[clinic input] +_lsprof.Profiler._ccall_callback + + code: object + instruction_offset: object + callable: object + self_arg: object + / + +[clinic start generated code]*/ + +static PyObject * +_lsprof_Profiler__ccall_callback_impl(ProfilerObject *self, PyObject *code, + PyObject *instruction_offset, + PyObject *callable, PyObject *self_arg) +/*[clinic end generated code: output=152db83cabd18cad input=0e66687cfb95c001]*/ { - if (size < 4) { - PyErr_Format(PyExc_TypeError, - "_ccall_callback expected 4 arguments, got %zd", - size); - return NULL; - } if (self->flags & POF_BUILTINS) { - PyObject* callable = args[2]; - PyObject* self_arg = args[3]; - PyObject* cfunc = get_cfunc_from_callable(callable, self_arg, self->missing); if (cfunc) 
{ @@ -685,18 +704,25 @@ PyObject* ccall_callback(ProfilerObject* self, PyObject *const *args, Py_ssize_t Py_RETURN_NONE; } -PyObject* creturn_callback(ProfilerObject* self, PyObject *const *args, Py_ssize_t size) +/*[clinic input] +_lsprof.Profiler._creturn_callback + + code: object + instruction_offset: object + callable: object + self_arg: object + / + +[clinic start generated code]*/ + +static PyObject * +_lsprof_Profiler__creturn_callback_impl(ProfilerObject *self, PyObject *code, + PyObject *instruction_offset, + PyObject *callable, + PyObject *self_arg) +/*[clinic end generated code: output=1e886dde8fed8fb0 input=b18afe023746923a]*/ { - if (size < 4) { - PyErr_Format(PyExc_TypeError, - "_creturn_callback expected 4 arguments, got %zd", - size); - return NULL; - } if (self->flags & POF_BUILTINS) { - PyObject* callable = args[2]; - PyObject* self_arg = args[3]; - PyObject* cfunc = get_cfunc_from_callable(callable, self_arg, self->missing); if (cfunc) { @@ -724,27 +750,27 @@ static const struct { {0, NULL} }; -PyDoc_STRVAR(enable_doc, "\ -enable(subcalls=True, builtins=True)\n\ -\n\ -Start collecting profiling information.\n\ -If 'subcalls' is True, also records for each function\n\ -statistics separated according to its current caller.\n\ -If 'builtins' is True, records the time spent in\n\ -built-in functions separately from their caller.\n\ -"); - -static PyObject* -profiler_enable(ProfilerObject *self, PyObject *args, PyObject *kwds) + +/*[clinic input] +_lsprof.Profiler.enable + + subcalls: bool = True + If True, also records for each function + statistics separated according to its current caller. + + builtins: bool = True + If True, records the time spent in + built-in functions separately from their caller. + +Start collecting profiling information. 
+[clinic start generated code]*/ + +static PyObject * +_lsprof_Profiler_enable_impl(ProfilerObject *self, int subcalls, + int builtins) +/*[clinic end generated code: output=1e747f9dc1edd571 input=9ab81405107ab7f1]*/ { - int subcalls = -1; - int builtins = -1; - static char *kwlist[] = {"subcalls", "builtins", 0}; int all_events = 0; - - if (!PyArg_ParseTupleAndKeywords(args, kwds, "|pp:enable", - kwlist, &subcalls, &builtins)) - return NULL; if (setSubcalls(self, subcalls) < 0 || setBuiltins(self, builtins) < 0) { return NULL; } @@ -800,14 +826,16 @@ flush_unmatched(ProfilerObject *pObj) } -PyDoc_STRVAR(disable_doc, "\ -disable()\n\ -\n\ -Stop collecting profiling information.\n\ -"); -static PyObject* -profiler_disable(ProfilerObject *self, PyObject* noarg) +/*[clinic input] +_lsprof.Profiler.disable + +Stop collecting profiling information. +[clinic start generated code]*/ + +static PyObject * +_lsprof_Profiler_disable_impl(ProfilerObject *self) +/*[clinic end generated code: output=838cffef7f651870 input=05700b3fc68d1f50]*/ { if (self->flags & POF_EXT_TIMER) { PyErr_SetString(PyExc_RuntimeError, @@ -858,21 +886,22 @@ profiler_disable(ProfilerObject *self, PyObject* noarg) Py_RETURN_NONE; } -PyDoc_STRVAR(clear_doc, "\ -clear()\n\ -\n\ -Clear all profiling information collected so far.\n\ -"); +/*[clinic input] +_lsprof.Profiler.clear + +Clear all profiling information collected so far. 
+[clinic start generated code]*/ -static PyObject* -profiler_clear(ProfilerObject *pObj, PyObject* noarg) +static PyObject * +_lsprof_Profiler_clear_impl(ProfilerObject *self) +/*[clinic end generated code: output=dd1c668fb84b1335 input=fbe1f88c28be4f98]*/ { - if (pObj->flags & POF_EXT_TIMER) { + if (self->flags & POF_EXT_TIMER) { PyErr_SetString(PyExc_RuntimeError, "cannot clear profiler in external timer"); return NULL; } - clearEntries(pObj); + clearEntries(self); Py_RETURN_NONE; } @@ -903,33 +932,40 @@ profiler_dealloc(ProfilerObject *op) Py_DECREF(tp); } +/*[clinic input] +_lsprof.Profiler.__init__ as profiler_init + + timer: object(c_default='NULL') = None + timeunit: double = 0.0 + subcalls: bool = True + builtins: bool = True + +Build a profiler object using the specified timer function. + +The default timer is a fast built-in one based on real time. +For custom timer functions returning integers, 'timeunit' can +be a float specifying a scale (that is, how long each integer unit +is, in seconds). 
+[clinic start generated code]*/ + static int -profiler_init(ProfilerObject *pObj, PyObject *args, PyObject *kw) +profiler_init_impl(ProfilerObject *self, PyObject *timer, double timeunit, + int subcalls, int builtins) +/*[clinic end generated code: output=ac523803ec9f9df2 input=8285ca746f96a414]*/ { - PyObject *timer = NULL; - double timeunit = 0.0; - int subcalls = 1; - int builtins = 1; - static char *kwlist[] = {"timer", "timeunit", - "subcalls", "builtins", 0}; - - if (!PyArg_ParseTupleAndKeywords(args, kw, "|Odpp:Profiler", kwlist, - &timer, &timeunit, - &subcalls, &builtins)) - return -1; - - if (setSubcalls(pObj, subcalls) < 0 || setBuiltins(pObj, builtins) < 0) + if (setSubcalls(self, subcalls) < 0 || setBuiltins(self, builtins) < 0) { return -1; - pObj->externalTimerUnit = timeunit; - Py_XSETREF(pObj->externalTimer, Py_XNewRef(timer)); - pObj->tool_id = PY_MONITORING_PROFILER_ID; + } + self->externalTimerUnit = timeunit; + Py_XSETREF(self->externalTimer, Py_XNewRef(timer)); + self->tool_id = PY_MONITORING_PROFILER_ID; PyObject* monitoring = _PyImport_GetModuleAttrString("sys", "monitoring"); if (!monitoring) { return -1; } - pObj->missing = PyObject_GetAttrString(monitoring, "MISSING"); - if (!pObj->missing) { + self->missing = PyObject_GetAttrString(monitoring, "MISSING"); + if (!self->missing) { Py_DECREF(monitoring); return -1; } @@ -939,35 +975,18 @@ profiler_init(ProfilerObject *pObj, PyObject *args, PyObject *kw) static PyMethodDef profiler_methods[] = { _LSPROF_PROFILER_GETSTATS_METHODDEF - {"enable", _PyCFunction_CAST(profiler_enable), - METH_VARARGS | METH_KEYWORDS, enable_doc}, - {"disable", (PyCFunction)profiler_disable, - METH_NOARGS, disable_doc}, - {"clear", (PyCFunction)profiler_clear, - METH_NOARGS, clear_doc}, - {"_pystart_callback", _PyCFunction_CAST(pystart_callback), - METH_FASTCALL, NULL}, - {"_pyreturn_callback", _PyCFunction_CAST(pyreturn_callback), - METH_FASTCALL, NULL}, - {"_ccall_callback", _PyCFunction_CAST(ccall_callback), - 
METH_FASTCALL, NULL}, - {"_creturn_callback", _PyCFunction_CAST(creturn_callback), - METH_FASTCALL, NULL}, + _LSPROF_PROFILER_ENABLE_METHODDEF + _LSPROF_PROFILER_DISABLE_METHODDEF + _LSPROF_PROFILER_CLEAR_METHODDEF + _LSPROF_PROFILER__PYSTART_CALLBACK_METHODDEF + _LSPROF_PROFILER__PYRETURN_CALLBACK_METHODDEF + _LSPROF_PROFILER__CCALL_CALLBACK_METHODDEF + _LSPROF_PROFILER__CRETURN_CALLBACK_METHODDEF {NULL, NULL} }; -PyDoc_STRVAR(profiler_doc, "\ -Profiler(timer=None, timeunit=None, subcalls=True, builtins=True)\n\ -\n\ - Builds a profiler object using the specified timer function.\n\ - The default timer is a fast built-in one based on real time.\n\ - For custom timer functions returning integers, timeunit can\n\ - be a float specifying a scale (i.e. how long each integer unit\n\ - is, in seconds).\n\ -"); - static PyType_Slot _lsprof_profiler_type_spec_slots[] = { - {Py_tp_doc, (void *)profiler_doc}, + {Py_tp_doc, (void *)profiler_init__doc__}, {Py_tp_methods, profiler_methods}, {Py_tp_dealloc, profiler_dealloc}, {Py_tp_init, profiler_init}, diff --git a/Modules/clinic/_lsprof.c.h b/Modules/clinic/_lsprof.c.h index b3b7fda5660bfd5..234cc9ef3c0eaf9 100644 --- a/Modules/clinic/_lsprof.c.h +++ b/Modules/clinic/_lsprof.c.h @@ -2,6 +2,12 @@ preserve [clinic start generated code]*/ +#if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) +# include "pycore_gc.h" // PyGC_Head +# include "pycore_runtime.h" // _Py_ID() +#endif +#include "pycore_modsupport.h" // _PyArg_CheckPositional() + PyDoc_STRVAR(_lsprof_Profiler_getstats__doc__, "getstats($self, /)\n" "--\n" @@ -45,4 +51,358 @@ _lsprof_Profiler_getstats(ProfilerObject *self, PyTypeObject *cls, PyObject *con } return _lsprof_Profiler_getstats_impl(self, cls); } -/*[clinic end generated code: output=5c9d87d89863dc83 input=a9049054013a1b77]*/ + +PyDoc_STRVAR(_lsprof_Profiler__pystart_callback__doc__, +"_pystart_callback($self, code, instruction_offset, /)\n" +"--\n" +"\n"); + +#define 
_LSPROF_PROFILER__PYSTART_CALLBACK_METHODDEF \ + {"_pystart_callback", _PyCFunction_CAST(_lsprof_Profiler__pystart_callback), METH_FASTCALL, _lsprof_Profiler__pystart_callback__doc__}, + +static PyObject * +_lsprof_Profiler__pystart_callback_impl(ProfilerObject *self, PyObject *code, + PyObject *instruction_offset); + +static PyObject * +_lsprof_Profiler__pystart_callback(ProfilerObject *self, PyObject *const *args, Py_ssize_t nargs) +{ + PyObject *return_value = NULL; + PyObject *code; + PyObject *instruction_offset; + + if (!_PyArg_CheckPositional("_pystart_callback", nargs, 2, 2)) { + goto exit; + } + code = args[0]; + instruction_offset = args[1]; + return_value = _lsprof_Profiler__pystart_callback_impl(self, code, instruction_offset); + +exit: + return return_value; +} + +PyDoc_STRVAR(_lsprof_Profiler__pyreturn_callback__doc__, +"_pyreturn_callback($self, code, instruction_offset, retval, /)\n" +"--\n" +"\n"); + +#define _LSPROF_PROFILER__PYRETURN_CALLBACK_METHODDEF \ + {"_pyreturn_callback", _PyCFunction_CAST(_lsprof_Profiler__pyreturn_callback), METH_FASTCALL, _lsprof_Profiler__pyreturn_callback__doc__}, + +static PyObject * +_lsprof_Profiler__pyreturn_callback_impl(ProfilerObject *self, + PyObject *code, + PyObject *instruction_offset, + PyObject *retval); + +static PyObject * +_lsprof_Profiler__pyreturn_callback(ProfilerObject *self, PyObject *const *args, Py_ssize_t nargs) +{ + PyObject *return_value = NULL; + PyObject *code; + PyObject *instruction_offset; + PyObject *retval; + + if (!_PyArg_CheckPositional("_pyreturn_callback", nargs, 3, 3)) { + goto exit; + } + code = args[0]; + instruction_offset = args[1]; + retval = args[2]; + return_value = _lsprof_Profiler__pyreturn_callback_impl(self, code, instruction_offset, retval); + +exit: + return return_value; +} + +PyDoc_STRVAR(_lsprof_Profiler__ccall_callback__doc__, +"_ccall_callback($self, code, instruction_offset, callable, self_arg, /)\n" +"--\n" +"\n"); + +#define 
_LSPROF_PROFILER__CCALL_CALLBACK_METHODDEF \ + {"_ccall_callback", _PyCFunction_CAST(_lsprof_Profiler__ccall_callback), METH_FASTCALL, _lsprof_Profiler__ccall_callback__doc__}, + +static PyObject * +_lsprof_Profiler__ccall_callback_impl(ProfilerObject *self, PyObject *code, + PyObject *instruction_offset, + PyObject *callable, PyObject *self_arg); + +static PyObject * +_lsprof_Profiler__ccall_callback(ProfilerObject *self, PyObject *const *args, Py_ssize_t nargs) +{ + PyObject *return_value = NULL; + PyObject *code; + PyObject *instruction_offset; + PyObject *callable; + PyObject *self_arg; + + if (!_PyArg_CheckPositional("_ccall_callback", nargs, 4, 4)) { + goto exit; + } + code = args[0]; + instruction_offset = args[1]; + callable = args[2]; + self_arg = args[3]; + return_value = _lsprof_Profiler__ccall_callback_impl(self, code, instruction_offset, callable, self_arg); + +exit: + return return_value; +} + +PyDoc_STRVAR(_lsprof_Profiler__creturn_callback__doc__, +"_creturn_callback($self, code, instruction_offset, callable, self_arg,\n" +" /)\n" +"--\n" +"\n"); + +#define _LSPROF_PROFILER__CRETURN_CALLBACK_METHODDEF \ + {"_creturn_callback", _PyCFunction_CAST(_lsprof_Profiler__creturn_callback), METH_FASTCALL, _lsprof_Profiler__creturn_callback__doc__}, + +static PyObject * +_lsprof_Profiler__creturn_callback_impl(ProfilerObject *self, PyObject *code, + PyObject *instruction_offset, + PyObject *callable, + PyObject *self_arg); + +static PyObject * +_lsprof_Profiler__creturn_callback(ProfilerObject *self, PyObject *const *args, Py_ssize_t nargs) +{ + PyObject *return_value = NULL; + PyObject *code; + PyObject *instruction_offset; + PyObject *callable; + PyObject *self_arg; + + if (!_PyArg_CheckPositional("_creturn_callback", nargs, 4, 4)) { + goto exit; + } + code = args[0]; + instruction_offset = args[1]; + callable = args[2]; + self_arg = args[3]; + return_value = _lsprof_Profiler__creturn_callback_impl(self, code, instruction_offset, callable, self_arg); + 
+exit: + return return_value; +} + +PyDoc_STRVAR(_lsprof_Profiler_enable__doc__, +"enable($self, /, subcalls=True, builtins=True)\n" +"--\n" +"\n" +"Start collecting profiling information.\n" +"\n" +" subcalls\n" +" If True, also records for each function\n" +" statistics separated according to its current caller.\n" +" builtins\n" +" If True, records the time spent in\n" +" built-in functions separately from their caller."); + +#define _LSPROF_PROFILER_ENABLE_METHODDEF \ + {"enable", _PyCFunction_CAST(_lsprof_Profiler_enable), METH_FASTCALL|METH_KEYWORDS, _lsprof_Profiler_enable__doc__}, + +static PyObject * +_lsprof_Profiler_enable_impl(ProfilerObject *self, int subcalls, + int builtins); + +static PyObject * +_lsprof_Profiler_enable(ProfilerObject *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 2 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_item = { &_Py_ID(subcalls), &_Py_ID(builtins), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"subcalls", "builtins", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "enable", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[2]; + Py_ssize_t noptargs = nargs + (kwnames ? 
PyTuple_GET_SIZE(kwnames) : 0) - 0; + int subcalls = 1; + int builtins = 1; + + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 0, 2, 0, argsbuf); + if (!args) { + goto exit; + } + if (!noptargs) { + goto skip_optional_pos; + } + if (args[0]) { + subcalls = PyObject_IsTrue(args[0]); + if (subcalls < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_pos; + } + } + builtins = PyObject_IsTrue(args[1]); + if (builtins < 0) { + goto exit; + } +skip_optional_pos: + return_value = _lsprof_Profiler_enable_impl(self, subcalls, builtins); + +exit: + return return_value; +} + +PyDoc_STRVAR(_lsprof_Profiler_disable__doc__, +"disable($self, /)\n" +"--\n" +"\n" +"Stop collecting profiling information."); + +#define _LSPROF_PROFILER_DISABLE_METHODDEF \ + {"disable", (PyCFunction)_lsprof_Profiler_disable, METH_NOARGS, _lsprof_Profiler_disable__doc__}, + +static PyObject * +_lsprof_Profiler_disable_impl(ProfilerObject *self); + +static PyObject * +_lsprof_Profiler_disable(ProfilerObject *self, PyObject *Py_UNUSED(ignored)) +{ + return _lsprof_Profiler_disable_impl(self); +} + +PyDoc_STRVAR(_lsprof_Profiler_clear__doc__, +"clear($self, /)\n" +"--\n" +"\n" +"Clear all profiling information collected so far."); + +#define _LSPROF_PROFILER_CLEAR_METHODDEF \ + {"clear", (PyCFunction)_lsprof_Profiler_clear, METH_NOARGS, _lsprof_Profiler_clear__doc__}, + +static PyObject * +_lsprof_Profiler_clear_impl(ProfilerObject *self); + +static PyObject * +_lsprof_Profiler_clear(ProfilerObject *self, PyObject *Py_UNUSED(ignored)) +{ + return _lsprof_Profiler_clear_impl(self); +} + +PyDoc_STRVAR(profiler_init__doc__, +"Profiler(timer=None, timeunit=0.0, subcalls=True, builtins=True)\n" +"--\n" +"\n" +"Build a profiler object using the specified timer function.\n" +"\n" +"The default timer is a fast built-in one based on real time.\n" +"For custom timer functions returning integers, \'timeunit\' can\n" +"be a float specifying a scale (that is, how long each integer 
unit\n" +"is, in seconds)."); + +static int +profiler_init_impl(ProfilerObject *self, PyObject *timer, double timeunit, + int subcalls, int builtins); + +static int +profiler_init(PyObject *self, PyObject *args, PyObject *kwargs) +{ + int return_value = -1; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 4 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_item = { &_Py_ID(timer), &_Py_ID(timeunit), &_Py_ID(subcalls), &_Py_ID(builtins), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"timer", "timeunit", "subcalls", "builtins", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "Profiler", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[4]; + PyObject * const *fastargs; + Py_ssize_t nargs = PyTuple_GET_SIZE(args); + Py_ssize_t noptargs = nargs + (kwargs ? 
PyDict_GET_SIZE(kwargs) : 0) - 0; + PyObject *timer = NULL; + double timeunit = 0.0; + int subcalls = 1; + int builtins = 1; + + fastargs = _PyArg_UnpackKeywords(_PyTuple_CAST(args)->ob_item, nargs, kwargs, NULL, &_parser, 0, 4, 0, argsbuf); + if (!fastargs) { + goto exit; + } + if (!noptargs) { + goto skip_optional_pos; + } + if (fastargs[0]) { + timer = fastargs[0]; + if (!--noptargs) { + goto skip_optional_pos; + } + } + if (fastargs[1]) { + if (PyFloat_CheckExact(fastargs[1])) { + timeunit = PyFloat_AS_DOUBLE(fastargs[1]); + } + else + { + timeunit = PyFloat_AsDouble(fastargs[1]); + if (timeunit == -1.0 && PyErr_Occurred()) { + goto exit; + } + } + if (!--noptargs) { + goto skip_optional_pos; + } + } + if (fastargs[2]) { + subcalls = PyObject_IsTrue(fastargs[2]); + if (subcalls < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_pos; + } + } + builtins = PyObject_IsTrue(fastargs[3]); + if (builtins < 0) { + goto exit; + } +skip_optional_pos: + return_value = profiler_init_impl((ProfilerObject *)self, timer, timeunit, subcalls, builtins); + +exit: + return return_value; +} +/*[clinic end generated code: output=0b71f52bee9a7bb1 input=a9049054013a1b77]*/ From eac41c5ddfadf52fbd84ee898ad56aedd5d90a41 Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Date: Mon, 4 Nov 2024 18:49:59 +0200 Subject: [PATCH 02/54] gh-101865: Docs: Keep co_lnotab deprecation for at least 3.14 (#126392) --- Doc/deprecations/pending-removal-in-3.14.rst | 7 ------- Doc/deprecations/pending-removal-in-3.15.rst | 9 +++++++++ Doc/reference/datamodel.rst | 2 +- Doc/whatsnew/3.12.rst | 4 ++-- 4 files changed, 12 insertions(+), 10 deletions(-) diff --git a/Doc/deprecations/pending-removal-in-3.14.rst b/Doc/deprecations/pending-removal-in-3.14.rst index 0863853339b8b52..1904465b8565069 100644 --- a/Doc/deprecations/pending-removal-in-3.14.rst +++ b/Doc/deprecations/pending-removal-in-3.14.rst @@ -103,13 +103,6 @@ Pending removal in Python 3.14 
if :ref:`named placeholders <sqlite3-placeholders>` are used and *parameters* is a sequence instead of a :class:`dict`. -* :class:`types.CodeType`: Accessing :attr:`~codeobject.co_lnotab` was - deprecated in :pep:`626` - since 3.10 and was planned to be removed in 3.12, - but it only got a proper :exc:`DeprecationWarning` in 3.12. - May be removed in 3.14. - (Contributed by Nikita Sobolev in :gh:`101866`.) - * :mod:`typing`: :class:`!typing.ByteString`, deprecated since Python 3.9, now causes a :exc:`DeprecationWarning` to be emitted when it is used. diff --git a/Doc/deprecations/pending-removal-in-3.15.rst b/Doc/deprecations/pending-removal-in-3.15.rst index 17029b8d4773bdb..3b03e1f49e6754a 100644 --- a/Doc/deprecations/pending-removal-in-3.15.rst +++ b/Doc/deprecations/pending-removal-in-3.15.rst @@ -59,6 +59,15 @@ Pending removal in Python 3.15 but the C version allows any number of positional or keyword arguments, ignoring every argument. +* :mod:`types`: + + * :class:`types.CodeType`: Accessing :attr:`~codeobject.co_lnotab` was + deprecated in :pep:`626` + since 3.10 and was planned to be removed in 3.12, + but it only got a proper :exc:`DeprecationWarning` in 3.12. + May be removed in 3.15. + (Contributed by Nikita Sobolev in :gh:`101866`.) + * :mod:`typing`: * The undocumented keyword argument syntax for creating diff --git a/Doc/reference/datamodel.rst b/Doc/reference/datamodel.rst index dfd1addf656a855..41133b92ed88ec1 100644 --- a/Doc/reference/datamodel.rst +++ b/Doc/reference/datamodel.rst @@ -1507,7 +1507,7 @@ Special read-only attributes .. deprecated:: 3.12 This attribute of code objects is deprecated, and may be removed in - Python 3.14. + Python 3.15. * - .. 
attribute:: codeobject.co_stacksize - The required stack size of the code object diff --git a/Doc/whatsnew/3.12.rst b/Doc/whatsnew/3.12.rst index 3640095acbaa2b2..d691185cb1ffc57 100644 --- a/Doc/whatsnew/3.12.rst +++ b/Doc/whatsnew/3.12.rst @@ -1327,8 +1327,8 @@ Deprecated * Accessing :attr:`~codeobject.co_lnotab` on code objects was deprecated in Python 3.10 via :pep:`626`, - but it only got a proper :exc:`DeprecationWarning` in 3.12, - therefore it will be removed in 3.14. + but it only got a proper :exc:`DeprecationWarning` in 3.12. + May be removed in 3.15. (Contributed by Nikita Sobolev in :gh:`101866`.) .. include:: ../deprecations/pending-removal-in-3.13.rst From e5a4b402ae55f5eeeb44d3e7bc3f3ec39b249846 Mon Sep 17 00:00:00 2001 From: Mikhail Efimov Date: Mon, 4 Nov 2024 20:28:05 +0300 Subject: [PATCH 03/54] Doc: Fix typo in documentation for ``MAKE_FUNCTION`` opcode (#126396) --- Doc/library/dis.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/library/dis.rst b/Doc/library/dis.rst index cf203a714ba126b..ecbe0fae8cd74cd 100644 --- a/Doc/library/dis.rst +++ b/Doc/library/dis.rst @@ -1562,7 +1562,7 @@ iterations of the loop. .. opcode:: MAKE_FUNCTION - Pushes a new function object on the stack built from the code object at ``STACK[1]``. + Pushes a new function object on the stack built from the code object at ``STACK[-1]``. .. versionchanged:: 3.10 Flag value ``0x04`` is a tuple of strings instead of dictionary From 2e95c5ba3bf7e5004c7e2304afda4a8f8e2443a7 Mon Sep 17 00:00:00 2001 From: mpage Date: Mon, 4 Nov 2024 11:13:32 -0800 Subject: [PATCH 04/54] gh-115999: Implement thread-local bytecode and enable specialization for `BINARY_OP` (#123926) Each thread specializes a thread-local copy of the bytecode, created on the first RESUME, in free-threaded builds. All copies of the bytecode for a code object are stored in the co_tlbc array on the code object. 
Threads reserve a globally unique index identifying its copy of the bytecode in all co_tlbc arrays at thread creation and release the index at thread destruction. The first entry in every co_tlbc array always points to the "main" copy of the bytecode that is stored at the end of the code object. This ensures that no bytecode is copied for programs that do not use threads. Thread-local bytecode can be disabled at runtime by providing either -X tlbc=0 or PYTHON_TLBC=0. Disabling thread-local bytecode also disables specialization. Concurrent modifications to the bytecode made by the specializing interpreter and instrumentation use atomics, with specialization taking care not to overwrite an instruction that was instrumented concurrently. --- Include/cpython/code.h | 19 ++ Include/cpython/initconfig.h | 1 + Include/internal/pycore_ceval.h | 12 + Include/internal/pycore_code.h | 41 ++++ Include/internal/pycore_frame.h | 56 ++++- Include/internal/pycore_gc.h | 4 + Include/internal/pycore_index_pool.h | 56 +++++ Include/internal/pycore_interp.h | 2 + Include/internal/pycore_tstate.h | 4 +- Include/internal/pycore_uop_ids.h | 123 +++++----- Include/internal/pycore_uop_metadata.h | 2 +- Lib/test/support/__init__.py | 5 + Lib/test/test_capi/test_config.py | 1 + Lib/test/test_capi/test_opt.py | 7 +- Lib/test/test_cmd_line.py | 52 ++++ Lib/test/test_dis.py | 8 +- Lib/test/test_embed.py | 1 + Lib/test/test_sys.py | 14 +- Lib/test/test_thread_local_bytecode.py | 198 ++++++++++++++++ Makefile.pre.in | 2 + Modules/_opcode.c | 3 + Modules/_testinternalcapi.c | 46 +++- Objects/codeobject.c | 313 ++++++++++++++++++++++++- Objects/frameobject.c | 14 +- Objects/typeobject.c | 7 +- PCbuild/_freeze_module.vcxproj | 1 + PCbuild/_freeze_module.vcxproj.filters | 3 + PCbuild/pythoncore.vcxproj | 2 + PCbuild/pythoncore.vcxproj.filters | 6 + Python/bytecodes.c | 68 +++--- Python/ceval.c | 23 +- Python/ceval_macros.h | 22 +- Python/executor_cases.c.h | 23 +- Python/frame.c | 3 +- 
Python/gc_free_threading.c | 12 +- Python/generated_cases.c.h | 100 +++++--- Python/index_pool.c | 193 +++++++++++++++ Python/initconfig.c | 49 +++- Python/instrumentation.c | 159 +++++++------ Python/optimizer_cases.c.h | 2 + Python/pystate.c | 10 + Python/specialize.c | 68 ++++-- Python/sysmodule.c | 5 + Tools/gdb/libpython.py | 23 +- 44 files changed, 1509 insertions(+), 254 deletions(-) create mode 100644 Include/internal/pycore_index_pool.h create mode 100644 Lib/test/test_thread_local_bytecode.py create mode 100644 Python/index_pool.c diff --git a/Include/cpython/code.h b/Include/cpython/code.h index 2561b2b88baacc6..370f1d259abe0f2 100644 --- a/Include/cpython/code.h +++ b/Include/cpython/code.h @@ -72,6 +72,24 @@ typedef struct { uint8_t *per_instruction_tools; } _PyCoMonitoringData; +#ifdef Py_GIL_DISABLED + +/* Each thread specializes a thread-local copy of the bytecode in free-threaded + * builds. These copies are stored on the code object in a `_PyCodeArray`. The + * first entry in the array always points to the "main" copy of the bytecode + * that is stored at the end of the code object. + */ +typedef struct { + Py_ssize_t size; + char *entries[1]; +} _PyCodeArray; + +#define _PyCode_DEF_THREAD_LOCAL_BYTECODE() \ + _PyCodeArray *co_tlbc; +#else +#define _PyCode_DEF_THREAD_LOCAL_BYTECODE() +#endif + // To avoid repeating ourselves in deepfreeze.py, all PyCodeObject members are // defined in this macro: #define _PyCode_DEF(SIZE) { \ @@ -138,6 +156,7 @@ typedef struct { Type is a void* to keep the format private in codeobject.c to force \ people to go through the proper APIs. 
*/ \ void *co_extra; \ + _PyCode_DEF_THREAD_LOCAL_BYTECODE() \ char co_code_adaptive[(SIZE)]; \ } diff --git a/Include/cpython/initconfig.h b/Include/cpython/initconfig.h index c2cb4e3cdd92fb0..f69c586a4f96f38 100644 --- a/Include/cpython/initconfig.h +++ b/Include/cpython/initconfig.h @@ -183,6 +183,7 @@ typedef struct PyConfig { int cpu_count; #ifdef Py_GIL_DISABLED int enable_gil; + int tlbc_enabled; #endif /* --- Path configuration inputs ------------ */ diff --git a/Include/internal/pycore_ceval.h b/Include/internal/pycore_ceval.h index 411bbff106dd698..80bd19a887871cf 100644 --- a/Include/internal/pycore_ceval.h +++ b/Include/internal/pycore_ceval.h @@ -174,6 +174,18 @@ _PyEval_IsGILEnabled(PyThreadState *tstate) extern int _PyEval_EnableGILTransient(PyThreadState *tstate); extern int _PyEval_EnableGILPermanent(PyThreadState *tstate); extern int _PyEval_DisableGIL(PyThreadState *state); + + +static inline _Py_CODEUNIT * +_PyEval_GetExecutableCode(PyThreadState *tstate, PyCodeObject *co) +{ + _Py_CODEUNIT *bc = _PyCode_GetTLBCFast(tstate, co); + if (bc != NULL) { + return bc; + } + return _PyCode_GetTLBC(co); +} + #endif extern void _PyEval_DeactivateOpCache(void); diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index 57e0a14bb9b5bdf..a0acf76db6f04d1 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -11,6 +11,7 @@ extern "C" { #include "pycore_stackref.h" // _PyStackRef #include "pycore_lock.h" // PyMutex #include "pycore_backoff.h" // _Py_BackoffCounter +#include "pycore_tstate.h" // _PyThreadStateImpl /* Each instruction in a code object is a fixed-width value, @@ -313,11 +314,17 @@ extern int _PyLineTable_PreviousAddressRange(PyCodeAddressRange *range); /** API for executors */ extern void _PyCode_Clear_Executors(PyCodeObject *code); + #ifdef Py_GIL_DISABLED // gh-115999 tracks progress on addressing this. 
#define ENABLE_SPECIALIZATION 0 +// Use this to enable specialization families once they are thread-safe. All +// uses will be replaced with ENABLE_SPECIALIZATION once all families are +// thread-safe. +#define ENABLE_SPECIALIZATION_FT 1 #else #define ENABLE_SPECIALIZATION 1 +#define ENABLE_SPECIALIZATION_FT ENABLE_SPECIALIZATION #endif /* Specialization functions */ @@ -600,6 +607,40 @@ struct _PyCode8 _PyCode_DEF(8); PyAPI_DATA(const struct _PyCode8) _Py_InitCleanup; +#ifdef Py_GIL_DISABLED + +// Return a pointer to the thread-local bytecode for the current thread, if it +// exists. +static inline _Py_CODEUNIT * +_PyCode_GetTLBCFast(PyThreadState *tstate, PyCodeObject *co) +{ + _PyCodeArray *code = _Py_atomic_load_ptr_acquire(&co->co_tlbc); + int32_t idx = ((_PyThreadStateImpl*) tstate)->tlbc_index; + if (idx < code->size && code->entries[idx] != NULL) { + return (_Py_CODEUNIT *) code->entries[idx]; + } + return NULL; +} + +// Return a pointer to the thread-local bytecode for the current thread, +// creating it if necessary. +extern _Py_CODEUNIT *_PyCode_GetTLBC(PyCodeObject *co); + +// Reserve an index for the current thread into thread-local bytecode +// arrays +// +// Returns the reserved index or -1 on error. +extern int32_t _Py_ReserveTLBCIndex(PyInterpreterState *interp); + +// Release the current thread's index into thread-local bytecode arrays +extern void _Py_ClearTLBCIndex(_PyThreadStateImpl *tstate); + +// Free all TLBC copies not associated with live threads. +// +// Returns 0 on success or -1 on error. +extern int _Py_ClearUnusedTLBC(PyInterpreterState *interp); +#endif + #ifdef __cplusplus } #endif diff --git a/Include/internal/pycore_frame.h b/Include/internal/pycore_frame.h index c9ac3819d0390b2..8c0100390d036ee 100644 --- a/Include/internal/pycore_frame.h +++ b/Include/internal/pycore_frame.h @@ -68,6 +68,10 @@ typedef struct _PyInterpreterFrame { PyObject *f_locals; /* Strong reference, may be NULL. 
Only valid if not on C stack */ PyFrameObject *frame_obj; /* Strong reference, may be NULL. Only valid if not on C stack */ _Py_CODEUNIT *instr_ptr; /* Instruction currently executing (or about to begin) */ +#ifdef Py_GIL_DISABLED + /* Index of thread-local bytecode containing instr_ptr. */ + int32_t tlbc_index; +#endif _PyStackRef *stackpointer; uint16_t return_offset; /* Only relevant during a function call */ char owner; @@ -76,7 +80,7 @@ typedef struct _PyInterpreterFrame { } _PyInterpreterFrame; #define _PyInterpreterFrame_LASTI(IF) \ - ((int)((IF)->instr_ptr - _PyCode_CODE(_PyFrame_GetCode(IF)))) + ((int)((IF)->instr_ptr - _PyFrame_GetBytecode((IF)))) static inline PyCodeObject *_PyFrame_GetCode(_PyInterpreterFrame *f) { PyObject *executable = PyStackRef_AsPyObjectBorrow(f->f_executable); @@ -84,6 +88,19 @@ static inline PyCodeObject *_PyFrame_GetCode(_PyInterpreterFrame *f) { return (PyCodeObject *)executable; } +static inline _Py_CODEUNIT * +_PyFrame_GetBytecode(_PyInterpreterFrame *f) +{ +#ifdef Py_GIL_DISABLED + PyCodeObject *co = _PyFrame_GetCode(f); + _PyCodeArray *tlbc = _Py_atomic_load_ptr_acquire(&co->co_tlbc); + assert(f->tlbc_index >= 0 && f->tlbc_index < tlbc->size); + return (_Py_CODEUNIT *)tlbc->entries[f->tlbc_index]; +#else + return _PyCode_CODE(_PyFrame_GetCode(f)); +#endif +} + static inline PyFunctionObject *_PyFrame_GetFunction(_PyInterpreterFrame *f) { PyObject *func = PyStackRef_AsPyObjectBorrow(f->f_funcobj); assert(PyFunction_Check(func)); @@ -144,13 +161,33 @@ static inline void _PyFrame_Copy(_PyInterpreterFrame *src, _PyInterpreterFrame * #endif } +#ifdef Py_GIL_DISABLED +static inline void +_PyFrame_InitializeTLBC(PyThreadState *tstate, _PyInterpreterFrame *frame, + PyCodeObject *code) +{ + _Py_CODEUNIT *tlbc = _PyCode_GetTLBCFast(tstate, code); + if (tlbc == NULL) { + // No thread-local bytecode exists for this thread yet; use the main + // thread's copy, deferring thread-local bytecode creation to the + // execution of RESUME. 
+ frame->instr_ptr = _PyCode_CODE(code); + frame->tlbc_index = 0; + } + else { + frame->instr_ptr = tlbc; + frame->tlbc_index = ((_PyThreadStateImpl *)tstate)->tlbc_index; + } +} +#endif + /* Consumes reference to func and locals. Does not initialize frame->previous, which happens when frame is linked into the frame stack. */ static inline void _PyFrame_Initialize( - _PyInterpreterFrame *frame, _PyStackRef func, + PyThreadState *tstate, _PyInterpreterFrame *frame, _PyStackRef func, PyObject *locals, PyCodeObject *code, int null_locals_from, _PyInterpreterFrame *previous) { frame->previous = previous; @@ -162,7 +199,12 @@ _PyFrame_Initialize( frame->f_locals = locals; frame->stackpointer = frame->localsplus + code->co_nlocalsplus; frame->frame_obj = NULL; +#ifdef Py_GIL_DISABLED + _PyFrame_InitializeTLBC(tstate, frame, code); +#else + (void)tstate; frame->instr_ptr = _PyCode_CODE(code); +#endif frame->return_offset = 0; frame->owner = FRAME_OWNED_BY_THREAD; @@ -224,7 +266,8 @@ _PyFrame_IsIncomplete(_PyInterpreterFrame *frame) return true; } return frame->owner != FRAME_OWNED_BY_GENERATOR && - frame->instr_ptr < _PyCode_CODE(_PyFrame_GetCode(frame)) + _PyFrame_GetCode(frame)->_co_firsttraceable; + frame->instr_ptr < _PyFrame_GetBytecode(frame) + + _PyFrame_GetCode(frame)->_co_firsttraceable; } static inline _PyInterpreterFrame * @@ -315,7 +358,8 @@ _PyFrame_PushUnchecked(PyThreadState *tstate, _PyStackRef func, int null_locals_ _PyInterpreterFrame *new_frame = (_PyInterpreterFrame *)tstate->datastack_top; tstate->datastack_top += code->co_framesize; assert(tstate->datastack_top < tstate->datastack_limit); - _PyFrame_Initialize(new_frame, func, NULL, code, null_locals_from, previous); + _PyFrame_Initialize(tstate, new_frame, func, NULL, code, null_locals_from, + previous); return new_frame; } @@ -339,7 +383,11 @@ _PyFrame_PushTrampolineUnchecked(PyThreadState *tstate, PyCodeObject *code, int assert(stackdepth <= code->co_stacksize); frame->stackpointer = 
frame->localsplus + code->co_nlocalsplus + stackdepth; frame->frame_obj = NULL; +#ifdef Py_GIL_DISABLED + _PyFrame_InitializeTLBC(tstate, frame, code); +#else frame->instr_ptr = _PyCode_CODE(code); +#endif frame->owner = FRAME_OWNED_BY_THREAD; frame->return_offset = 0; diff --git a/Include/internal/pycore_gc.h b/Include/internal/pycore_gc.h index b85957df5a6b9f4..38a1c56c09d9db2 100644 --- a/Include/internal/pycore_gc.h +++ b/Include/internal/pycore_gc.h @@ -389,6 +389,10 @@ extern int _PyGC_VisitStackRef(union _PyStackRef *ref, visitproc visit, void *ar } \ } while (0) +#ifdef Py_GIL_DISABLED +extern void _PyGC_VisitObjectsWorldStopped(PyInterpreterState *interp, + gcvisitobjects_t callback, void *arg); +#endif #ifdef __cplusplus } diff --git a/Include/internal/pycore_index_pool.h b/Include/internal/pycore_index_pool.h new file mode 100644 index 000000000000000..e81bfd4d6ed03dd --- /dev/null +++ b/Include/internal/pycore_index_pool.h @@ -0,0 +1,56 @@ +#ifndef Py_INTERNAL_INDEX_POOL_H +#define Py_INTERNAL_INDEX_POOL_H + +#include "Python.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +#ifdef Py_GIL_DISABLED + +// This contains code for allocating unique indices in an array. It is used by +// the free-threaded build to assign each thread a globally unique index into +// each code object's thread-local bytecode array. + +// A min-heap of indices +typedef struct _PyIndexHeap { + int32_t *values; + + // Number of items stored in values + Py_ssize_t size; + + // Maximum number of items that can be stored in values + Py_ssize_t capacity; +} _PyIndexHeap; + +// An unbounded pool of indices. Indices are allocated starting from 0. They +// may be released back to the pool once they are no longer in use. 
+typedef struct _PyIndexPool { + PyMutex mutex; + + // Min heap of indices available for allocation + _PyIndexHeap free_indices; + + // Next index to allocate if no free indices are available + int32_t next_index; +} _PyIndexPool; + +// Allocate the smallest available index. Returns -1 on error. +extern int32_t _PyIndexPool_AllocIndex(_PyIndexPool *indices); + +// Release `index` back to the pool +extern void _PyIndexPool_FreeIndex(_PyIndexPool *indices, int32_t index); + +extern void _PyIndexPool_Fini(_PyIndexPool *indices); + +#endif // Py_GIL_DISABLED + +#ifdef __cplusplus +} +#endif +#endif // !Py_INTERNAL_INDEX_POOL_H diff --git a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h index 36cd71e5a007d54..9e3b4299693bbc6 100644 --- a/Include/internal/pycore_interp.h +++ b/Include/internal/pycore_interp.h @@ -26,6 +26,7 @@ extern "C" { #include "pycore_genobject.h" // _PyGen_FetchStopIterationValue #include "pycore_global_objects.h"// struct _Py_interp_cached_objects #include "pycore_import.h" // struct _import_state +#include "pycore_index_pool.h" // _PyIndexPool #include "pycore_instruments.h" // _PY_MONITORING_EVENTS #include "pycore_list.h" // struct _Py_list_state #include "pycore_mimalloc.h" // struct _mimalloc_interp_state @@ -222,6 +223,7 @@ struct _is { struct _brc_state brc; // biased reference counting state struct _Py_unique_id_pool unique_ids; // object ids for per-thread refcounts PyMutex weakref_locks[NUM_WEAKREF_LIST_LOCKS]; + _PyIndexPool tlbc_indices; #endif // Per-interpreter state for the obmalloc allocator. 
For the main diff --git a/Include/internal/pycore_tstate.h b/Include/internal/pycore_tstate.h index e0e7d5ebf0912cf..b8bea72baeaaf53 100644 --- a/Include/internal/pycore_tstate.h +++ b/Include/internal/pycore_tstate.h @@ -42,6 +42,9 @@ typedef struct _PyThreadStateImpl { int is_finalized; } refcounts; + // Index to use to retrieve thread-local bytecode for this thread + int32_t tlbc_index; + // When >1, code objects do not immortalize their non-string constants. int suppress_co_const_immortalization; #endif @@ -52,7 +55,6 @@ typedef struct _PyThreadStateImpl { } _PyThreadStateImpl; - #ifdef __cplusplus } #endif diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index de628d240d1c072..55416d2aae1e1a1 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -193,106 +193,107 @@ extern "C" { #define _LOAD_ATTR_SLOT_1 423 #define _LOAD_ATTR_WITH_HINT 424 #define _LOAD_BUILD_CLASS LOAD_BUILD_CLASS +#define _LOAD_BYTECODE 425 #define _LOAD_COMMON_CONSTANT LOAD_COMMON_CONSTANT #define _LOAD_CONST LOAD_CONST #define _LOAD_CONST_IMMORTAL LOAD_CONST_IMMORTAL -#define _LOAD_CONST_INLINE 425 -#define _LOAD_CONST_INLINE_BORROW 426 -#define _LOAD_CONST_INLINE_BORROW_WITH_NULL 427 -#define _LOAD_CONST_INLINE_WITH_NULL 428 +#define _LOAD_CONST_INLINE 426 +#define _LOAD_CONST_INLINE_BORROW 427 +#define _LOAD_CONST_INLINE_BORROW_WITH_NULL 428 +#define _LOAD_CONST_INLINE_WITH_NULL 429 #define _LOAD_DEREF LOAD_DEREF -#define _LOAD_FAST 429 -#define _LOAD_FAST_0 430 -#define _LOAD_FAST_1 431 -#define _LOAD_FAST_2 432 -#define _LOAD_FAST_3 433 -#define _LOAD_FAST_4 434 -#define _LOAD_FAST_5 435 -#define _LOAD_FAST_6 436 -#define _LOAD_FAST_7 437 +#define _LOAD_FAST 430 +#define _LOAD_FAST_0 431 +#define _LOAD_FAST_1 432 +#define _LOAD_FAST_2 433 +#define _LOAD_FAST_3 434 +#define _LOAD_FAST_4 435 +#define _LOAD_FAST_5 436 +#define _LOAD_FAST_6 437 +#define _LOAD_FAST_7 438 #define _LOAD_FAST_AND_CLEAR LOAD_FAST_AND_CLEAR 
#define _LOAD_FAST_CHECK LOAD_FAST_CHECK #define _LOAD_FAST_LOAD_FAST LOAD_FAST_LOAD_FAST #define _LOAD_FROM_DICT_OR_DEREF LOAD_FROM_DICT_OR_DEREF #define _LOAD_FROM_DICT_OR_GLOBALS LOAD_FROM_DICT_OR_GLOBALS -#define _LOAD_GLOBAL 438 -#define _LOAD_GLOBAL_BUILTINS 439 -#define _LOAD_GLOBAL_BUILTINS_FROM_KEYS 440 -#define _LOAD_GLOBAL_MODULE 441 -#define _LOAD_GLOBAL_MODULE_FROM_KEYS 442 +#define _LOAD_GLOBAL 439 +#define _LOAD_GLOBAL_BUILTINS 440 +#define _LOAD_GLOBAL_BUILTINS_FROM_KEYS 441 +#define _LOAD_GLOBAL_MODULE 442 +#define _LOAD_GLOBAL_MODULE_FROM_KEYS 443 #define _LOAD_LOCALS LOAD_LOCALS #define _LOAD_NAME LOAD_NAME -#define _LOAD_SMALL_INT 443 -#define _LOAD_SMALL_INT_0 444 -#define _LOAD_SMALL_INT_1 445 -#define _LOAD_SMALL_INT_2 446 -#define _LOAD_SMALL_INT_3 447 +#define _LOAD_SMALL_INT 444 +#define _LOAD_SMALL_INT_0 445 +#define _LOAD_SMALL_INT_1 446 +#define _LOAD_SMALL_INT_2 447 +#define _LOAD_SMALL_INT_3 448 #define _LOAD_SPECIAL LOAD_SPECIAL #define _LOAD_SUPER_ATTR_ATTR LOAD_SUPER_ATTR_ATTR #define _LOAD_SUPER_ATTR_METHOD LOAD_SUPER_ATTR_METHOD -#define _MAKE_CALLARGS_A_TUPLE 448 +#define _MAKE_CALLARGS_A_TUPLE 449 #define _MAKE_CELL MAKE_CELL #define _MAKE_FUNCTION MAKE_FUNCTION -#define _MAKE_WARM 449 +#define _MAKE_WARM 450 #define _MAP_ADD MAP_ADD #define _MATCH_CLASS MATCH_CLASS #define _MATCH_KEYS MATCH_KEYS #define _MATCH_MAPPING MATCH_MAPPING #define _MATCH_SEQUENCE MATCH_SEQUENCE -#define _MAYBE_EXPAND_METHOD 450 -#define _MAYBE_EXPAND_METHOD_KW 451 -#define _MONITOR_CALL 452 -#define _MONITOR_JUMP_BACKWARD 453 -#define _MONITOR_RESUME 454 +#define _MAYBE_EXPAND_METHOD 451 +#define _MAYBE_EXPAND_METHOD_KW 452 +#define _MONITOR_CALL 453 +#define _MONITOR_JUMP_BACKWARD 454 +#define _MONITOR_RESUME 455 #define _NOP NOP #define _POP_EXCEPT POP_EXCEPT -#define _POP_JUMP_IF_FALSE 455 -#define _POP_JUMP_IF_TRUE 456 +#define _POP_JUMP_IF_FALSE 456 +#define _POP_JUMP_IF_TRUE 457 #define _POP_TOP POP_TOP -#define _POP_TOP_LOAD_CONST_INLINE_BORROW 
457 +#define _POP_TOP_LOAD_CONST_INLINE_BORROW 458 #define _PUSH_EXC_INFO PUSH_EXC_INFO -#define _PUSH_FRAME 458 +#define _PUSH_FRAME 459 #define _PUSH_NULL PUSH_NULL -#define _PY_FRAME_GENERAL 459 -#define _PY_FRAME_KW 460 -#define _QUICKEN_RESUME 461 -#define _REPLACE_WITH_TRUE 462 +#define _PY_FRAME_GENERAL 460 +#define _PY_FRAME_KW 461 +#define _QUICKEN_RESUME 462 +#define _REPLACE_WITH_TRUE 463 #define _RESUME_CHECK RESUME_CHECK #define _RETURN_GENERATOR RETURN_GENERATOR #define _RETURN_VALUE RETURN_VALUE -#define _SAVE_RETURN_OFFSET 463 -#define _SEND 464 -#define _SEND_GEN_FRAME 465 +#define _SAVE_RETURN_OFFSET 464 +#define _SEND 465 +#define _SEND_GEN_FRAME 466 #define _SETUP_ANNOTATIONS SETUP_ANNOTATIONS #define _SET_ADD SET_ADD #define _SET_FUNCTION_ATTRIBUTE SET_FUNCTION_ATTRIBUTE #define _SET_UPDATE SET_UPDATE -#define _START_EXECUTOR 466 -#define _STORE_ATTR 467 -#define _STORE_ATTR_INSTANCE_VALUE 468 -#define _STORE_ATTR_SLOT 469 -#define _STORE_ATTR_WITH_HINT 470 +#define _START_EXECUTOR 467 +#define _STORE_ATTR 468 +#define _STORE_ATTR_INSTANCE_VALUE 469 +#define _STORE_ATTR_SLOT 470 +#define _STORE_ATTR_WITH_HINT 471 #define _STORE_DEREF STORE_DEREF -#define _STORE_FAST 471 -#define _STORE_FAST_0 472 -#define _STORE_FAST_1 473 -#define _STORE_FAST_2 474 -#define _STORE_FAST_3 475 -#define _STORE_FAST_4 476 -#define _STORE_FAST_5 477 -#define _STORE_FAST_6 478 -#define _STORE_FAST_7 479 +#define _STORE_FAST 472 +#define _STORE_FAST_0 473 +#define _STORE_FAST_1 474 +#define _STORE_FAST_2 475 +#define _STORE_FAST_3 476 +#define _STORE_FAST_4 477 +#define _STORE_FAST_5 478 +#define _STORE_FAST_6 479 +#define _STORE_FAST_7 480 #define _STORE_FAST_LOAD_FAST STORE_FAST_LOAD_FAST #define _STORE_FAST_STORE_FAST STORE_FAST_STORE_FAST #define _STORE_GLOBAL STORE_GLOBAL #define _STORE_NAME STORE_NAME -#define _STORE_SLICE 480 -#define _STORE_SUBSCR 481 +#define _STORE_SLICE 481 +#define _STORE_SUBSCR 482 #define _STORE_SUBSCR_DICT STORE_SUBSCR_DICT #define 
_STORE_SUBSCR_LIST_INT STORE_SUBSCR_LIST_INT #define _SWAP SWAP -#define _TIER2_RESUME_CHECK 482 -#define _TO_BOOL 483 +#define _TIER2_RESUME_CHECK 483 +#define _TO_BOOL 484 #define _TO_BOOL_BOOL TO_BOOL_BOOL #define _TO_BOOL_INT TO_BOOL_INT #define _TO_BOOL_LIST TO_BOOL_LIST @@ -302,13 +303,13 @@ extern "C" { #define _UNARY_NEGATIVE UNARY_NEGATIVE #define _UNARY_NOT UNARY_NOT #define _UNPACK_EX UNPACK_EX -#define _UNPACK_SEQUENCE 484 +#define _UNPACK_SEQUENCE 485 #define _UNPACK_SEQUENCE_LIST UNPACK_SEQUENCE_LIST #define _UNPACK_SEQUENCE_TUPLE UNPACK_SEQUENCE_TUPLE #define _UNPACK_SEQUENCE_TWO_TUPLE UNPACK_SEQUENCE_TWO_TUPLE #define _WITH_EXCEPT_START WITH_EXCEPT_START #define _YIELD_VALUE YIELD_VALUE -#define MAX_UOP_ID 484 +#define MAX_UOP_ID 485 #ifdef __cplusplus } diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 4cfdecec78b0db4..ade297201f0ac29 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -289,7 +289,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_FATAL_ERROR] = 0, [_CHECK_VALIDITY_AND_SET_IP] = HAS_DEOPT_FLAG, [_DEOPT] = 0, - [_ERROR_POP_N] = HAS_ARG_FLAG, + [_ERROR_POP_N] = HAS_ARG_FLAG | HAS_ESCAPES_FLAG, [_TIER2_RESUME_CHECK] = HAS_DEOPT_FLAG, }; diff --git a/Lib/test/support/__init__.py b/Lib/test/support/__init__.py index 7c1ef42a4970d77..2ad267e3e08f0f0 100644 --- a/Lib/test/support/__init__.py +++ b/Lib/test/support/__init__.py @@ -1274,6 +1274,11 @@ def requires_specialization(test): _opcode.ENABLE_SPECIALIZATION, "requires specialization")(test) +def requires_specialization_ft(test): + return unittest.skipUnless( + _opcode.ENABLE_SPECIALIZATION_FT, "requires specialization")(test) + + #======================================================================= # Check for the presence of docstrings. 
diff --git a/Lib/test/test_capi/test_config.py b/Lib/test/test_capi/test_config.py index 71fb9ae45c7c306..77730ad2f320851 100644 --- a/Lib/test/test_capi/test_config.py +++ b/Lib/test/test_capi/test_config.py @@ -100,6 +100,7 @@ def test_config_get(self): options.append(("run_presite", str | None, None)) if sysconfig.get_config_var('Py_GIL_DISABLED'): options.append(("enable_gil", int, None)) + options.append(("tlbc_enabled", int, None)) if support.MS_WINDOWS: options.extend(( ("legacy_windows_stdio", bool, None), diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index f1ab72180d714d6..c352325ff3d08af 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -7,7 +7,8 @@ import _opcode -from test.support import script_helper, requires_specialization, import_helper +from test.support import (script_helper, requires_specialization, + import_helper, Py_GIL_DISABLED) _testinternalcapi = import_helper.import_module("_testinternalcapi") @@ -34,6 +35,7 @@ def clear_executors(func): @requires_specialization +@unittest.skipIf(Py_GIL_DISABLED, "optimizer not yet supported in free-threaded builds") @unittest.skipUnless(hasattr(_testinternalcapi, "get_optimizer"), "Requires optimizer infrastructure") class TestOptimizerAPI(unittest.TestCase): @@ -138,6 +140,7 @@ def get_opnames(ex): @requires_specialization +@unittest.skipIf(Py_GIL_DISABLED, "optimizer not yet supported in free-threaded builds") @unittest.skipUnless(hasattr(_testinternalcapi, "get_optimizer"), "Requires optimizer infrastructure") class TestExecutorInvalidation(unittest.TestCase): @@ -219,6 +222,7 @@ def f(): @requires_specialization +@unittest.skipIf(Py_GIL_DISABLED, "optimizer not yet supported in free-threaded builds") @unittest.skipUnless(hasattr(_testinternalcapi, "get_optimizer"), "Requires optimizer infrastructure") @unittest.skipIf(os.getenv("PYTHON_UOPS_OPTIMIZE") == "0", "Needs uop optimizer to run.") @@ -586,6 +590,7 @@ def testfunc(n): 
@requires_specialization +@unittest.skipIf(Py_GIL_DISABLED, "optimizer not yet supported in free-threaded builds") @unittest.skipUnless(hasattr(_testinternalcapi, "get_optimizer"), "Requires optimizer infrastructure") @unittest.skipIf(os.getenv("PYTHON_UOPS_OPTIMIZE") == "0", "Needs uop optimizer to run.") diff --git a/Lib/test/test_cmd_line.py b/Lib/test/test_cmd_line.py index eca9adf9a7dcbc5..634efda354407f3 100644 --- a/Lib/test/test_cmd_line.py +++ b/Lib/test/test_cmd_line.py @@ -12,6 +12,7 @@ from test import support from test.support import os_helper from test.support import force_not_colorized +from test.support import threading_helper from test.support.script_helper import ( spawn_python, kill_python, assert_python_ok, assert_python_failure, interpreter_requires_environment @@ -1068,6 +1069,57 @@ def res2int(self, res): out = res.out.strip().decode("utf-8") return tuple(int(i) for i in out.split()) + @unittest.skipUnless(support.Py_GIL_DISABLED, + "PYTHON_TLBC and -X tlbc" + " only supported in Py_GIL_DISABLED builds") + @threading_helper.requires_working_threading() + def test_disable_thread_local_bytecode(self): + code = """if 1: + import threading + def test(x, y): + return x + y + t = threading.Thread(target=test, args=(1,2)) + t.start() + t.join()""" + assert_python_ok("-W", "always", "-X", "tlbc=0", "-c", code) + assert_python_ok("-W", "always", "-c", code, PYTHON_TLBC="0") + + @unittest.skipUnless(support.Py_GIL_DISABLED, + "PYTHON_TLBC and -X tlbc" + " only supported in Py_GIL_DISABLED builds") + @threading_helper.requires_working_threading() + def test_enable_thread_local_bytecode(self): + code = """if 1: + import threading + def test(x, y): + return x + y + t = threading.Thread(target=test, args=(1,2)) + t.start() + t.join()""" + # The functionality of thread-local bytecode is tested more extensively + # in test_thread_local_bytecode + assert_python_ok("-W", "always", "-X", "tlbc=1", "-c", code) + assert_python_ok("-W", "always", "-c", code, 
PYTHON_TLBC="1") + + @unittest.skipUnless(support.Py_GIL_DISABLED, + "PYTHON_TLBC and -X tlbc" + " only supported in Py_GIL_DISABLED builds") + def test_invalid_thread_local_bytecode(self): + rc, out, err = assert_python_failure("-X", "tlbc") + self.assertIn(b"tlbc=n: n is missing or invalid", err) + rc, out, err = assert_python_failure("-X", "tlbc=foo") + self.assertIn(b"tlbc=n: n is missing or invalid", err) + rc, out, err = assert_python_failure("-X", "tlbc=-1") + self.assertIn(b"tlbc=n: n is missing or invalid", err) + rc, out, err = assert_python_failure("-X", "tlbc=2") + self.assertIn(b"tlbc=n: n is missing or invalid", err) + rc, out, err = assert_python_failure(PYTHON_TLBC="foo") + self.assertIn(b"PYTHON_TLBC=N: N is missing or invalid", err) + rc, out, err = assert_python_failure(PYTHON_TLBC="-1") + self.assertIn(b"PYTHON_TLBC=N: N is missing or invalid", err) + rc, out, err = assert_python_failure(PYTHON_TLBC="2") + self.assertIn(b"PYTHON_TLBC=N: N is missing or invalid", err) + @unittest.skipIf(interpreter_requires_environment(), 'Cannot run -I tests when PYTHON env vars are required.') diff --git a/Lib/test/test_dis.py b/Lib/test/test_dis.py index 3c6570afa50d455..a991c67fca46bea 100644 --- a/Lib/test/test_dis.py +++ b/Lib/test/test_dis.py @@ -10,7 +10,8 @@ import types import unittest from test.support import (captured_stdout, requires_debug_ranges, - requires_specialization, cpython_only) + requires_specialization, requires_specialization_ft, + cpython_only) from test.support.bytecode_helper import BytecodeTestCase import opcode @@ -1261,7 +1262,7 @@ def test_super_instructions(self): self.do_disassembly_compare(got, dis_load_test_quickened_code) @cpython_only - @requires_specialization + @requires_specialization_ft def test_binary_specialize(self): binary_op_quicken = """\ 0 RESUME_CHECK 0 @@ -1281,6 +1282,9 @@ def test_binary_specialize(self): got = self.get_disassembly(co_unicode, adaptive=True) self.do_disassembly_compare(got, binary_op_quicken % 
"BINARY_OP_ADD_UNICODE 0 (+)") + @cpython_only + @requires_specialization + def test_binary_subscr_specialize(self): binary_subscr_quicken = """\ 0 RESUME_CHECK 0 diff --git a/Lib/test/test_embed.py b/Lib/test/test_embed.py index 5e886b6c8c38ecf..bf861ef06ee2d3f 100644 --- a/Lib/test/test_embed.py +++ b/Lib/test/test_embed.py @@ -644,6 +644,7 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase): CONFIG_COMPAT['run_presite'] = None if support.Py_GIL_DISABLED: CONFIG_COMPAT['enable_gil'] = -1 + CONFIG_COMPAT['tlbc_enabled'] = GET_DEFAULT_CONFIG if MS_WINDOWS: CONFIG_COMPAT.update({ 'legacy_windows_stdio': False, diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py index c0862d7d15f39ec..d839893d2c657eb 100644 --- a/Lib/test/test_sys.py +++ b/Lib/test/test_sys.py @@ -1094,7 +1094,14 @@ def test_getallocatedblocks(self): # While we could imagine a Python session where the number of # multiple buffer objects would exceed the sharing of references, # it is unlikely to happen in a normal test run. - self.assertLess(a, sys.gettotalrefcount()) + # + # In free-threaded builds each code object owns an array of + # pointers to copies of the bytecode. When the number of + # code objects is a large fraction of the total number of + # references, this can cause the total number of allocated + # blocks to exceed the total number of references. 
+ if not support.Py_GIL_DISABLED: + self.assertLess(a, sys.gettotalrefcount()) except AttributeError: # gettotalrefcount() not available pass @@ -1613,7 +1620,10 @@ class C(object): pass def func(): return sys._getframe() x = func() - INTERPRETER_FRAME = '9PhcP' + if support.Py_GIL_DISABLED: + INTERPRETER_FRAME = '10PhcP' + else: + INTERPRETER_FRAME = '9PhcP' check(x, size('3PiccPP' + INTERPRETER_FRAME + 'P')) # function def func(): pass diff --git a/Lib/test/test_thread_local_bytecode.py b/Lib/test/test_thread_local_bytecode.py new file mode 100644 index 000000000000000..7a8809c5ae7697f --- /dev/null +++ b/Lib/test/test_thread_local_bytecode.py @@ -0,0 +1,198 @@ +"""Tests for thread-local bytecode.""" +import dis +import textwrap +import unittest + +from test import support +from test.support import cpython_only, import_helper, requires_specialization_ft +from test.support.script_helper import assert_python_ok +from test.support.threading_helper import requires_working_threading + +# Skip this test if the _testinternalcapi module isn't available +_testinternalcapi = import_helper.import_module("_testinternalcapi") + + +@cpython_only +@requires_working_threading() +@unittest.skipUnless(support.Py_GIL_DISABLED, "only in free-threaded builds") +class TLBCTests(unittest.TestCase): + @requires_specialization_ft + def test_new_threads_start_with_unspecialized_code(self): + code = textwrap.dedent(""" + import dis + import queue + import threading + + from _testinternalcapi import get_tlbc + + def all_opnames(bc): + return {i.opname for i in dis._get_instructions_bytes(bc)} + + def f(a, b, q=None): + if q is not None: + q.put(get_tlbc(f)) + return a + b + + for _ in range(100): + # specialize + f(1, 2) + + q = queue.Queue() + t = threading.Thread(target=f, args=('a', 'b', q)) + t.start() + t.join() + + assert "BINARY_OP_ADD_INT" in all_opnames(get_tlbc(f)) + assert "BINARY_OP_ADD_INT" not in all_opnames(q.get()) + """) + assert_python_ok("-X", "tlbc=1", "-c", code) + + 
@requires_specialization_ft + def test_threads_specialize_independently(self): + code = textwrap.dedent(""" + import dis + import queue + import threading + + from _testinternalcapi import get_tlbc + + def all_opnames(bc): + return {i.opname for i in dis._get_instructions_bytes(bc)} + + def f(a, b): + return a + b + + def g(a, b, q=None): + for _ in range(100): + f(a, b) + if q is not None: + q.put(get_tlbc(f)) + + # specialize in main thread + g(1, 2) + + # specialize in other thread + q = queue.Queue() + t = threading.Thread(target=g, args=('a', 'b', q)) + t.start() + t.join() + + assert "BINARY_OP_ADD_INT" in all_opnames(get_tlbc(f)) + t_opnames = all_opnames(q.get()) + assert "BINARY_OP_ADD_INT" not in t_opnames + assert "BINARY_OP_ADD_UNICODE" in t_opnames + """) + assert_python_ok("-X", "tlbc=1", "-c", code) + + def test_reuse_tlbc_across_threads_different_lifetimes(self): + code = textwrap.dedent(""" + import queue + import threading + + from _testinternalcapi import get_tlbc_id + + def f(a, b, q=None): + if q is not None: + q.put(get_tlbc_id(f)) + return a + b + + q = queue.Queue() + tlbc_ids = [] + for _ in range(3): + t = threading.Thread(target=f, args=('a', 'b', q)) + t.start() + t.join() + tlbc_ids.append(q.get()) + + assert tlbc_ids[0] == tlbc_ids[1] + assert tlbc_ids[1] == tlbc_ids[2] + """) + assert_python_ok("-X", "tlbc=1", "-c", code) + + def test_no_copies_if_tlbc_disabled(self): + code = textwrap.dedent(""" + import queue + import threading + + from _testinternalcapi import get_tlbc_id + + def f(a, b, q=None): + if q is not None: + q.put(get_tlbc_id(f)) + return a + b + + q = queue.Queue() + threads = [] + for _ in range(3): + t = threading.Thread(target=f, args=('a', 'b', q)) + t.start() + threads.append(t) + + tlbc_ids = [] + for t in threads: + t.join() + tlbc_ids.append(q.get()) + + main_tlbc_id = get_tlbc_id(f) + assert main_tlbc_id is not None + assert tlbc_ids[0] == main_tlbc_id + assert tlbc_ids[1] == main_tlbc_id + assert tlbc_ids[2] == 
main_tlbc_id + """) + assert_python_ok("-X", "tlbc=0", "-c", code) + + def test_no_specialization_if_tlbc_disabled(self): + code = textwrap.dedent(""" + import dis + import queue + import threading + + from _testinternalcapi import get_tlbc + + def all_opnames(f): + bc = get_tlbc(f) + return {i.opname for i in dis._get_instructions_bytes(bc)} + + def f(a, b): + return a + b + + for _ in range(100): + f(1, 2) + + assert "BINARY_OP_ADD_INT" not in all_opnames(f) + """) + assert_python_ok("-X", "tlbc=0", "-c", code) + + def test_generator_throw(self): + code = textwrap.dedent(""" + import queue + import threading + + from _testinternalcapi import get_tlbc_id + + def g(): + try: + yield + except: + yield get_tlbc_id(g) + + def f(q): + gen = g() + next(gen) + q.put(gen.throw(ValueError)) + + q = queue.Queue() + t = threading.Thread(target=f, args=(q,)) + t.start() + t.join() + + gen = g() + next(gen) + main_id = gen.throw(ValueError) + assert main_id != q.get() + """) + assert_python_ok("-X", "tlbc=1", "-c", code) + + +if __name__ == "__main__": + unittest.main() diff --git a/Makefile.pre.in b/Makefile.pre.in index 1a9191ec0ce48f1..c650ecaf7be1373 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -460,6 +460,7 @@ PYTHON_OBJS= \ Python/hashtable.o \ Python/import.o \ Python/importdl.o \ + Python/index_pool.o \ Python/initconfig.o \ Python/interpconfig.o \ Python/instrumentation.o \ @@ -1228,6 +1229,7 @@ PYTHON_HEADERS= \ $(srcdir)/Include/internal/pycore_hashtable.h \ $(srcdir)/Include/internal/pycore_import.h \ $(srcdir)/Include/internal/pycore_importdl.h \ + $(srcdir)/Include/internal/pycore_index_pool.h \ $(srcdir)/Include/internal/pycore_initconfig.h \ $(srcdir)/Include/internal/pycore_instruments.h \ $(srcdir)/Include/internal/pycore_instruction_sequence.h \ diff --git a/Modules/_opcode.c b/Modules/_opcode.c index dc93063aee7e549..7ccf7af6bf908ff 100644 --- a/Modules/_opcode.c +++ b/Modules/_opcode.c @@ -422,6 +422,9 @@ _opcode_exec(PyObject *m) { if 
(PyModule_AddIntMacro(m, ENABLE_SPECIALIZATION) < 0) { return -1; } + if (PyModule_AddIntMacro(m, ENABLE_SPECIALIZATION_FT) < 0) { + return -1; + } return 0; } diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c index eb98b433c6c6af5..883f32599fbc99e 100644 --- a/Modules/_testinternalcapi.c +++ b/Modules/_testinternalcapi.c @@ -14,6 +14,7 @@ #include "pycore_bitutils.h" // _Py_bswap32() #include "pycore_bytesobject.h" // _PyBytes_Find() #include "pycore_ceval.h" // _PyEval_AddPendingCall() +#include "pycore_code.h" // _PyCode_GetTLBCFast() #include "pycore_compile.h" // _PyCompile_CodeGen() #include "pycore_context.h" // _PyContext_NewHamtForTests() #include "pycore_dict.h" // _PyManagedDictPointer_GetValues() @@ -1963,6 +1964,48 @@ get_py_thread_id(PyObject *self, PyObject *Py_UNUSED(ignored)) Py_BUILD_ASSERT(sizeof(unsigned long long) >= sizeof(tid)); return PyLong_FromUnsignedLongLong(tid); } + +static PyCodeObject * +get_code(PyObject *obj) +{ + if (PyCode_Check(obj)) { + return (PyCodeObject *)obj; + } + else if (PyFunction_Check(obj)) { + return (PyCodeObject *)PyFunction_GetCode(obj); + } + return (PyCodeObject *)PyErr_Format( + PyExc_TypeError, "expected function or code object, got %s", + Py_TYPE(obj)->tp_name); +} + +static PyObject * +get_tlbc(PyObject *Py_UNUSED(module), PyObject *obj) +{ + PyCodeObject *code = get_code(obj); + if (code == NULL) { + return NULL; + } + _Py_CODEUNIT *bc = _PyCode_GetTLBCFast(PyThreadState_GET(), code); + if (bc == NULL) { + Py_RETURN_NONE; + } + return PyBytes_FromStringAndSize((const char *)bc, _PyCode_NBYTES(code)); +} + +static PyObject * +get_tlbc_id(PyObject *Py_UNUSED(module), PyObject *obj) +{ + PyCodeObject *code = get_code(obj); + if (code == NULL) { + return NULL; + } + _Py_CODEUNIT *bc = _PyCode_GetTLBCFast(PyThreadState_GET(), code); + if (bc == NULL) { + Py_RETURN_NONE; + } + return PyLong_FromVoidPtr(bc); +} #endif static PyObject * @@ -2022,7 +2065,6 @@ 
identify_type_slot_wrappers(PyObject *self, PyObject *Py_UNUSED(ignored)) return _PyType_GetSlotWrapperNames(); } - static PyMethodDef module_functions[] = { {"get_configs", get_configs, METH_NOARGS}, {"get_recursion_depth", get_recursion_depth, METH_NOARGS}, @@ -2110,6 +2152,8 @@ static PyMethodDef module_functions[] = { #ifdef Py_GIL_DISABLED {"py_thread_id", get_py_thread_id, METH_NOARGS}, + {"get_tlbc", get_tlbc, METH_O, NULL}, + {"get_tlbc_id", get_tlbc_id, METH_O, NULL}, #endif #ifdef _Py_TIER2 {"uop_symbols_test", _Py_uop_symbols_test, METH_NOARGS}, diff --git a/Objects/codeobject.c b/Objects/codeobject.c index 775ea7aca824c42..1cf9740af9a2095 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -6,17 +6,22 @@ #include "pycore_code.h" // _PyCodeConstructor #include "pycore_frame.h" // FRAME_SPECIALS_SIZE #include "pycore_hashtable.h" // _Py_hashtable_t +#include "pycore_index_pool.h" // _PyIndexPool #include "pycore_initconfig.h" // _PyStatus_OK() #include "pycore_interp.h" // PyInterpreterState.co_extra_freefuncs #include "pycore_object.h" // _PyObject_SetDeferredRefcount +#include "pycore_object_stack.h" #include "pycore_opcode_metadata.h" // _PyOpcode_Deopt, _PyOpcode_Caches #include "pycore_opcode_utils.h" // RESUME_AT_FUNC_START +#include "pycore_pymem.h" // _PyMem_FreeDelayed #include "pycore_pystate.h" // _PyInterpreterState_GET() #include "pycore_setobject.h" // _PySet_NextEntry() #include "pycore_tuple.h" // _PyTuple_ITEMS() #include "pycore_uniqueid.h" // _PyObject_AssignUniqueId() #include "clinic/codeobject.c.h" +#define INITIAL_SPECIALIZED_CODE_SIZE 16 + static const char * code_event_name(PyCodeEvent event) { switch (event) { @@ -440,9 +445,15 @@ _PyCode_Validate(struct _PyCodeConstructor *con) return 0; } -extern void _PyCode_Quicken(PyCodeObject *code); +extern void +_PyCode_Quicken(_Py_CODEUNIT *instructions, Py_ssize_t size, PyObject *consts, + int enable_counters); -static void +#ifdef Py_GIL_DISABLED +static _PyCodeArray * 
_PyCodeArray_New(Py_ssize_t size); +#endif + +static int init_code(PyCodeObject *co, struct _PyCodeConstructor *con) { int nlocalsplus = (int)PyTuple_GET_SIZE(con->localsplusnames); @@ -505,14 +516,27 @@ init_code(PyCodeObject *co, struct _PyCodeConstructor *con) memcpy(_PyCode_CODE(co), PyBytes_AS_STRING(con->code), PyBytes_GET_SIZE(con->code)); +#ifdef Py_GIL_DISABLED + co->co_tlbc = _PyCodeArray_New(INITIAL_SPECIALIZED_CODE_SIZE); + if (co->co_tlbc == NULL) { + return -1; + } + co->co_tlbc->entries[0] = co->co_code_adaptive; +#endif int entry_point = 0; while (entry_point < Py_SIZE(co) && _PyCode_CODE(co)[entry_point].op.code != RESUME) { entry_point++; } co->_co_firsttraceable = entry_point; - _PyCode_Quicken(co); +#ifdef Py_GIL_DISABLED + _PyCode_Quicken(_PyCode_CODE(co), Py_SIZE(co), co->co_consts, + interp->config.tlbc_enabled); +#else + _PyCode_Quicken(_PyCode_CODE(co), Py_SIZE(co), co->co_consts, 1); +#endif notify_code_watchers(PY_CODE_EVENT_CREATE, co); + return 0; } static int @@ -667,7 +691,12 @@ _PyCode_New(struct _PyCodeConstructor *con) PyErr_NoMemory(); return NULL; } - init_code(co, con); + + if (init_code(co, con) < 0) { + Py_DECREF(co); + return NULL; + } + #ifdef Py_GIL_DISABLED co->_co_unique_id = _PyObject_AssignUniqueId((PyObject *)co); _PyObject_GC_TRACK(co); @@ -1871,6 +1900,17 @@ code_dealloc(PyCodeObject *co) PyObject_ClearWeakRefs((PyObject*)co); } free_monitoring_data(co->_co_monitoring); +#ifdef Py_GIL_DISABLED + // The first element always points to the mutable bytecode at the end of + // the code object, which will be freed when the code object is freed. 
+ for (Py_ssize_t i = 1; i < co->co_tlbc->size; i++) { + char *entry = co->co_tlbc->entries[i]; + if (entry != NULL) { + PyMem_Free(entry); + } + } + PyMem_Free(co->co_tlbc); +#endif PyObject_Free(co); } @@ -2646,5 +2686,270 @@ _PyCode_Fini(PyInterpreterState *interp) _Py_hashtable_destroy(state->constants); state->constants = NULL; } + _PyIndexPool_Fini(&interp->tlbc_indices); #endif } + +#ifdef Py_GIL_DISABLED + +// Thread-local bytecode (TLBC) +// +// Each thread specializes a thread-local copy of the bytecode, created on the +// first RESUME, in free-threaded builds. All copies of the bytecode for a code +// object are stored in the `co_tlbc` array. Threads reserve a globally unique +// index identifying its copy of the bytecode in all `co_tlbc` arrays at thread +// creation and release the index at thread destruction. The first entry in +// every `co_tlbc` array always points to the "main" copy of the bytecode that +// is stored at the end of the code object. This ensures that no bytecode is +// copied for programs that do not use threads. +// +// Thread-local bytecode can be disabled at runtime by providing either `-X +// tlbc=0` or `PYTHON_TLBC=0`. Disabling thread-local bytecode also disables +// specialization. All threads share the main copy of the bytecode when +// thread-local bytecode is disabled. +// +// Concurrent modifications to the bytecode made by the specializing +// interpreter and instrumentation use atomics, with specialization taking care +// not to overwrite an instruction that was instrumented concurrently. 
+ +int32_t +_Py_ReserveTLBCIndex(PyInterpreterState *interp) +{ + if (interp->config.tlbc_enabled) { + return _PyIndexPool_AllocIndex(&interp->tlbc_indices); + } + // All threads share the main copy of the bytecode when TLBC is disabled + return 0; +} + +void +_Py_ClearTLBCIndex(_PyThreadStateImpl *tstate) +{ + PyInterpreterState *interp = ((PyThreadState *)tstate)->interp; + if (interp->config.tlbc_enabled) { + _PyIndexPool_FreeIndex(&interp->tlbc_indices, tstate->tlbc_index); + } +} + +static _PyCodeArray * +_PyCodeArray_New(Py_ssize_t size) +{ + _PyCodeArray *arr = PyMem_Calloc( + 1, offsetof(_PyCodeArray, entries) + sizeof(void *) * size); + if (arr == NULL) { + PyErr_NoMemory(); + return NULL; + } + arr->size = size; + return arr; +} + +static void +copy_code(_Py_CODEUNIT *dst, PyCodeObject *co) +{ + int code_len = (int) Py_SIZE(co); + for (int i = 0; i < code_len; i += _PyInstruction_GetLength(co, i)) { + dst[i] = _Py_GetBaseCodeUnit(co, i); + } + _PyCode_Quicken(dst, code_len, co->co_consts, 1); +} + +static Py_ssize_t +get_pow2_greater(Py_ssize_t initial, Py_ssize_t limit) +{ + // initial must be a power of two + assert(!(initial & (initial - 1))); + Py_ssize_t res = initial; + while (res && res < limit) { + res <<= 1; + } + return res; +} + +static _Py_CODEUNIT * +create_tlbc_lock_held(PyCodeObject *co, Py_ssize_t idx) +{ + _PyCodeArray *tlbc = co->co_tlbc; + if (idx >= tlbc->size) { + Py_ssize_t new_size = get_pow2_greater(tlbc->size, idx + 1); + if (!new_size) { + PyErr_NoMemory(); + return NULL; + } + _PyCodeArray *new_tlbc = _PyCodeArray_New(new_size); + if (new_tlbc == NULL) { + return NULL; + } + memcpy(new_tlbc->entries, tlbc->entries, tlbc->size * sizeof(void *)); + _Py_atomic_store_ptr_release(&co->co_tlbc, new_tlbc); + _PyMem_FreeDelayed(tlbc); + tlbc = new_tlbc; + } + char *bc = PyMem_Calloc(1, _PyCode_NBYTES(co)); + if (bc == NULL) { + PyErr_NoMemory(); + return NULL; + } + copy_code((_Py_CODEUNIT *) bc, co); + assert(tlbc->entries[idx] == 
NULL); + tlbc->entries[idx] = bc; + return (_Py_CODEUNIT *) bc; +} + +static _Py_CODEUNIT * +get_tlbc_lock_held(PyCodeObject *co) +{ + _PyCodeArray *tlbc = co->co_tlbc; + _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)PyThreadState_GET(); + int32_t idx = tstate->tlbc_index; + if (idx < tlbc->size && tlbc->entries[idx] != NULL) { + return (_Py_CODEUNIT *)tlbc->entries[idx]; + } + return create_tlbc_lock_held(co, idx); +} + +_Py_CODEUNIT * +_PyCode_GetTLBC(PyCodeObject *co) +{ + _Py_CODEUNIT *result; + Py_BEGIN_CRITICAL_SECTION(co); + result = get_tlbc_lock_held(co); + Py_END_CRITICAL_SECTION(); + return result; +} + +// My kingdom for a bitset +struct flag_set { + uint8_t *flags; + Py_ssize_t size; +}; + +static inline int +flag_is_set(struct flag_set *flags, Py_ssize_t idx) +{ + assert(idx >= 0); + return (idx < flags->size) && flags->flags[idx]; +} + +// Set the flag for each tlbc index in use +static int +get_indices_in_use(PyInterpreterState *interp, struct flag_set *in_use) +{ + assert(interp->stoptheworld.world_stopped); + assert(in_use->flags == NULL); + int32_t max_index = 0; + for (PyThreadState *p = interp->threads.head; p != NULL; p = p->next) { + int32_t idx = ((_PyThreadStateImpl *) p)->tlbc_index; + if (idx > max_index) { + max_index = idx; + } + } + in_use->size = (size_t) max_index + 1; + in_use->flags = PyMem_Calloc(in_use->size, sizeof(*in_use->flags)); + if (in_use->flags == NULL) { + return -1; + } + for (PyThreadState *p = interp->threads.head; p != NULL; p = p->next) { + in_use->flags[((_PyThreadStateImpl *) p)->tlbc_index] = 1; + } + return 0; +} + +struct get_code_args { + _PyObjectStack code_objs; + struct flag_set indices_in_use; + int err; +}; + +static void +clear_get_code_args(struct get_code_args *args) +{ + if (args->indices_in_use.flags != NULL) { + PyMem_Free(args->indices_in_use.flags); + args->indices_in_use.flags = NULL; + } + _PyObjectStack_Clear(&args->code_objs); +} + +static inline int +is_bytecode_unused(_PyCodeArray 
*tlbc, Py_ssize_t idx, + struct flag_set *indices_in_use) +{ + assert(idx > 0 && idx < tlbc->size); + return tlbc->entries[idx] != NULL && !flag_is_set(indices_in_use, idx); +} + +static int +get_code_with_unused_tlbc(PyObject *obj, struct get_code_args *args) +{ + if (!PyCode_Check(obj)) { + return 1; + } + PyCodeObject *co = (PyCodeObject *) obj; + _PyCodeArray *tlbc = co->co_tlbc; + // The first index always points at the main copy of the bytecode embedded + // in the code object. + for (Py_ssize_t i = 1; i < tlbc->size; i++) { + if (is_bytecode_unused(tlbc, i, &args->indices_in_use)) { + if (_PyObjectStack_Push(&args->code_objs, obj) < 0) { + args->err = -1; + return 0; + } + return 1; + } + } + return 1; +} + +static void +free_unused_bytecode(PyCodeObject *co, struct flag_set *indices_in_use) +{ + _PyCodeArray *tlbc = co->co_tlbc; + // The first index always points at the main copy of the bytecode embedded + // in the code object. + for (Py_ssize_t i = 1; i < tlbc->size; i++) { + if (is_bytecode_unused(tlbc, i, indices_in_use)) { + PyMem_Free(tlbc->entries[i]); + tlbc->entries[i] = NULL; + } + } +} + +int +_Py_ClearUnusedTLBC(PyInterpreterState *interp) +{ + struct get_code_args args = { + .code_objs = {NULL}, + .indices_in_use = {NULL, 0}, + .err = 0, + }; + _PyEval_StopTheWorld(interp); + // Collect in-use tlbc indices + if (get_indices_in_use(interp, &args.indices_in_use) < 0) { + goto err; + } + // Collect code objects that have bytecode not in use by any thread + _PyGC_VisitObjectsWorldStopped( + interp, (gcvisitobjects_t)get_code_with_unused_tlbc, &args); + if (args.err < 0) { + goto err; + } + // Free unused bytecode. This must happen outside of gc_visit_heaps; it is + // unsafe to allocate or free any mimalloc managed memory when it's + // running. 
+ PyObject *obj; + while ((obj = _PyObjectStack_Pop(&args.code_objs)) != NULL) { + free_unused_bytecode((PyCodeObject*) obj, &args.indices_in_use); + } + _PyEval_StartTheWorld(interp); + clear_get_code_args(&args); + return 0; + +err: + _PyEval_StartTheWorld(interp); + clear_get_code_args(&args); + PyErr_NoMemory(); + return -1; +} + +#endif diff --git a/Objects/frameobject.c b/Objects/frameobject.c index 55394afa5232130..c743c254848d3ac 100644 --- a/Objects/frameobject.c +++ b/Objects/frameobject.c @@ -1651,7 +1651,7 @@ frame_setlineno(PyFrameObject *f, PyObject* p_new_lineno, void *Py_UNUSED(ignore } /* Finally set the new lasti and return OK. */ f->f_lineno = 0; - f->f_frame->instr_ptr = _PyCode_CODE(code) + best_addr; + f->f_frame->instr_ptr = _PyFrame_GetBytecode(f->f_frame) + best_addr; return 0; } @@ -1867,10 +1867,11 @@ PyTypeObject PyFrame_Type = { }; static void -init_frame(_PyInterpreterFrame *frame, PyFunctionObject *func, PyObject *locals) +init_frame(PyThreadState *tstate, _PyInterpreterFrame *frame, + PyFunctionObject *func, PyObject *locals) { PyCodeObject *code = (PyCodeObject *)func->func_code; - _PyFrame_Initialize(frame, PyStackRef_FromPyObjectNew(func), + _PyFrame_Initialize(tstate, frame, PyStackRef_FromPyObjectNew(func), Py_XNewRef(locals), code, 0, NULL); } @@ -1922,7 +1923,7 @@ PyFrame_New(PyThreadState *tstate, PyCodeObject *code, Py_DECREF(func); return NULL; } - init_frame((_PyInterpreterFrame *)f->_f_frame_data, func, locals); + init_frame(tstate, (_PyInterpreterFrame *)f->_f_frame_data, func, locals); f->f_frame = (_PyInterpreterFrame *)f->_f_frame_data; f->f_frame->owner = FRAME_OWNED_BY_FRAME_OBJECT; // This frame needs to be "complete", so pretend that the first RESUME ran: @@ -1941,7 +1942,8 @@ frame_init_get_vars(_PyInterpreterFrame *frame) // here: PyCodeObject *co = _PyFrame_GetCode(frame); int lasti = _PyInterpreterFrame_LASTI(frame); - if (!(lasti < 0 && _PyCode_CODE(co)->op.code == COPY_FREE_VARS + if (!(lasti < 0 + && 
_PyFrame_GetBytecode(frame)->op.code == COPY_FREE_VARS && PyStackRef_FunctionCheck(frame->f_funcobj))) { /* Free vars are initialized */ @@ -1957,7 +1959,7 @@ frame_init_get_vars(_PyInterpreterFrame *frame) frame->localsplus[offset + i] = PyStackRef_FromPyObjectNew(o); } // COPY_FREE_VARS doesn't have inline CACHEs, either: - frame->instr_ptr = _PyCode_CODE(_PyFrame_GetCode(frame)); + frame->instr_ptr = _PyFrame_GetBytecode(frame); } diff --git a/Objects/typeobject.c b/Objects/typeobject.c index b4a11195613d744..40225313a8a33b9 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -11638,9 +11638,10 @@ super_descr_get(PyObject *self, PyObject *obj, PyObject *type) } static int -super_init_without_args(_PyInterpreterFrame *cframe, PyCodeObject *co, - PyTypeObject **type_p, PyObject **obj_p) +super_init_without_args(_PyInterpreterFrame *cframe, PyTypeObject **type_p, + PyObject **obj_p) { + PyCodeObject *co = _PyFrame_GetCode(cframe); if (co->co_argcount == 0) { PyErr_SetString(PyExc_RuntimeError, "super(): no arguments"); @@ -11740,7 +11741,7 @@ super_init_impl(PyObject *self, PyTypeObject *type, PyObject *obj) { "super(): no current frame"); return -1; } - int res = super_init_without_args(frame, _PyFrame_GetCode(frame), &type, &obj); + int res = super_init_without_args(frame, &type, &obj); if (res < 0) { return -1; diff --git a/PCbuild/_freeze_module.vcxproj b/PCbuild/_freeze_module.vcxproj index a3c2d32c454e049..51b493f8a84c6f0 100644 --- a/PCbuild/_freeze_module.vcxproj +++ b/PCbuild/_freeze_module.vcxproj @@ -222,6 +222,7 @@ + diff --git a/PCbuild/_freeze_module.vcxproj.filters b/PCbuild/_freeze_module.vcxproj.filters index 91b1d75fb8df5ee..09a5f4d30ef4905 100644 --- a/PCbuild/_freeze_module.vcxproj.filters +++ b/PCbuild/_freeze_module.vcxproj.filters @@ -232,6 +232,9 @@ Source Files + + Source Files + Source Files diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj index a4881e9256e4ddc..f840e7fd61f9853 100644 --- 
a/PCbuild/pythoncore.vcxproj +++ b/PCbuild/pythoncore.vcxproj @@ -255,6 +255,7 @@ + @@ -614,6 +615,7 @@ + diff --git a/PCbuild/pythoncore.vcxproj.filters b/PCbuild/pythoncore.vcxproj.filters index 740790cc5e1119d..a930cd0b0b10c60 100644 --- a/PCbuild/pythoncore.vcxproj.filters +++ b/PCbuild/pythoncore.vcxproj.filters @@ -687,6 +687,9 @@ Include\internal + + Include\internal + Include\internal @@ -1373,6 +1376,9 @@ Python + + Python + Python diff --git a/Python/bytecodes.c b/Python/bytecodes.c index fa98af12c69aefa..2c78cb9931733d2 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -168,11 +168,11 @@ dummy_func( } op(_QUICKEN_RESUME, (--)) { - #if ENABLE_SPECIALIZATION + #if ENABLE_SPECIALIZATION_FT if (tstate->tracing == 0 && this_instr->op.code == RESUME) { FT_ATOMIC_STORE_UINT8_RELAXED(this_instr->op.code, RESUME_CHECK); } - #endif /* ENABLE_SPECIALIZATION */ + #endif /* ENABLE_SPECIALIZATION_FT */ } tier1 op(_MAYBE_INSTRUMENT, (--)) { @@ -190,7 +190,26 @@ dummy_func( } } + op(_LOAD_BYTECODE, (--)) { + #ifdef Py_GIL_DISABLED + if (frame->tlbc_index != + ((_PyThreadStateImpl *)tstate)->tlbc_index) { + _Py_CODEUNIT *bytecode = + _PyEval_GetExecutableCode(tstate, _PyFrame_GetCode(frame)); + ERROR_IF(bytecode == NULL, error); + int off = this_instr - _PyFrame_GetBytecode(frame); + frame->tlbc_index = ((_PyThreadStateImpl *)tstate)->tlbc_index; + frame->instr_ptr = bytecode + off; + // Make sure this_instr gets reset correctly for any uops that + // follow + next_instr = frame->instr_ptr; + DISPATCH(); + } + #endif + } + macro(RESUME) = + _LOAD_BYTECODE + _MAYBE_INSTRUMENT + _QUICKEN_RESUME + _CHECK_PERIODIC_IF_NOT_YIELD_FROM; @@ -204,6 +223,10 @@ dummy_func( uintptr_t version = FT_ATOMIC_LOAD_UINTPTR_ACQUIRE(_PyFrame_GetCode(frame)->_co_instrumentation_version); assert((version & _PY_EVAL_EVENTS_MASK) == 0); DEOPT_IF(eval_breaker != version); + #ifdef Py_GIL_DISABLED + DEOPT_IF(frame->tlbc_index != + ((_PyThreadStateImpl *)tstate)->tlbc_index); + #endif }
op(_MONITOR_RESUME, (--)) { @@ -217,6 +240,7 @@ dummy_func( } macro(INSTRUMENTED_RESUME) = + _LOAD_BYTECODE + _MAYBE_INSTRUMENT + _CHECK_PERIODIC_IF_NOT_YIELD_FROM + _MONITOR_RESUME; @@ -682,8 +706,8 @@ dummy_func( }; specializing op(_SPECIALIZE_BINARY_SUBSCR, (counter/1, container, sub -- container, sub)) { - assert(frame->stackpointer == NULL); #if ENABLE_SPECIALIZATION + assert(frame->stackpointer == NULL); if (ADAPTIVE_COUNTER_TRIGGERS(counter)) { next_instr = this_instr; _Py_Specialize_BinarySubscr(container, sub, next_instr); @@ -1236,7 +1260,7 @@ dummy_func( if (oparg) { PyObject *lasti = PyStackRef_AsPyObjectBorrow(values[0]); if (PyLong_Check(lasti)) { - frame->instr_ptr = _PyCode_CODE(_PyFrame_GetCode(frame)) + PyLong_AsLong(lasti); + frame->instr_ptr = _PyFrame_GetBytecode(frame) + PyLong_AsLong(lasti); assert(!_PyErr_Occurred(tstate)); } else { @@ -2671,9 +2695,7 @@ dummy_func( assert(PyStackRef_BoolCheck(cond)); int flag = PyStackRef_Is(cond, PyStackRef_False); DEAD(cond); - #if ENABLE_SPECIALIZATION - this_instr[1].cache = (this_instr[1].cache << 1) | flag; - #endif + RECORD_BRANCH_TAKEN(this_instr[1].cache, flag); JUMPBY(oparg * flag); } @@ -2681,9 +2703,7 @@ dummy_func( assert(PyStackRef_BoolCheck(cond)); int flag = PyStackRef_Is(cond, PyStackRef_True); DEAD(cond); - #if ENABLE_SPECIALIZATION - this_instr[1].cache = (this_instr[1].cache << 1) | flag; - #endif + RECORD_BRANCH_TAKEN(this_instr[1].cache, flag); JUMPBY(oparg * flag); } @@ -3697,7 +3717,7 @@ dummy_func( op(_CREATE_INIT_FRAME, (init[1], self[1], args[oparg] -- init_frame: _PyInterpreterFrame *)) { _PyInterpreterFrame *shim = _PyFrame_PushTrampolineUnchecked( tstate, (PyCodeObject *)&_Py_InitCleanup, 1, frame); - assert(_PyCode_CODE(_PyFrame_GetCode(shim))[0].op.code == EXIT_INIT_CHECK); + assert(_PyFrame_GetBytecode(shim)[0].op.code == EXIT_INIT_CHECK); /* Push self onto stack of shim */ shim->localsplus[0] = PyStackRef_DUP(self[0]); DEAD(init); @@ -4593,7 +4613,7 @@ dummy_func( } 
specializing op(_SPECIALIZE_BINARY_OP, (counter/1, lhs, rhs -- lhs, rhs)) { - #if ENABLE_SPECIALIZATION + #if ENABLE_SPECIALIZATION_FT if (ADAPTIVE_COUNTER_TRIGGERS(counter)) { next_instr = this_instr; _Py_Specialize_BinaryOp(lhs, rhs, next_instr, oparg, LOCALS_ARRAY); @@ -4601,7 +4621,7 @@ dummy_func( } OPCODE_DEFERRED_INC(BINARY_OP); ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter); - #endif /* ENABLE_SPECIALIZATION */ + #endif /* ENABLE_SPECIALIZATION_FT */ assert(NB_ADD <= oparg); assert(oparg <= NB_INPLACE_XOR); } @@ -4632,7 +4652,7 @@ dummy_func( int original_opcode = 0; if (tstate->tracing) { PyCodeObject *code = _PyFrame_GetCode(frame); - original_opcode = code->_co_monitoring->lines[(int)(this_instr - _PyCode_CODE(code))].original_opcode; + original_opcode = code->_co_monitoring->lines[(int)(this_instr - _PyFrame_GetBytecode(frame))].original_opcode; next_instr = this_instr; } else { original_opcode = _Py_call_instrumentation_line( @@ -4687,9 +4707,7 @@ dummy_func( assert(PyStackRef_BoolCheck(cond)); int flag = PyStackRef_Is(cond, PyStackRef_True); int offset = flag * oparg; - #if ENABLE_SPECIALIZATION - this_instr[1].cache = (this_instr[1].cache << 1) | flag; - #endif + RECORD_BRANCH_TAKEN(this_instr[1].cache, flag); INSTRUMENTED_JUMP(this_instr, next_instr + offset, PY_MONITORING_EVENT_BRANCH); } @@ -4698,9 +4716,7 @@ dummy_func( assert(PyStackRef_BoolCheck(cond)); int flag = PyStackRef_Is(cond, PyStackRef_False); int offset = flag * oparg; - #if ENABLE_SPECIALIZATION - this_instr[1].cache = (this_instr[1].cache << 1) | flag; - #endif + RECORD_BRANCH_TAKEN(this_instr[1].cache, flag); INSTRUMENTED_JUMP(this_instr, next_instr + offset, PY_MONITORING_EVENT_BRANCH); } @@ -4715,9 +4731,7 @@ dummy_func( PyStackRef_CLOSE(value_stackref); offset = 0; } - #if ENABLE_SPECIALIZATION - this_instr[1].cache = (this_instr[1].cache << 1) | flag; - #endif + RECORD_BRANCH_TAKEN(this_instr[1].cache, flag); INSTRUMENTED_JUMP(this_instr, next_instr + offset, 
PY_MONITORING_EVENT_BRANCH); } @@ -4815,7 +4829,7 @@ dummy_func( tier2 op(_EXIT_TRACE, (exit_p/4 --)) { _PyExitData *exit = (_PyExitData *)exit_p; PyCodeObject *code = _PyFrame_GetCode(frame); - _Py_CODEUNIT *target = _PyCode_CODE(code) + exit->target; + _Py_CODEUNIT *target = _PyFrame_GetBytecode(frame) + exit->target; #if defined(Py_DEBUG) && !defined(_Py_JIT) OPT_HIST(trace_uop_execution_counter, trace_run_length_hist); if (lltrace >= 2) { @@ -4823,7 +4837,7 @@ dummy_func( _PyUOpPrint(&next_uop[-1]); printf(", exit %u, temp %d, target %d -> %s]\n", exit - current_executor->exits, exit->temperature.value_and_backoff, - (int)(target - _PyCode_CODE(code)), + (int)(target - _PyFrame_GetBytecode(frame)), _PyOpcode_OpName[target->op.code]); } #endif @@ -4933,7 +4947,7 @@ dummy_func( _PyUOpPrint(&next_uop[-1]); printf(", exit %u, temp %d, target %d -> %s]\n", exit - current_executor->exits, exit->temperature.value_and_backoff, - (int)(target - _PyCode_CODE(_PyFrame_GetCode(frame))), + (int)(target - _PyFrame_GetBytecode(frame)), _PyOpcode_OpName[target->op.code]); } #endif @@ -4995,7 +5009,7 @@ dummy_func( } tier2 op(_ERROR_POP_N, (target/2, unused[oparg] --)) { - frame->instr_ptr = ((_Py_CODEUNIT *)_PyFrame_GetCode(frame)->co_code_adaptive) + target; + frame->instr_ptr = _PyFrame_GetBytecode(frame) + target; SYNC_SP(); GOTO_UNWIND(); } diff --git a/Python/ceval.c b/Python/ceval.c index beee5325cd62591..9a608f06966688c 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -189,7 +189,7 @@ lltrace_instruction(_PyInterpreterFrame *frame, dump_stack(frame, stack_pointer); const char *opname = _PyOpcode_OpName[opcode]; assert(opname != NULL); - int offset = (int)(next_instr - _PyCode_CODE(_PyFrame_GetCode(frame))); + int offset = (int)(next_instr - _PyFrame_GetBytecode(frame)); if (OPCODE_HAS_ARG((int)_PyOpcode_Deopt[opcode])) { printf("%d: %s %d\n", offset * 2, opname, oparg); } @@ -841,6 +841,19 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, 
int } /* Because this avoids the RESUME, * we need to update instrumentation */ +#ifdef Py_GIL_DISABLED + /* Load thread-local bytecode */ + if (frame->tlbc_index != ((_PyThreadStateImpl *)tstate)->tlbc_index) { + _Py_CODEUNIT *bytecode = + _PyEval_GetExecutableCode(tstate, _PyFrame_GetCode(frame)); + if (bytecode == NULL) { + goto error; + } + ptrdiff_t off = frame->instr_ptr - _PyFrame_GetBytecode(frame); + frame->tlbc_index = ((_PyThreadStateImpl *)tstate)->tlbc_index; + frame->instr_ptr = bytecode + off; + } +#endif _Py_Instrument(_PyFrame_GetCode(frame), tstate->interp); monitor_throw(tstate, frame, frame->instr_ptr); /* TO DO -- Monitor throw entry. */ @@ -983,7 +996,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int Python main loop. */ PyObject *exc = _PyErr_GetRaisedException(tstate); PUSH(PyStackRef_FromPyObjectSteal(exc)); - next_instr = _PyCode_CODE(_PyFrame_GetCode(frame)) + handler; + next_instr = _PyFrame_GetBytecode(frame) + handler; if (monitor_handled(tstate, frame, next_instr, exc) < 0) { goto exception_unwind; @@ -1045,6 +1058,8 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int #undef ENABLE_SPECIALIZATION #define ENABLE_SPECIALIZATION 0 +#undef ENABLE_SPECIALIZATION_FT +#define ENABLE_SPECIALIZATION_FT 0 #ifdef Py_DEBUG #define DPRINTF(level, ...) 
\ @@ -1139,7 +1154,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int goto goto_to_tier1; exit_to_tier1: assert(next_uop[-1].format == UOP_FORMAT_TARGET); - next_instr = next_uop[-1].target + _PyCode_CODE(_PyFrame_GetCode(frame)); + next_instr = next_uop[-1].target + _PyFrame_GetBytecode(frame); goto_to_tier1: #ifdef Py_DEBUG if (lltrace >= 2) { @@ -1764,7 +1779,7 @@ _PyEvalFramePushAndInit(PyThreadState *tstate, _PyStackRef func, if (frame == NULL) { goto fail; } - _PyFrame_Initialize(frame, func, locals, code, 0, previous); + _PyFrame_Initialize(tstate, frame, func, locals, code, 0, previous); if (initialize_locals(tstate, func_obj, frame->localsplus, args, argcount, kwnames)) { assert(frame->owner == FRAME_OWNED_BY_THREAD); clear_thread_frame(tstate, frame); diff --git a/Python/ceval_macros.h b/Python/ceval_macros.h index 6674c4ccf9f6930..5df55813a0ddebc 100644 --- a/Python/ceval_macros.h +++ b/Python/ceval_macros.h @@ -151,7 +151,7 @@ GETITEM(PyObject *v, Py_ssize_t i) { /* Code access macros */ /* The integer overflow is checked by an assertion below. */ -#define INSTR_OFFSET() ((int)(next_instr - _PyCode_CODE(_PyFrame_GetCode(frame)))) +#define INSTR_OFFSET() ((int)(next_instr - _PyFrame_GetBytecode(frame))) #define NEXTOPARG() do { \ _Py_CODEUNIT word = {.cache = FT_ATOMIC_LOAD_UINT16_RELAXED(*(uint16_t*)next_instr)}; \ opcode = word.op.code; \ @@ -301,14 +301,6 @@ GETITEM(PyObject *v, Py_ssize_t i) { #define ADAPTIVE_COUNTER_TRIGGERS(COUNTER) \ backoff_counter_triggers(forge_backoff_counter((COUNTER))) -#ifdef Py_GIL_DISABLED -#define ADVANCE_ADAPTIVE_COUNTER(COUNTER) \ - do { \ - /* gh-115999 tracks progress on addressing this. 
*/ \ - static_assert(0, "The specializing interpreter is not yet thread-safe"); \ - } while (0); -#define PAUSE_ADAPTIVE_COUNTER(COUNTER) ((void)COUNTER) -#else #define ADVANCE_ADAPTIVE_COUNTER(COUNTER) \ do { \ (COUNTER) = advance_backoff_counter((COUNTER)); \ @@ -318,6 +310,18 @@ GETITEM(PyObject *v, Py_ssize_t i) { do { \ (COUNTER) = pause_backoff_counter((COUNTER)); \ } while (0); + +#ifdef ENABLE_SPECIALIZATION_FT +/* Multiple threads may execute these concurrently if thread-local bytecode is + * disabled and they all execute the main copy of the bytecode. Specialization + * is disabled in that case so the value is unused, but the RMW cycle should be + * free of data races. + */ +#define RECORD_BRANCH_TAKEN(bitset, flag) \ + FT_ATOMIC_STORE_UINT16_RELAXED( \ + bitset, (FT_ATOMIC_LOAD_UINT16_RELAXED(bitset) << 1) | (flag)) +#else +#define RECORD_BRANCH_TAKEN(bitset, flag) #endif #define UNBOUNDLOCAL_ERROR_MSG \ diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index ff4a0a52a0b4451..9fac4e881b81e25 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -41,6 +41,8 @@ /* _QUICKEN_RESUME is not a viable micro-op for tier 2 because it uses the 'this_instr' variable */ + /* _LOAD_BYTECODE is not a viable micro-op for tier 2 because it uses the 'this_instr' variable */ + case _RESUME_CHECK: { #if defined(__EMSCRIPTEN__) if (_Py_emscripten_signal_clock == 0) { @@ -56,6 +58,13 @@ UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } + #ifdef Py_GIL_DISABLED + if (frame->tlbc_index != + ((_PyThreadStateImpl *)tstate)->tlbc_index) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } + #endif break; } @@ -4480,8 +4489,8 @@ _PyFrame_SetStackPointer(frame, stack_pointer); _PyInterpreterFrame *shim = _PyFrame_PushTrampolineUnchecked( tstate, (PyCodeObject *)&_Py_InitCleanup, 1, frame); + assert(_PyFrame_GetBytecode(shim)[0].op.code == EXIT_INIT_CHECK); stack_pointer = _PyFrame_GetStackPointer(frame); - 
assert(_PyCode_CODE(_PyFrame_GetCode(shim))[0].op.code == EXIT_INIT_CHECK); /* Push self onto stack of shim */ shim->localsplus[0] = PyStackRef_DUP(self[0]); _PyFrame_SetStackPointer(frame, stack_pointer); @@ -5683,7 +5692,9 @@ PyObject *exit_p = (PyObject *)CURRENT_OPERAND(); _PyExitData *exit = (_PyExitData *)exit_p; PyCodeObject *code = _PyFrame_GetCode(frame); - _Py_CODEUNIT *target = _PyCode_CODE(code) + exit->target; + _PyFrame_SetStackPointer(frame, stack_pointer); + _Py_CODEUNIT *target = _PyFrame_GetBytecode(frame) + exit->target; + stack_pointer = _PyFrame_GetStackPointer(frame); #if defined(Py_DEBUG) && !defined(_Py_JIT) OPT_HIST(trace_uop_execution_counter, trace_run_length_hist); if (lltrace >= 2) { @@ -5692,7 +5703,7 @@ _PyUOpPrint(&next_uop[-1]); printf(", exit %u, temp %d, target %d -> %s]\n", exit - current_executor->exits, exit->temperature.value_and_backoff, - (int)(target - _PyCode_CODE(code)), + (int)(target - _PyFrame_GetBytecode(frame)), _PyOpcode_OpName[target->op.code]); stack_pointer = _PyFrame_GetStackPointer(frame); } @@ -5878,7 +5889,7 @@ _PyUOpPrint(&next_uop[-1]); printf(", exit %u, temp %d, target %d -> %s]\n", exit - current_executor->exits, exit->temperature.value_and_backoff, - (int)(target - _PyCode_CODE(_PyFrame_GetCode(frame))), + (int)(target - _PyFrame_GetBytecode(frame)), _PyOpcode_OpName[target->op.code]); stack_pointer = _PyFrame_GetStackPointer(frame); } @@ -5956,9 +5967,11 @@ case _ERROR_POP_N: { oparg = CURRENT_OPARG(); uint32_t target = (uint32_t)CURRENT_OPERAND(); - frame->instr_ptr = ((_Py_CODEUNIT *)_PyFrame_GetCode(frame)->co_code_adaptive) + target; stack_pointer += -oparg; assert(WITHIN_STACK_BOUNDS()); + _PyFrame_SetStackPointer(frame, stack_pointer); + frame->instr_ptr = _PyFrame_GetBytecode(frame) + target; + stack_pointer = _PyFrame_GetStackPointer(frame); GOTO_UNWIND(); break; } diff --git a/Python/frame.c b/Python/frame.c index 35e6c2d0a93333c..9a865e57d97cc6f 100644 --- a/Python/frame.c +++ 
b/Python/frame.c @@ -63,7 +63,8 @@ take_ownership(PyFrameObject *f, _PyInterpreterFrame *frame) // This may be a newly-created generator or coroutine frame. Since it's // dead anyways, just pretend that the first RESUME ran: PyCodeObject *code = _PyFrame_GetCode(frame); - frame->instr_ptr = _PyCode_CODE(code) + code->_co_firsttraceable + 1; + frame->instr_ptr = + _PyFrame_GetBytecode(frame) + code->_co_firsttraceable + 1; } assert(!_PyFrame_IsIncomplete(frame)); assert(f->f_back == NULL); diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c index 1969ed608ea524c..986d80c18d36c80 100644 --- a/Python/gc_free_threading.c +++ b/Python/gc_free_threading.c @@ -1953,16 +1953,22 @@ custom_visitor_wrapper(const mi_heap_t *heap, const mi_heap_area_t *area, } void -PyUnstable_GC_VisitObjects(gcvisitobjects_t callback, void *arg) +_PyGC_VisitObjectsWorldStopped(PyInterpreterState *interp, + gcvisitobjects_t callback, void *arg) { - PyInterpreterState *interp = _PyInterpreterState_GET(); struct custom_visitor_args wrapper = { .callback = callback, .arg = arg, }; + gc_visit_heaps(interp, &custom_visitor_wrapper, &wrapper.base); +} +void +PyUnstable_GC_VisitObjects(gcvisitobjects_t callback, void *arg) +{ + PyInterpreterState *interp = _PyInterpreterState_GET(); _PyEval_StopTheWorld(interp); - gc_visit_heaps(interp, &custom_visitor_wrapper, &wrapper.base); + _PyGC_VisitObjectsWorldStopped(interp, callback, arg); _PyEval_StartTheWorld(interp); } diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 632cbc7790a4d88..eff246f19972767 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -25,7 +25,7 @@ lhs = stack_pointer[-2]; uint16_t counter = read_u16(&this_instr[1].cache); (void)counter; - #if ENABLE_SPECIALIZATION + #if ENABLE_SPECIALIZATION_FT if (ADAPTIVE_COUNTER_TRIGGERS(counter)) { next_instr = this_instr; _PyFrame_SetStackPointer(frame, stack_pointer); @@ -35,7 +35,7 @@ } OPCODE_DEFERRED_INC(BINARY_OP); 
ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter); - #endif /* ENABLE_SPECIALIZATION */ + #endif /* ENABLE_SPECIALIZATION_FT */ assert(NB_ADD <= oparg); assert(oparg <= NB_INPLACE_XOR); } @@ -435,8 +435,8 @@ container = stack_pointer[-2]; uint16_t counter = read_u16(&this_instr[1].cache); (void)counter; - assert(frame->stackpointer == NULL); #if ENABLE_SPECIALIZATION + assert(frame->stackpointer == NULL); if (ADAPTIVE_COUNTER_TRIGGERS(counter)) { next_instr = this_instr; _PyFrame_SetStackPointer(frame, stack_pointer); @@ -1066,8 +1066,8 @@ _PyFrame_SetStackPointer(frame, stack_pointer); _PyInterpreterFrame *shim = _PyFrame_PushTrampolineUnchecked( tstate, (PyCodeObject *)&_Py_InitCleanup, 1, frame); + assert(_PyFrame_GetBytecode(shim)[0].op.code == EXIT_INIT_CHECK); stack_pointer = _PyFrame_GetStackPointer(frame); - assert(_PyCode_CODE(_PyFrame_GetCode(shim))[0].op.code == EXIT_INIT_CHECK); /* Push self onto stack of shim */ shim->localsplus[0] = PyStackRef_DUP(self[0]); _PyFrame_SetStackPointer(frame, stack_pointer); @@ -4711,7 +4711,9 @@ int original_opcode = 0; if (tstate->tracing) { PyCodeObject *code = _PyFrame_GetCode(frame); - original_opcode = code->_co_monitoring->lines[(int)(this_instr - _PyCode_CODE(code))].original_opcode; + _PyFrame_SetStackPointer(frame, stack_pointer); + original_opcode = code->_co_monitoring->lines[(int)(this_instr - _PyFrame_GetBytecode(frame))].original_opcode; + stack_pointer = _PyFrame_GetStackPointer(frame); next_instr = this_instr; } else { _PyFrame_SetStackPointer(frame, stack_pointer); @@ -4759,9 +4761,7 @@ assert(PyStackRef_BoolCheck(cond)); int flag = PyStackRef_Is(cond, PyStackRef_False); int offset = flag * oparg; - #if ENABLE_SPECIALIZATION - this_instr[1].cache = (this_instr[1].cache << 1) | flag; - #endif + RECORD_BRANCH_TAKEN(this_instr[1].cache, flag); INSTRUMENTED_JUMP(this_instr, next_instr + offset, PY_MONITORING_EVENT_BRANCH); DISPATCH(); } @@ -4782,9 +4782,7 @@ PyStackRef_CLOSE(value_stackref); offset = 0; } - 
#if ENABLE_SPECIALIZATION - this_instr[1].cache = (this_instr[1].cache << 1) | flag; - #endif + RECORD_BRANCH_TAKEN(this_instr[1].cache, flag); INSTRUMENTED_JUMP(this_instr, next_instr + offset, PY_MONITORING_EVENT_BRANCH); DISPATCH(); } @@ -4822,9 +4820,7 @@ assert(PyStackRef_BoolCheck(cond)); int flag = PyStackRef_Is(cond, PyStackRef_True); int offset = flag * oparg; - #if ENABLE_SPECIALIZATION - this_instr[1].cache = (this_instr[1].cache << 1) | flag; - #endif + RECORD_BRANCH_TAKEN(this_instr[1].cache, flag); INSTRUMENTED_JUMP(this_instr, next_instr + offset, PY_MONITORING_EVENT_BRANCH); DISPATCH(); } @@ -4834,6 +4830,28 @@ (void)this_instr; next_instr += 1; INSTRUCTION_STATS(INSTRUMENTED_RESUME); + // _LOAD_BYTECODE + { + #ifdef Py_GIL_DISABLED + if (frame->tlbc_index != + ((_PyThreadStateImpl *)tstate)->tlbc_index) { + _PyFrame_SetStackPointer(frame, stack_pointer); + _Py_CODEUNIT *bytecode = + _PyEval_GetExecutableCode(tstate, _PyFrame_GetCode(frame)); + stack_pointer = _PyFrame_GetStackPointer(frame); + if (bytecode == NULL) goto error; + _PyFrame_SetStackPointer(frame, stack_pointer); + int off = this_instr - _PyFrame_GetBytecode(frame); + stack_pointer = _PyFrame_GetStackPointer(frame); + frame->tlbc_index = ((_PyThreadStateImpl *)tstate)->tlbc_index; + frame->instr_ptr = bytecode + off; + // Make sure this_instr gets reset correctley for any uops that + // follow + next_instr = frame->instr_ptr; + DISPATCH(); + } + #endif + } // _MAYBE_INSTRUMENT { if (tstate->tracing == 0) { @@ -6646,9 +6664,7 @@ cond = stack_pointer[-1]; assert(PyStackRef_BoolCheck(cond)); int flag = PyStackRef_Is(cond, PyStackRef_False); - #if ENABLE_SPECIALIZATION - this_instr[1].cache = (this_instr[1].cache << 1) | flag; - #endif + RECORD_BRANCH_TAKEN(this_instr[1].cache, flag); JUMPBY(oparg * flag); stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); @@ -6680,9 +6696,7 @@ cond = b; assert(PyStackRef_BoolCheck(cond)); int flag = PyStackRef_Is(cond, PyStackRef_True); - #if 
ENABLE_SPECIALIZATION - this_instr[1].cache = (this_instr[1].cache << 1) | flag; - #endif + RECORD_BRANCH_TAKEN(this_instr[1].cache, flag); JUMPBY(oparg * flag); } stack_pointer += -1; @@ -6715,9 +6729,7 @@ cond = b; assert(PyStackRef_BoolCheck(cond)); int flag = PyStackRef_Is(cond, PyStackRef_False); - #if ENABLE_SPECIALIZATION - this_instr[1].cache = (this_instr[1].cache << 1) | flag; - #endif + RECORD_BRANCH_TAKEN(this_instr[1].cache, flag); JUMPBY(oparg * flag); } stack_pointer += -1; @@ -6735,9 +6747,7 @@ cond = stack_pointer[-1]; assert(PyStackRef_BoolCheck(cond)); int flag = PyStackRef_Is(cond, PyStackRef_True); - #if ENABLE_SPECIALIZATION - this_instr[1].cache = (this_instr[1].cache << 1) | flag; - #endif + RECORD_BRANCH_TAKEN(this_instr[1].cache, flag); JUMPBY(oparg * flag); stack_pointer += -1; assert(WITHIN_STACK_BOUNDS()); @@ -6832,7 +6842,11 @@ if (oparg) { PyObject *lasti = PyStackRef_AsPyObjectBorrow(values[0]); if (PyLong_Check(lasti)) { - frame->instr_ptr = _PyCode_CODE(_PyFrame_GetCode(frame)) + PyLong_AsLong(lasti); + stack_pointer += -1; + assert(WITHIN_STACK_BOUNDS()); + _PyFrame_SetStackPointer(frame, stack_pointer); + frame->instr_ptr = _PyFrame_GetBytecode(frame) + PyLong_AsLong(lasti); + stack_pointer = _PyFrame_GetStackPointer(frame); assert(!_PyErr_Occurred(tstate)); } else { @@ -6844,6 +6858,8 @@ Py_DECREF(exc); goto error; } + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); } assert(exc && PyExceptionInstance_Check(exc)); stack_pointer += -1; @@ -6871,6 +6887,28 @@ PREDICTED(RESUME); _Py_CODEUNIT* const this_instr = next_instr - 1; (void)this_instr; + // _LOAD_BYTECODE + { + #ifdef Py_GIL_DISABLED + if (frame->tlbc_index != + ((_PyThreadStateImpl *)tstate)->tlbc_index) { + _PyFrame_SetStackPointer(frame, stack_pointer); + _Py_CODEUNIT *bytecode = + _PyEval_GetExecutableCode(tstate, _PyFrame_GetCode(frame)); + stack_pointer = _PyFrame_GetStackPointer(frame); + if (bytecode == NULL) goto error; + _PyFrame_SetStackPointer(frame, 
stack_pointer); + int off = this_instr - _PyFrame_GetBytecode(frame); + stack_pointer = _PyFrame_GetStackPointer(frame); + frame->tlbc_index = ((_PyThreadStateImpl *)tstate)->tlbc_index; + frame->instr_ptr = bytecode + off; + // Make sure this_instr gets reset correctley for any uops that + // follow + next_instr = frame->instr_ptr; + DISPATCH(); + } + #endif + } // _MAYBE_INSTRUMENT { if (tstate->tracing == 0) { @@ -6890,11 +6928,11 @@ } // _QUICKEN_RESUME { - #if ENABLE_SPECIALIZATION + #if ENABLE_SPECIALIZATION_FT if (tstate->tracing == 0 && this_instr->op.code == RESUME) { FT_ATOMIC_STORE_UINT8_RELAXED(this_instr->op.code, RESUME_CHECK); } - #endif /* ENABLE_SPECIALIZATION */ + #endif /* ENABLE_SPECIALIZATION_FT */ } // _CHECK_PERIODIC_IF_NOT_YIELD_FROM { @@ -6925,6 +6963,10 @@ uintptr_t version = FT_ATOMIC_LOAD_UINTPTR_ACQUIRE(_PyFrame_GetCode(frame)->_co_instrumentation_version); assert((version & _PY_EVAL_EVENTS_MASK) == 0); DEOPT_IF(eval_breaker != version, RESUME); + #ifdef Py_GIL_DISABLED + DEOPT_IF(frame->tlbc_index != + ((_PyThreadStateImpl *)tstate)->tlbc_index, RESUME); + #endif DISPATCH(); } diff --git a/Python/index_pool.c b/Python/index_pool.c new file mode 100644 index 000000000000000..526eccff74af00e --- /dev/null +++ b/Python/index_pool.c @@ -0,0 +1,193 @@ +#include + +#include "Python.h" + +#include "pycore_index_pool.h" +#include "pycore_lock.h" + +#ifdef Py_GIL_DISABLED + +static inline void +swap(int32_t *values, Py_ssize_t i, Py_ssize_t j) +{ + int32_t tmp = values[i]; + values[i] = values[j]; + values[j] = tmp; +} + +static bool +heap_try_swap(_PyIndexHeap *heap, Py_ssize_t i, Py_ssize_t j) +{ + if (i < 0 || i >= heap->size) { + return 0; + } + if (j < 0 || j >= heap->size) { + return 0; + } + if (i <= j) { + if (heap->values[i] <= heap->values[j]) { + return 0; + } + } + else if (heap->values[j] <= heap->values[i]) { + return 0; + } + swap(heap->values, i, j); + return 1; +} + +static inline Py_ssize_t +parent(Py_ssize_t i) +{ + return (i 
- 1) / 2; +} + +static inline Py_ssize_t +left_child(Py_ssize_t i) +{ + return 2 * i + 1; +} + +static inline Py_ssize_t +right_child(Py_ssize_t i) +{ + return 2 * i + 2; +} + +static void +heap_add(_PyIndexHeap *heap, int32_t val) +{ + assert(heap->size < heap->capacity); + // Add val to end + heap->values[heap->size] = val; + heap->size++; + // Sift up + for (Py_ssize_t cur = heap->size - 1; cur > 0; cur = parent(cur)) { + if (!heap_try_swap(heap, cur, parent(cur))) { + break; + } + } +} + +static Py_ssize_t +heap_min_child(_PyIndexHeap *heap, Py_ssize_t i) +{ + if (left_child(i) < heap->size) { + if (right_child(i) < heap->size) { + Py_ssize_t lval = heap->values[left_child(i)]; + Py_ssize_t rval = heap->values[right_child(i)]; + return lval < rval ? left_child(i) : right_child(i); + } + return left_child(i); + } + else if (right_child(i) < heap->size) { + return right_child(i); + } + return -1; +} + +static int32_t +heap_pop(_PyIndexHeap *heap) +{ + assert(heap->size > 0); + // Pop smallest and replace with the last element + int32_t result = heap->values[0]; + heap->values[0] = heap->values[heap->size - 1]; + heap->size--; + // Sift down + for (Py_ssize_t cur = 0; cur < heap->size;) { + Py_ssize_t min_child = heap_min_child(heap, cur); + if (min_child > -1 && heap_try_swap(heap, cur, min_child)) { + cur = min_child; + } + else { + break; + } + } + return result; +} + +static int +heap_ensure_capacity(_PyIndexHeap *heap, Py_ssize_t limit) +{ + assert(limit > 0); + if (heap->capacity > limit) { + return 0; + } + Py_ssize_t new_capacity = heap->capacity ? 
heap->capacity : 1024; + while (new_capacity && new_capacity < limit) { + new_capacity <<= 1; + } + if (!new_capacity) { + return -1; + } + int32_t *new_values = PyMem_RawCalloc(new_capacity, sizeof(int32_t)); + if (new_values == NULL) { + return -1; + } + if (heap->values != NULL) { + memcpy(new_values, heap->values, heap->capacity * sizeof(int32_t)); + PyMem_RawFree(heap->values); + } + heap->values = new_values; + heap->capacity = new_capacity; + return 0; +} + +static void +heap_fini(_PyIndexHeap *heap) +{ + if (heap->values != NULL) { + PyMem_RawFree(heap->values); + heap->values = NULL; + } + heap->size = -1; + heap->capacity = -1; +} + +#define LOCK_POOL(pool) PyMutex_LockFlags(&pool->mutex, _Py_LOCK_DONT_DETACH) +#define UNLOCK_POOL(pool) PyMutex_Unlock(&pool->mutex) + +int32_t +_PyIndexPool_AllocIndex(_PyIndexPool *pool) +{ + LOCK_POOL(pool); + int32_t index; + _PyIndexHeap *free_indices = &pool->free_indices; + if (free_indices->size == 0) { + // No free indices. Make sure the heap can always store all of the + // indices that have been allocated to avoid having to allocate memory + // (which can fail) when freeing an index. Freeing indices happens when + // threads are being destroyed, which makes error handling awkward / + // impossible. This arrangement shifts handling of allocation failures + // to when indices are allocated, which happens at thread creation, + // where we are better equipped to deal with failure.
+ if (heap_ensure_capacity(free_indices, pool->next_index + 1) < 0) { + UNLOCK_POOL(pool); + PyErr_NoMemory(); + return -1; + } + index = pool->next_index++; + } + else { + index = heap_pop(free_indices); + } + UNLOCK_POOL(pool); + return index; +} + +void +_PyIndexPool_FreeIndex(_PyIndexPool *pool, int32_t index) +{ + LOCK_POOL(pool); + heap_add(&pool->free_indices, index); + UNLOCK_POOL(pool); +} + +void +_PyIndexPool_Fini(_PyIndexPool *pool) +{ + heap_fini(&pool->free_indices); +} + +#endif // Py_GIL_DISABLED diff --git a/Python/initconfig.c b/Python/initconfig.c index c142438b02bfd9a..438f8a5c1cf1cec 100644 --- a/Python/initconfig.c +++ b/Python/initconfig.c @@ -134,6 +134,7 @@ static const PyConfigSpec PYCONFIG_SPEC[] = { SPEC(dump_refs_file, WSTR_OPT, READ_ONLY, NO_SYS), #ifdef Py_GIL_DISABLED SPEC(enable_gil, INT, READ_ONLY, NO_SYS), + SPEC(tlbc_enabled, INT, READ_ONLY, NO_SYS), #endif SPEC(faulthandler, BOOL, READ_ONLY, NO_SYS), SPEC(filesystem_encoding, WSTR, READ_ONLY, NO_SYS), @@ -315,8 +316,13 @@ The following implementation-specific options are available:\n\ "\ -X showrefcount: output the total reference count and number of used\n\ memory blocks when the program finishes or after each statement in\n\ - the interactive interpreter; only works on debug builds\n\ --X tracemalloc[=N]: trace Python memory allocations; N sets a traceback limit\n\ + the interactive interpreter; only works on debug builds\n" +#ifdef Py_GIL_DISABLED +"-X tlbc=[0|1]: enable (1) or disable (0) thread-local bytecode. 
Also\n\ + PYTHON_TLBC\n" +#endif +"\ +-X tracemalloc[=N]: trace Python memory allocations; N sets a traceback limit\n \ of N frames (default: 1); also PYTHONTRACEMALLOC=N\n\ -X utf8[=0|1]: enable (1) or disable (0) UTF-8 mode; also PYTHONUTF8\n\ -X warn_default_encoding: enable opt-in EncodingWarning for 'encoding=None';\n\ @@ -400,6 +406,9 @@ static const char usage_envvars[] = #ifdef Py_STATS "PYTHONSTATS : turns on statistics gathering (-X pystats)\n" #endif +#ifdef Py_GIL_DISABLED +"PYTHON_TLBC : when set to 0, disables thread-local bytecode (-X tlbc)\n" +#endif "PYTHONTRACEMALLOC: trace Python memory allocations (-X tracemalloc)\n" "PYTHONUNBUFFERED: disable stdout/stderr buffering (-u)\n" "PYTHONUTF8 : control the UTF-8 mode (-X utf8)\n" @@ -979,6 +988,7 @@ _PyConfig_InitCompatConfig(PyConfig *config) config->cpu_count = -1; #ifdef Py_GIL_DISABLED config->enable_gil = _PyConfig_GIL_DEFAULT; + config->tlbc_enabled = 1; #endif } @@ -1862,6 +1872,36 @@ config_init_cpu_count(PyConfig *config) "n must be greater than 0"); } +static PyStatus +config_init_tlbc(PyConfig *config) +{ +#ifdef Py_GIL_DISABLED + const char *env = config_get_env(config, "PYTHON_TLBC"); + if (env) { + int enabled; + if (_Py_str_to_int(env, &enabled) < 0 || (enabled < 0) || (enabled > 1)) { + return _PyStatus_ERR( + "PYTHON_TLBC=N: N is missing or invalid"); + } + config->tlbc_enabled = enabled; + } + + const wchar_t *xoption = config_get_xoption(config, L"tlbc"); + if (xoption) { + int enabled; + const wchar_t *sep = wcschr(xoption, L'='); + if (!sep || (config_wstr_to_int(sep + 1, &enabled) < 0) || (enabled < 0) || (enabled > 1)) { + return _PyStatus_ERR( + "-X tlbc=n: n is missing or invalid"); + } + config->tlbc_enabled = enabled; + } + return _PyStatus_OK(); +#else + return _PyStatus_OK(); +#endif +} + static PyStatus config_init_perf_profiling(PyConfig *config) { @@ -2111,6 +2151,11 @@ config_read_complex_options(PyConfig *config) } #endif + status = config_init_tlbc(config); + if 
(_PyStatus_EXCEPTION(status)) { + return status; + } + return _PyStatus_OK(); } diff --git a/Python/instrumentation.c b/Python/instrumentation.c index d4568764117563c..87c2addaf809eb2 100644 --- a/Python/instrumentation.c +++ b/Python/instrumentation.c @@ -44,10 +44,24 @@ #define UNLOCK_CODE() Py_END_CRITICAL_SECTION() +#define MODIFY_BYTECODE(code, func, ...) \ + do { \ + PyCodeObject *co = (code); \ + for (Py_ssize_t i = 0; i < code->co_tlbc->size; i++) { \ + char *bc = co->co_tlbc->entries[i]; \ + if (bc == NULL) { \ + continue; \ + } \ + (func)((_Py_CODEUNIT *)bc, __VA_ARGS__); \ + } \ + } while (0) + #else #define LOCK_CODE(code) #define UNLOCK_CODE() +#define MODIFY_BYTECODE(code, func, ...) \ + (func)(_PyCode_CODE(code), __VA_ARGS__) #endif @@ -309,7 +323,8 @@ _PyInstruction_GetLength(PyCodeObject *code, int offset) { ASSERT_WORLD_STOPPED_OR_LOCKED(code); - int opcode = _PyCode_CODE(code)[offset].op.code; + int opcode = + FT_ATOMIC_LOAD_UINT8_RELAXED(_PyCode_CODE(code)[offset].op.code); assert(opcode != 0); assert(opcode != RESERVED); if (opcode == INSTRUMENTED_LINE) { @@ -578,7 +593,9 @@ sanity_check_instrumentation(PyCodeObject *code) _Py_CODEUNIT _Py_GetBaseCodeUnit(PyCodeObject *code, int i) { - _Py_CODEUNIT inst = _PyCode_CODE(code)[i]; + _Py_CODEUNIT *src_instr = _PyCode_CODE(code) + i; + _Py_CODEUNIT inst = { + .cache = FT_ATOMIC_LOAD_UINT16_RELAXED(*(uint16_t *)src_instr)}; int opcode = inst.op.code; if (opcode < MIN_INSTRUMENTED_OPCODE) { inst.op.code = _PyOpcode_Deopt[opcode]; @@ -614,21 +631,22 @@ _Py_GetBaseCodeUnit(PyCodeObject *code, int i) } static void -de_instrument(PyCodeObject *code, int i, int event) +de_instrument(_Py_CODEUNIT *bytecode, _PyCoMonitoringData *monitoring, int i, + int event) { assert(event != PY_MONITORING_EVENT_INSTRUCTION); assert(event != PY_MONITORING_EVENT_LINE); - _Py_CODEUNIT *instr = &_PyCode_CODE(code)[i]; + _Py_CODEUNIT *instr = &bytecode[i]; uint8_t *opcode_ptr = &instr->op.code; int opcode = *opcode_ptr; 
assert(opcode != ENTER_EXECUTOR); if (opcode == INSTRUMENTED_LINE) { - opcode_ptr = &code->_co_monitoring->lines[i].original_opcode; + opcode_ptr = &monitoring->lines[i].original_opcode; opcode = *opcode_ptr; } if (opcode == INSTRUMENTED_INSTRUCTION) { - opcode_ptr = &code->_co_monitoring->per_instruction_opcodes[i]; + opcode_ptr = &monitoring->per_instruction_opcodes[i]; opcode = *opcode_ptr; } int deinstrumented = DE_INSTRUMENT[opcode]; @@ -644,65 +662,68 @@ de_instrument(PyCodeObject *code, int i, int event) } static void -de_instrument_line(PyCodeObject *code, int i) +de_instrument_line(_Py_CODEUNIT *bytecode, _PyCoMonitoringData *monitoring, + int i) { - _Py_CODEUNIT *instr = &_PyCode_CODE(code)[i]; + _Py_CODEUNIT *instr = &bytecode[i]; int opcode = instr->op.code; if (opcode != INSTRUMENTED_LINE) { return; } - _PyCoLineInstrumentationData *lines = &code->_co_monitoring->lines[i]; + _PyCoLineInstrumentationData *lines = &monitoring->lines[i]; int original_opcode = lines->original_opcode; if (original_opcode == INSTRUMENTED_INSTRUCTION) { - lines->original_opcode = code->_co_monitoring->per_instruction_opcodes[i]; + lines->original_opcode = monitoring->per_instruction_opcodes[i]; } CHECK(original_opcode != 0); CHECK(original_opcode == _PyOpcode_Deopt[original_opcode]); - instr->op.code = original_opcode; + FT_ATOMIC_STORE_UINT8(instr->op.code, original_opcode); if (_PyOpcode_Caches[original_opcode]) { - instr[1].counter = adaptive_counter_warmup(); + FT_ATOMIC_STORE_UINT16_RELAXED(instr[1].counter.value_and_backoff, + adaptive_counter_warmup().value_and_backoff); } assert(instr->op.code != INSTRUMENTED_LINE); } static void -de_instrument_per_instruction(PyCodeObject *code, int i) +de_instrument_per_instruction(_Py_CODEUNIT *bytecode, + _PyCoMonitoringData *monitoring, int i) { - _Py_CODEUNIT *instr = &_PyCode_CODE(code)[i]; + _Py_CODEUNIT *instr = &bytecode[i]; uint8_t *opcode_ptr = &instr->op.code; int opcode = *opcode_ptr; if (opcode == INSTRUMENTED_LINE) { - 
opcode_ptr = &code->_co_monitoring->lines[i].original_opcode; + opcode_ptr = &monitoring->lines[i].original_opcode; opcode = *opcode_ptr; } if (opcode != INSTRUMENTED_INSTRUCTION) { return; } - int original_opcode = code->_co_monitoring->per_instruction_opcodes[i]; + int original_opcode = monitoring->per_instruction_opcodes[i]; CHECK(original_opcode != 0); CHECK(original_opcode == _PyOpcode_Deopt[original_opcode]); - *opcode_ptr = original_opcode; + FT_ATOMIC_STORE_UINT8_RELAXED(*opcode_ptr, original_opcode); if (_PyOpcode_Caches[original_opcode]) { - instr[1].counter = adaptive_counter_warmup(); + FT_ATOMIC_STORE_UINT16_RELAXED(instr[1].counter.value_and_backoff, + adaptive_counter_warmup().value_and_backoff); } assert(*opcode_ptr != INSTRUMENTED_INSTRUCTION); assert(instr->op.code != INSTRUMENTED_INSTRUCTION); } - static void -instrument(PyCodeObject *code, int i) +instrument(_Py_CODEUNIT *bytecode, _PyCoMonitoringData *monitoring, int i) { - _Py_CODEUNIT *instr = &_PyCode_CODE(code)[i]; + _Py_CODEUNIT *instr = &bytecode[i]; uint8_t *opcode_ptr = &instr->op.code; int opcode =*opcode_ptr; if (opcode == INSTRUMENTED_LINE) { - _PyCoLineInstrumentationData *lines = &code->_co_monitoring->lines[i]; + _PyCoLineInstrumentationData *lines = &monitoring->lines[i]; opcode_ptr = &lines->original_opcode; opcode = *opcode_ptr; } if (opcode == INSTRUMENTED_INSTRUCTION) { - opcode_ptr = &code->_co_monitoring->per_instruction_opcodes[i]; + opcode_ptr = &monitoring->per_instruction_opcodes[i]; opcode = *opcode_ptr; CHECK(opcode != INSTRUMENTED_INSTRUCTION && opcode != INSTRUMENTED_LINE); CHECK(opcode == _PyOpcode_Deopt[opcode]); @@ -716,52 +737,52 @@ instrument(PyCodeObject *code, int i) if (_PyOpcode_Caches[deopt]) { FT_ATOMIC_STORE_UINT16_RELAXED(instr[1].counter.value_and_backoff, adaptive_counter_warmup().value_and_backoff); - instr[1].counter = adaptive_counter_warmup(); } } } static void -instrument_line(PyCodeObject *code, int i) +instrument_line(_Py_CODEUNIT *bytecode, 
_PyCoMonitoringData *monitoring, int i) { - uint8_t *opcode_ptr = &_PyCode_CODE(code)[i].op.code; + uint8_t *opcode_ptr = &bytecode[i].op.code; int opcode = *opcode_ptr; if (opcode == INSTRUMENTED_LINE) { return; } - _PyCoLineInstrumentationData *lines = &code->_co_monitoring->lines[i]; + _PyCoLineInstrumentationData *lines = &monitoring->lines[i]; lines->original_opcode = _PyOpcode_Deopt[opcode]; CHECK(lines->original_opcode > 0); - *opcode_ptr = INSTRUMENTED_LINE; + FT_ATOMIC_STORE_UINT8_RELAXED(*opcode_ptr, INSTRUMENTED_LINE); } static void -instrument_per_instruction(PyCodeObject *code, int i) +instrument_per_instruction(_Py_CODEUNIT *bytecode, + _PyCoMonitoringData *monitoring, int i) { - _Py_CODEUNIT *instr = &_PyCode_CODE(code)[i]; + _Py_CODEUNIT *instr = &bytecode[i]; uint8_t *opcode_ptr = &instr->op.code; int opcode = *opcode_ptr; if (opcode == INSTRUMENTED_LINE) { - _PyCoLineInstrumentationData *lines = &code->_co_monitoring->lines[i]; + _PyCoLineInstrumentationData *lines = &monitoring->lines[i]; opcode_ptr = &lines->original_opcode; opcode = *opcode_ptr; } if (opcode == INSTRUMENTED_INSTRUCTION) { - assert(code->_co_monitoring->per_instruction_opcodes[i] > 0); + assert(monitoring->per_instruction_opcodes[i] > 0); return; } CHECK(opcode != 0); if (is_instrumented(opcode)) { - code->_co_monitoring->per_instruction_opcodes[i] = opcode; + monitoring->per_instruction_opcodes[i] = opcode; } else { assert(opcode != 0); assert(_PyOpcode_Deopt[opcode] != 0); assert(_PyOpcode_Deopt[opcode] != RESUME); - code->_co_monitoring->per_instruction_opcodes[i] = _PyOpcode_Deopt[opcode]; + monitoring->per_instruction_opcodes[i] = _PyOpcode_Deopt[opcode]; } - assert(code->_co_monitoring->per_instruction_opcodes[i] > 0); - *opcode_ptr = INSTRUMENTED_INSTRUCTION; + assert(monitoring->per_instruction_opcodes[i] > 0); + FT_ATOMIC_STORE_UINT8_RELAXED(*opcode_ptr, INSTRUMENTED_INSTRUCTION); } static void @@ -773,19 +794,19 @@ remove_tools(PyCodeObject * code, int offset, int 
event, int tools) assert(PY_MONITORING_IS_INSTRUMENTED_EVENT(event)); assert(opcode_has_event(_Py_GetBaseCodeUnit(code, offset).op.code)); _PyCoMonitoringData *monitoring = code->_co_monitoring; + bool should_de_instrument; if (monitoring && monitoring->tools) { monitoring->tools[offset] &= ~tools; - if (monitoring->tools[offset] == 0) { - de_instrument(code, offset, event); - } + should_de_instrument = (monitoring->tools[offset] == 0); } else { /* Single tool */ uint8_t single_tool = code->_co_monitoring->active_monitors.tools[event]; assert(_Py_popcount32(single_tool) <= 1); - if (((single_tool & tools) == single_tool)) { - de_instrument(code, offset, event); - } + should_de_instrument = ((single_tool & tools) == single_tool); + } + if (should_de_instrument) { + MODIFY_BYTECODE(code, de_instrument, monitoring, offset, event); } } @@ -804,22 +825,23 @@ remove_line_tools(PyCodeObject * code, int offset, int tools) { ASSERT_WORLD_STOPPED_OR_LOCKED(code); - assert(code->_co_monitoring); - if (code->_co_monitoring->line_tools) + _PyCoMonitoringData *monitoring = code->_co_monitoring; + assert(monitoring); + bool should_de_instrument; + if (monitoring->line_tools) { - uint8_t *toolsptr = &code->_co_monitoring->line_tools[offset]; + uint8_t *toolsptr = &monitoring->line_tools[offset]; *toolsptr &= ~tools; - if (*toolsptr == 0 ) { - de_instrument_line(code, offset); - } + should_de_instrument = (*toolsptr == 0); } else { /* Single tool */ - uint8_t single_tool = code->_co_monitoring->active_monitors.tools[PY_MONITORING_EVENT_LINE]; + uint8_t single_tool = monitoring->active_monitors.tools[PY_MONITORING_EVENT_LINE]; assert(_Py_popcount32(single_tool) <= 1); - if (((single_tool & tools) == single_tool)) { - de_instrument_line(code, offset); - } + should_de_instrument = ((single_tool & tools) == single_tool); + } + if (should_de_instrument) { + MODIFY_BYTECODE(code, de_instrument_line, monitoring, offset); } } @@ -841,7 +863,7 @@ add_tools(PyCodeObject * code, int offset, 
int event, int tools) assert(_Py_popcount32(tools) == 1); assert(tools_is_subset_for_event(code, event, tools)); } - instrument(code, offset); + MODIFY_BYTECODE(code, instrument, code->_co_monitoring, offset); } static void @@ -858,7 +880,7 @@ add_line_tools(PyCodeObject * code, int offset, int tools) /* Single tool */ assert(_Py_popcount32(tools) == 1); } - instrument_line(code, offset); + MODIFY_BYTECODE(code, instrument_line, code->_co_monitoring, offset); } @@ -876,7 +898,7 @@ add_per_instruction_tools(PyCodeObject * code, int offset, int tools) /* Single tool */ assert(_Py_popcount32(tools) == 1); } - instrument_per_instruction(code, offset); + MODIFY_BYTECODE(code, instrument_per_instruction, code->_co_monitoring, offset); } @@ -885,21 +907,22 @@ remove_per_instruction_tools(PyCodeObject * code, int offset, int tools) { ASSERT_WORLD_STOPPED_OR_LOCKED(code); + _PyCoMonitoringData *monitoring = code->_co_monitoring; assert(code->_co_monitoring); + bool should_de_instrument; if (code->_co_monitoring->per_instruction_tools) { uint8_t *toolsptr = &code->_co_monitoring->per_instruction_tools[offset]; *toolsptr &= ~tools; - if (*toolsptr == 0) { - de_instrument_per_instruction(code, offset); - } + should_de_instrument = (*toolsptr == 0); } else { /* Single tool */ uint8_t single_tool = code->_co_monitoring->active_monitors.tools[PY_MONITORING_EVENT_INSTRUCTION]; assert(_Py_popcount32(single_tool) <= 1); - if (((single_tool & tools) == single_tool)) { - de_instrument_per_instruction(code, offset); - } + should_de_instrument = ((single_tool & tools) == single_tool); + } + if (should_de_instrument) { + MODIFY_BYTECODE(code, de_instrument_per_instruction, monitoring, offset); } } @@ -1087,7 +1110,7 @@ call_instrumentation_vector( PyCodeObject *code = _PyFrame_GetCode(frame); assert(args[1] == NULL); args[1] = (PyObject *)code; - int offset = (int)(instr - _PyCode_CODE(code)); + int offset = (int)(instr - _PyFrame_GetBytecode(frame)); /* Offset visible to user should be 
the offset in bytes, as that is the * convention for APIs involving code offsets. */ int bytes_offset = offset * (int)sizeof(_Py_CODEUNIT); @@ -1173,8 +1196,7 @@ _Py_call_instrumentation_jump( assert(event == PY_MONITORING_EVENT_JUMP || event == PY_MONITORING_EVENT_BRANCH); assert(frame->instr_ptr == instr); - PyCodeObject *code = _PyFrame_GetCode(frame); - int to = (int)(target - _PyCode_CODE(code)); + int to = (int)(target - _PyFrame_GetBytecode(frame)); PyObject *to_obj = PyLong_FromLong(to * (int)sizeof(_Py_CODEUNIT)); if (to_obj == NULL) { return NULL; @@ -1240,7 +1262,8 @@ _Py_call_instrumentation_line(PyThreadState *tstate, _PyInterpreterFrame* frame, PyCodeObject *code = _PyFrame_GetCode(frame); assert(tstate->tracing == 0); assert(debug_check_sanity(tstate->interp, code)); - int i = (int)(instr - _PyCode_CODE(code)); + _Py_CODEUNIT *bytecode = _PyFrame_GetBytecode(frame); + int i = (int)(instr - bytecode); _PyCoMonitoringData *monitoring = code->_co_monitoring; _PyCoLineInstrumentationData *line_data = &monitoring->lines[i]; @@ -1256,10 +1279,10 @@ _Py_call_instrumentation_line(PyThreadState *tstate, _PyInterpreterFrame* frame, line = compute_line(code, i, line_delta); assert(line >= 0); assert(prev != NULL); - int prev_index = (int)(prev - _PyCode_CODE(code)); + int prev_index = (int)(prev - bytecode); int prev_line = _Py_Instrumentation_GetLine(code, prev_index); if (prev_line == line) { - int prev_opcode = _PyCode_CODE(code)[prev_index].op.code; + int prev_opcode = bytecode[prev_index].op.code; /* RESUME and INSTRUMENTED_RESUME are needed for the operation of * instrumentation, so must never be hidden by an INSTRUMENTED_LINE. 
*/ @@ -1359,7 +1382,7 @@ int _Py_call_instrumentation_instruction(PyThreadState *tstate, _PyInterpreterFrame* frame, _Py_CODEUNIT *instr) { PyCodeObject *code = _PyFrame_GetCode(frame); - int offset = (int)(instr - _PyCode_CODE(code)); + int offset = (int)(instr - _PyFrame_GetBytecode(frame)); _PyCoMonitoringData *instrumentation_data = code->_co_monitoring; assert(instrumentation_data->per_instruction_opcodes); int next_opcode = instrumentation_data->per_instruction_opcodes[offset]; diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 0a7e44ef78dda9c..54821b23716eeb5 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -17,6 +17,8 @@ /* _QUICKEN_RESUME is not a viable micro-op for tier 2 */ + /* _LOAD_BYTECODE is not a viable micro-op for tier 2 */ + case _RESUME_CHECK: { break; } diff --git a/Python/pystate.c b/Python/pystate.c index 36b31f3b9e42005..ded5fde9c4bb511 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -1513,6 +1513,11 @@ new_threadstate(PyInterpreterState *interp, int whence) PyMem_RawFree(new_tstate); return NULL; } + int32_t tlbc_idx = _Py_ReserveTLBCIndex(interp); + if (tlbc_idx < 0) { + PyMem_RawFree(new_tstate); + return NULL; + } #endif /* We serialize concurrent creation to protect global state. */ @@ -1555,6 +1560,7 @@ new_threadstate(PyInterpreterState *interp, int whence) #ifdef Py_GIL_DISABLED // Must be called with lock unlocked to avoid lock ordering deadlocks. _Py_qsbr_register(tstate, interp, qsbr_idx); + tstate->tlbc_index = tlbc_idx; #endif return (PyThreadState *)tstate; @@ -1706,6 +1712,10 @@ PyThreadState_Clear(PyThreadState *tstate) // Remove ourself from the biased reference counting table of threads. _Py_brc_remove_thread(tstate); + + // Release our thread-local copies of the bytecode for reuse by another + // thread + _Py_ClearTLBCIndex((_PyThreadStateImpl *)tstate); #endif // Merge our queue of pointers to be freed into the interpreter queue. 
diff --git a/Python/specialize.c b/Python/specialize.c index ae47809305a3007..86cb997ca2ced3a 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -24,6 +24,25 @@ extern const char *_PyUOpName(int index); * ./adaptive.md */ +#ifdef Py_GIL_DISABLED +#define SET_OPCODE_OR_RETURN(instr, opcode) \ + do { \ + uint8_t old_op = _Py_atomic_load_uint8_relaxed(&(instr)->op.code); \ + if (old_op >= MIN_INSTRUMENTED_OPCODE) { \ + /* Lost race with instrumentation */ \ + return; \ + } \ + if (!_Py_atomic_compare_exchange_uint8(&(instr)->op.code, &old_op, \ + (opcode))) { \ + /* Lost race with instrumentation */ \ + assert(old_op >= MIN_INSTRUMENTED_OPCODE); \ + return; \ + } \ + } while (0) +#else +#define SET_OPCODE_OR_RETURN(instr, opcode) (instr)->op.code = (opcode) +#endif + #ifdef Py_STATS GCStats _py_gc_stats[NUM_GENERATIONS] = { 0 }; static PyStats _Py_stats_struct = { .gc_stats = _py_gc_stats }; @@ -436,16 +455,25 @@ do { \ # define SPECIALIZATION_FAIL(opcode, kind) ((void)0) #endif -// Initialize warmup counters and insert superinstructions. This cannot fail. +// Initialize warmup counters and optimize instructions. This cannot fail. 
void -_PyCode_Quicken(PyCodeObject *code) +_PyCode_Quicken(_Py_CODEUNIT *instructions, Py_ssize_t size, PyObject *consts, + int enable_counters) { - #if ENABLE_SPECIALIZATION + #if ENABLE_SPECIALIZATION_FT + _Py_BackoffCounter jump_counter, adaptive_counter; + if (enable_counters) { + jump_counter = initial_jump_backoff_counter(); + adaptive_counter = adaptive_counter_warmup(); + } + else { + jump_counter = initial_unreachable_backoff_counter(); + adaptive_counter = initial_unreachable_backoff_counter(); + } int opcode = 0; int oparg = 0; - _Py_CODEUNIT *instructions = _PyCode_CODE(code); /* The last code unit cannot have a cache, so we don't need to check it */ - for (int i = 0; i < Py_SIZE(code)-1; i++) { + for (Py_ssize_t i = 0; i < size-1; i++) { opcode = instructions[i].op.code; int caches = _PyOpcode_Caches[opcode]; oparg = (oparg << 8) | instructions[i].op.arg; @@ -453,7 +481,7 @@ _PyCode_Quicken(PyCodeObject *code) // The initial value depends on the opcode switch (opcode) { case JUMP_BACKWARD: - instructions[i + 1].counter = initial_jump_backoff_counter(); + instructions[i + 1].counter = jump_counter; break; case POP_JUMP_IF_FALSE: case POP_JUMP_IF_TRUE: @@ -462,7 +490,7 @@ _PyCode_Quicken(PyCodeObject *code) instructions[i + 1].cache = 0x5555; // Alternating 0, 1 bits break; default: - instructions[i + 1].counter = adaptive_counter_warmup(); + instructions[i + 1].counter = adaptive_counter; break; } i += caches; @@ -471,7 +499,7 @@ _PyCode_Quicken(PyCodeObject *code) /* We can't do this in the bytecode compiler as * marshalling can intern strings and make them immortal. 
*/ - PyObject *obj = PyTuple_GET_ITEM(code->co_consts, oparg); + PyObject *obj = PyTuple_GET_ITEM(consts, oparg); if (_Py_IsImmortal(obj)) { instructions[i].op.code = LOAD_CONST_IMMORTAL; } @@ -480,7 +508,7 @@ _PyCode_Quicken(PyCodeObject *code) oparg = 0; } } - #endif /* ENABLE_SPECIALIZATION */ + #endif /* ENABLE_SPECIALIZATION_FT */ } #define SIMPLE_FUNCTION 0 @@ -2243,9 +2271,10 @@ _Py_Specialize_BinaryOp(_PyStackRef lhs_st, _PyStackRef rhs_st, _Py_CODEUNIT *in { PyObject *lhs = PyStackRef_AsPyObjectBorrow(lhs_st); PyObject *rhs = PyStackRef_AsPyObjectBorrow(rhs_st); - assert(ENABLE_SPECIALIZATION); + assert(ENABLE_SPECIALIZATION_FT); assert(_PyOpcode_Caches[BINARY_OP] == INLINE_CACHE_ENTRIES_BINARY_OP); _PyBinaryOpCache *cache = (_PyBinaryOpCache *)(instr + 1); + uint8_t specialized_op; switch (oparg) { case NB_ADD: case NB_INPLACE_ADD: @@ -2256,18 +2285,18 @@ _Py_Specialize_BinaryOp(_PyStackRef lhs_st, _PyStackRef rhs_st, _Py_CODEUNIT *in _Py_CODEUNIT next = instr[INLINE_CACHE_ENTRIES_BINARY_OP + 1]; bool to_store = (next.op.code == STORE_FAST); if (to_store && PyStackRef_AsPyObjectBorrow(locals[next.op.arg]) == lhs) { - instr->op.code = BINARY_OP_INPLACE_ADD_UNICODE; + specialized_op = BINARY_OP_INPLACE_ADD_UNICODE; goto success; } - instr->op.code = BINARY_OP_ADD_UNICODE; + specialized_op = BINARY_OP_ADD_UNICODE; goto success; } if (PyLong_CheckExact(lhs)) { - instr->op.code = BINARY_OP_ADD_INT; + specialized_op = BINARY_OP_ADD_INT; goto success; } if (PyFloat_CheckExact(lhs)) { - instr->op.code = BINARY_OP_ADD_FLOAT; + specialized_op = BINARY_OP_ADD_FLOAT; goto success; } break; @@ -2277,11 +2306,11 @@ _Py_Specialize_BinaryOp(_PyStackRef lhs_st, _PyStackRef rhs_st, _Py_CODEUNIT *in break; } if (PyLong_CheckExact(lhs)) { - instr->op.code = BINARY_OP_MULTIPLY_INT; + specialized_op = BINARY_OP_MULTIPLY_INT; goto success; } if (PyFloat_CheckExact(lhs)) { - instr->op.code = BINARY_OP_MULTIPLY_FLOAT; + specialized_op = BINARY_OP_MULTIPLY_FLOAT; goto success; } 
break; @@ -2291,22 +2320,23 @@ _Py_Specialize_BinaryOp(_PyStackRef lhs_st, _PyStackRef rhs_st, _Py_CODEUNIT *in break; } if (PyLong_CheckExact(lhs)) { - instr->op.code = BINARY_OP_SUBTRACT_INT; + specialized_op = BINARY_OP_SUBTRACT_INT; goto success; } if (PyFloat_CheckExact(lhs)) { - instr->op.code = BINARY_OP_SUBTRACT_FLOAT; + specialized_op = BINARY_OP_SUBTRACT_FLOAT; goto success; } break; } SPECIALIZATION_FAIL(BINARY_OP, binary_op_fail_kind(oparg, lhs, rhs)); STAT_INC(BINARY_OP, failure); - instr->op.code = BINARY_OP; + SET_OPCODE_OR_RETURN(instr, BINARY_OP); cache->counter = adaptive_counter_backoff(cache->counter); return; success: STAT_INC(BINARY_OP, success); + SET_OPCODE_OR_RETURN(instr, specialized_op); cache->counter = adaptive_counter_cooldown(); } diff --git a/Python/sysmodule.c b/Python/sysmodule.c index a4abd7c3c45709b..a086bb979efa9c3 100644 --- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -2174,6 +2174,11 @@ sys__clear_internal_caches_impl(PyObject *module) #ifdef _Py_TIER2 PyInterpreterState *interp = _PyInterpreterState_GET(); _Py_Executors_InvalidateAll(interp, 0); +#endif +#ifdef Py_GIL_DISABLED + if (_Py_ClearUnusedTLBC(_PyInterpreterState_GET()) < 0) { + return NULL; + } #endif PyType_ClearCache(); Py_RETURN_NONE; diff --git a/Tools/gdb/libpython.py b/Tools/gdb/libpython.py index 946af4be1a7589c..ed254152d7da41f 100755 --- a/Tools/gdb/libpython.py +++ b/Tools/gdb/libpython.py @@ -77,6 +77,10 @@ def _managed_dict_offset(): else: return -3 * _sizeof_void_p() +def _interp_frame_has_tlbc_index(): + interp_frame = gdb.lookup_type("_PyInterpreterFrame") + return any(field.name == "tlbc_index" for field in interp_frame.fields()) + Py_TPFLAGS_INLINE_VALUES = (1 << 2) Py_TPFLAGS_MANAGED_DICT = (1 << 4) @@ -105,6 +109,8 @@ def _managed_dict_offset(): UNABLE_READ_INFO_PYTHON_FRAME = 'Unable to read information on python frame' EVALFRAME = '_PyEval_EvalFrameDefault' +INTERP_FRAME_HAS_TLBC_INDEX = _interp_frame_has_tlbc_index() + class 
NullPyObjectPtr(RuntimeError): pass @@ -693,6 +699,16 @@ def parse_location_table(firstlineno, linetable): yield addr, end_addr, line addr = end_addr + +class PyCodeArrayPtr: + def __init__(self, gdbval): + self._gdbval = gdbval + + def get_entry(self, index): + assert (index >= 0) and (index < self._gdbval["size"]) + return self._gdbval["entries"][index] + + class PyCodeObjectPtr(PyObjectPtr): """ Class wrapping a gdb.Value that's a PyCodeObject* i.e. a instance @@ -1085,7 +1101,12 @@ def _f_nlocalsplus(self): def _f_lasti(self): codeunit_p = gdb.lookup_type("_Py_CODEUNIT").pointer() instr_ptr = self._gdbval["instr_ptr"] - first_instr = self._f_code().field("co_code_adaptive").cast(codeunit_p) + if INTERP_FRAME_HAS_TLBC_INDEX: + tlbc_index = self._gdbval["tlbc_index"] + code_arr = PyCodeArrayPtr(self._f_code().field("co_tlbc")) + first_instr = code_arr.get_entry(tlbc_index).cast(codeunit_p) + else: + first_instr = self._f_code().field("co_code_adaptive").cast(codeunit_p) return int(instr_ptr - first_instr) def is_shim(self): From 9b7294c3a560f43f1e26a0f48c258829076d6464 Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Mon, 4 Nov 2024 19:29:57 +0000 Subject: [PATCH 05/54] GH-126363: Speed up pattern parsing in `pathlib.Path.glob()` (#126364) The implementation of `Path.glob()` does rather a hacky thing: it calls `self.with_segments()` to convert the given pattern to a `Path` object, and then peeks at the private `_raw_path` attribute to see if pathlib removed a trailing slash from the pattern. In this patch, we make `glob()` use a new `_parse_pattern()` classmethod that splits the pattern into parts while preserving information about any trailing slash. This skips the cost of creating a `Path` object, and avoids some path anchor normalization, which makes `Path.glob()` slightly faster. But mostly it's about making the code less naughty. Co-authored-by: Tomas R. 
--- Lib/pathlib/_local.py | 41 ++++++++++++------- ...-11-03-14-43-51.gh-issue-126363.Xus7vU.rst | 2 + 2 files changed, 29 insertions(+), 14 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-11-03-14-43-51.gh-issue-126363.Xus7vU.rst diff --git a/Lib/pathlib/_local.py b/Lib/pathlib/_local.py index ef072b83d969045..99474e1f71a3073 100644 --- a/Lib/pathlib/_local.py +++ b/Lib/pathlib/_local.py @@ -274,6 +274,31 @@ def _parse_path(cls, path): root = sep return drv, root, [x for x in rel.split(sep) if x and x != '.'] + @classmethod + def _parse_pattern(cls, pattern): + """Parse a glob pattern to a list of parts. This is much like + _parse_path, except: + + - Rather than normalizing and returning the drive and root, we raise + NotImplementedError if either are present. + - If the path has no real parts, we raise ValueError. + - If the path ends in a slash, then a final empty part is added. + """ + drv, root, rel = cls.parser.splitroot(pattern) + if root or drv: + raise NotImplementedError("Non-relative patterns are unsupported") + sep = cls.parser.sep + altsep = cls.parser.altsep + if altsep: + rel = rel.replace(altsep, sep) + parts = [x for x in rel.split(sep) if x and x != '.'] + if not parts: + raise ValueError(f"Unacceptable pattern: {str(pattern)!r}") + elif rel.endswith(sep): + # GH-65238: preserve trailing slash in glob patterns. + parts.append('') + return parts + @property def _raw_path(self): """The joined but unnormalized path.""" @@ -641,17 +666,7 @@ def glob(self, pattern, *, case_sensitive=None, recurse_symlinks=False): kind, including directories) matching the given relative pattern. 
""" sys.audit("pathlib.Path.glob", self, pattern) - if not isinstance(pattern, PurePath): - pattern = self.with_segments(pattern) - if pattern.anchor: - raise NotImplementedError("Non-relative patterns are unsupported") - parts = pattern._tail.copy() - if not parts: - raise ValueError("Unacceptable pattern: {!r}".format(pattern)) - raw = pattern._raw_path - if raw[-1] in (self.parser.sep, self.parser.altsep): - # GH-65238: pathlib doesn't preserve trailing slash. Add it back. - parts.append('') + parts = self._parse_pattern(pattern) select = self._glob_selector(parts[::-1], case_sensitive, recurse_symlinks) root = str(self) paths = select(root) @@ -672,9 +687,7 @@ def rglob(self, pattern, *, case_sensitive=None, recurse_symlinks=False): this subtree. """ sys.audit("pathlib.Path.rglob", self, pattern) - if not isinstance(pattern, PurePath): - pattern = self.with_segments(pattern) - pattern = '**' / pattern + pattern = self.parser.join('**', pattern) return self.glob(pattern, case_sensitive=case_sensitive, recurse_symlinks=recurse_symlinks) def walk(self, top_down=True, on_error=None, follow_symlinks=False): diff --git a/Misc/NEWS.d/next/Library/2024-11-03-14-43-51.gh-issue-126363.Xus7vU.rst b/Misc/NEWS.d/next/Library/2024-11-03-14-43-51.gh-issue-126363.Xus7vU.rst new file mode 100644 index 000000000000000..20fea9b9ef99a03 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-11-03-14-43-51.gh-issue-126363.Xus7vU.rst @@ -0,0 +1,2 @@ +Speed up pattern parsing in :meth:`pathlib.Path.glob` by skipping creation +of a :class:`pathlib.Path` object for the pattern. From 532fc08102d62c04d55f5b8aac00bd9e7e12ff4b Mon Sep 17 00:00:00 2001 From: "Erlend E. 
Aasland" Date: Mon, 4 Nov 2024 21:48:09 +0100 Subject: [PATCH 06/54] gh-89640: Hardcode WASM float word ordering as little endian (#126387) --- ...4-11-04-09-42-04.gh-issue-89640.QBv05o.rst | 1 + configure | 47 ++++++++----------- configure.ac | 42 ++++++++--------- pyconfig.h.in | 4 -- 4 files changed, 41 insertions(+), 53 deletions(-) create mode 100644 Misc/NEWS.d/next/Build/2024-11-04-09-42-04.gh-issue-89640.QBv05o.rst diff --git a/Misc/NEWS.d/next/Build/2024-11-04-09-42-04.gh-issue-89640.QBv05o.rst b/Misc/NEWS.d/next/Build/2024-11-04-09-42-04.gh-issue-89640.QBv05o.rst new file mode 100644 index 000000000000000..4fa44a1d6493b43 --- /dev/null +++ b/Misc/NEWS.d/next/Build/2024-11-04-09-42-04.gh-issue-89640.QBv05o.rst @@ -0,0 +1 @@ +Hard-code float word ordering as little endian on WASM. diff --git a/configure b/configure index 1097747e0551792..e529527214da291 100755 --- a/configure +++ b/configure @@ -24227,41 +24227,34 @@ printf "%s\n" "$ax_cv_c_float_words_bigendian" >&6; } case $ax_cv_c_float_words_bigendian in yes) -printf "%s\n" "#define FLOAT_WORDS_BIGENDIAN 1" >>confdefs.h +printf "%s\n" "#define DOUBLE_IS_BIG_ENDIAN_IEEE754 1" >>confdefs.h ;; no) - ;; - *) - as_fn_error $? " - -Unknown float word ordering. You need to manually preset -ax_cv_c_float_words_bigendian=no (or yes) according to your system. - - " "$LINENO" 5 ;; -esac +printf "%s\n" "#define DOUBLE_IS_LITTLE_ENDIAN_IEEE754 1" >>confdefs.h + ;; + *) + case $host_cpu in #( + *arm*) : + # Some ARM platforms use a mixed-endian representation for + # doubles. While Python doesn't currently have full support + # for these platforms (see e.g., issue 1762561), we can at + # least make sure that float <-> string conversions work. + # FLOAT_WORDS_BIGENDIAN doesn't actually detect this case, + # but if it's not big or little, then it must be this? 
-if test "$ax_cv_c_float_words_bigendian" = "yes" -then - -printf "%s\n" "#define DOUBLE_IS_BIG_ENDIAN_IEEE754 1" >>confdefs.h - -elif test "$ax_cv_c_float_words_bigendian" = "no" -then +printf "%s\n" "#define DOUBLE_IS_ARM_MIXED_ENDIAN_IEEE754 1" >>confdefs.h + ;; #( + wasm*) : printf "%s\n" "#define DOUBLE_IS_LITTLE_ENDIAN_IEEE754 1" >>confdefs.h + ;; #( + *) : + ;; +esac ;; +esac -else - # Some ARM platforms use a mixed-endian representation for doubles. - # While Python doesn't currently have full support for these platforms - # (see e.g., issue 1762561), we can at least make sure that float <-> string - # conversions work. - # FLOAT_WORDS_BIGENDIAN doesn't actually detect this case, but if it's not big - # or little, then it must be this? - -printf "%s\n" "#define DOUBLE_IS_ARM_MIXED_ENDIAN_IEEE754 1" >>confdefs.h -fi # The short float repr introduced in Python 3.1 requires the # correctly-rounded string <-> double conversion functions from diff --git a/configure.ac b/configure.ac index 6d514705e91ce54..bc67a0596ac2b43 100644 --- a/configure.ac +++ b/configure.ac @@ -5946,28 +5946,26 @@ AS_VAR_IF([ac_cv_gcc_asm_for_x64], [yes], [ # * Check for various properties of floating point * # ************************************************** -AX_C_FLOAT_WORDS_BIGENDIAN -if test "$ax_cv_c_float_words_bigendian" = "yes" -then - AC_DEFINE([DOUBLE_IS_BIG_ENDIAN_IEEE754], [1], - [Define if C doubles are 64-bit IEEE 754 binary format, stored - with the most significant byte first]) -elif test "$ax_cv_c_float_words_bigendian" = "no" -then - AC_DEFINE([DOUBLE_IS_LITTLE_ENDIAN_IEEE754], [1], - [Define if C doubles are 64-bit IEEE 754 binary format, stored - with the least significant byte first]) -else - # Some ARM platforms use a mixed-endian representation for doubles. - # While Python doesn't currently have full support for these platforms - # (see e.g., issue 1762561), we can at least make sure that float <-> string - # conversions work. 
- # FLOAT_WORDS_BIGENDIAN doesn't actually detect this case, but if it's not big - # or little, then it must be this? - AC_DEFINE([DOUBLE_IS_ARM_MIXED_ENDIAN_IEEE754], [1], - [Define if C doubles are 64-bit IEEE 754 binary format, stored - in ARM mixed-endian order (byte order 45670123)]) -fi +AX_C_FLOAT_WORDS_BIGENDIAN( + [AC_DEFINE([DOUBLE_IS_BIG_ENDIAN_IEEE754], [1], + [Define if C doubles are 64-bit IEEE 754 binary format, + stored with the most significant byte first])], + [AC_DEFINE([DOUBLE_IS_LITTLE_ENDIAN_IEEE754], [1], + [Define if C doubles are 64-bit IEEE 754 binary format, + stored with the least significant byte first])], + [AS_CASE([$host_cpu], + [*arm*], [# Some ARM platforms use a mixed-endian representation for + # doubles. While Python doesn't currently have full support + # for these platforms (see e.g., issue 1762561), we can at + # least make sure that float <-> string conversions work. + # FLOAT_WORDS_BIGENDIAN doesn't actually detect this case, + # but if it's not big or little, then it must be this? + AC_DEFINE([DOUBLE_IS_ARM_MIXED_ENDIAN_IEEE754], [1], + [Define if C doubles are 64-bit IEEE 754 binary format, + stored in ARM mixed-endian order (byte order 45670123)])], + [wasm*], [AC_DEFINE([DOUBLE_IS_LITTLE_ENDIAN_IEEE754], [1], + [Define if C doubles are 64-bit IEEE 754 binary format, + stored with the least significant byte first])])]) # The short float repr introduced in Python 3.1 requires the # correctly-rounded string <-> double conversion functions from diff --git a/pyconfig.h.in b/pyconfig.h.in index fcb8a965b1e4769..924d86627b0e9ba 100644 --- a/pyconfig.h.in +++ b/pyconfig.h.in @@ -47,10 +47,6 @@ /* Define if --enable-ipv6 is specified */ #undef ENABLE_IPV6 -/* Define to 1 if your system stores words within floats with the most - significant word first */ -#undef FLOAT_WORDS_BIGENDIAN - /* Define if getpgrp() must be called as getpgrp(0). 
*/ #undef GETPGRP_HAVE_ARG From 78015818c2601db842d101cad6ce2319c921935f Mon Sep 17 00:00:00 2001 From: Kirill Podoprigora Date: Tue, 5 Nov 2024 04:12:31 +0200 Subject: [PATCH 07/54] gh-126415: Fix conversion warning in `Python/bytecodes.c` (#126416) Fix conversion warning in bytecodes Co-authored-by: mpage --- Python/bytecodes.c | 2 +- Python/generated_cases.c.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 2c78cb9931733d2..81b527e8c050b95 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -197,7 +197,7 @@ dummy_func( _Py_CODEUNIT *bytecode = _PyEval_GetExecutableCode(tstate, _PyFrame_GetCode(frame)); ERROR_IF(bytecode == NULL, error); - int off = this_instr - _PyFrame_GetBytecode(frame); + ptrdiff_t off = this_instr - _PyFrame_GetBytecode(frame); frame->tlbc_index = ((_PyThreadStateImpl *)tstate)->tlbc_index; frame->instr_ptr = bytecode + off; // Make sure this_instr gets reset correctley for any uops that diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index eff246f19972767..c6b8fbc50f388af 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -4841,7 +4841,7 @@ stack_pointer = _PyFrame_GetStackPointer(frame); if (bytecode == NULL) goto error; _PyFrame_SetStackPointer(frame, stack_pointer); - int off = this_instr - _PyFrame_GetBytecode(frame); + ptrdiff_t off = this_instr - _PyFrame_GetBytecode(frame); stack_pointer = _PyFrame_GetStackPointer(frame); frame->tlbc_index = ((_PyThreadStateImpl *)tstate)->tlbc_index; frame->instr_ptr = bytecode + off; @@ -6898,7 +6898,7 @@ stack_pointer = _PyFrame_GetStackPointer(frame); if (bytecode == NULL) goto error; _PyFrame_SetStackPointer(frame, stack_pointer); - int off = this_instr - _PyFrame_GetBytecode(frame); + ptrdiff_t off = this_instr - _PyFrame_GetBytecode(frame); stack_pointer = _PyFrame_GetStackPointer(frame); frame->tlbc_index = ((_PyThreadStateImpl *)tstate)->tlbc_index; frame->instr_ptr = 
bytecode + off; From d9602265479bcd96dc377d92a34556baf34ac3cd Mon Sep 17 00:00:00 2001 From: lit Date: Tue, 5 Nov 2024 10:58:15 +0800 Subject: [PATCH 08/54] gh-125436: Doc: Add missing ``allow_unnamed_section`` parameter to ``ConfigParser`` documentation (#125437) Add missing ``allow_unnamed_section`` parameter to ``ConfigParser`` doc, as well as to its parent ``RawConfigParser``. Split too long line on ``ConfigParser`` signature. Add some sections about when some of ``RawConfigParser`` parameters were added. --- Doc/library/configparser.rst | 31 ++++++++++++++++++++++++++++--- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/Doc/library/configparser.rst b/Doc/library/configparser.rst index 3aad6f7b5d2d20d..ac0f3fca3d72fdf 100644 --- a/Doc/library/configparser.rst +++ b/Doc/library/configparser.rst @@ -942,7 +942,13 @@ interpolation if an option used is not defined elsewhere. :: ConfigParser Objects -------------------- -.. class:: ConfigParser(defaults=None, dict_type=dict, allow_no_value=False, delimiters=('=', ':'), comment_prefixes=('#', ';'), inline_comment_prefixes=None, strict=True, empty_lines_in_values=True, default_section=configparser.DEFAULTSECT, interpolation=BasicInterpolation(), converters={}) +.. class:: ConfigParser(defaults=None, dict_type=dict, allow_no_value=False, *, \ + delimiters=('=', ':'), comment_prefixes=('#', ';'), \ + inline_comment_prefixes=None, strict=True, \ + empty_lines_in_values=True, \ + default_section=configparser.DEFAULTSECT, \ + interpolation=BasicInterpolation(), converters={}, \ + allow_unnamed_section=False) The main configuration parser. When *defaults* is given, it is initialized into the dictionary of intrinsic defaults. When *dict_type* is given, it @@ -990,6 +996,10 @@ ConfigParser Objects converter gets its own corresponding :meth:`!get*` method on the parser object and section proxies. + When *allow_unnamed_section* is ``True`` (default: ``False``), + the first section name can be omitted.
See the + `"Unnamed Sections" section <#unnamed-sections>`_. + It is possible to read several configurations into a single :class:`ConfigParser`, where the most recently added configuration has the highest priority. Any conflicting keys are taken from the more recent @@ -1039,6 +1049,9 @@ ConfigParser Objects Raise a :exc:`MultilineContinuationError` when *allow_no_value* is ``True``, and a key without a value is continued with an indented line. + .. versionchanged:: 3.13 + The *allow_unnamed_section* argument was added. + .. method:: defaults() Return a dictionary containing the instance-wide defaults. @@ -1295,18 +1308,30 @@ RawConfigParser Objects comment_prefixes=('#', ';'), \ inline_comment_prefixes=None, strict=True, \ empty_lines_in_values=True, \ - default_section=configparser.DEFAULTSECT[, \ - interpolation]) + default_section=configparser.DEFAULTSECT, \ + interpolation=BasicInterpolation(), converters={}, \ + allow_unnamed_section=False) Legacy variant of the :class:`ConfigParser`. It has interpolation disabled by default and allows for non-string section names, option names, and values via its unsafe ``add_section`` and ``set`` methods, as well as the legacy ``defaults=`` keyword argument handling. + .. versionchanged:: 3.2 + *allow_no_value*, *delimiters*, *comment_prefixes*, *strict*, + *empty_lines_in_values*, *default_section* and *interpolation* were + added. + + .. versionchanged:: 3.5 + The *converters* argument was added. + .. versionchanged:: 3.8 The default *dict_type* is :class:`dict`, since it now preserves insertion order. + .. versionchanged:: 3.13 + The *allow_unnamed_section* argument was added. + .. note:: Consider using :class:`ConfigParser` instead which checks types of the values to be stored internally. 
If you don't want interpolation, you From d3840503b0f590ee574fbdf3c96626ff8b3c45f6 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Tue, 5 Nov 2024 08:23:17 +0200 Subject: [PATCH 09/54] gh-126303: Fix pickling and copying of os.sched_param objects (GH-126336) --- Include/internal/pycore_typeobject.h | 1 + Lib/test/test_posix.py | 21 +++++++++++++++++++ ...-11-02-19-20-44.gh-issue-126303.yVvyWB.rst | 1 + Modules/posixmodule.c | 17 +++++++++++++++ Objects/typeobject.c | 6 ++++++ 5 files changed, 46 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2024-11-02-19-20-44.gh-issue-126303.yVvyWB.rst diff --git a/Include/internal/pycore_typeobject.h b/Include/internal/pycore_typeobject.h index 118bc98b35d5e3f..e72592b8e98ef84 100644 --- a/Include/internal/pycore_typeobject.h +++ b/Include/internal/pycore_typeobject.h @@ -243,6 +243,7 @@ extern PyObject* _PyType_GetFullyQualifiedName(PyTypeObject *type, char sep); // self->tp_flags = (self->tp_flags & ~mask) | flags; extern void _PyType_SetFlags(PyTypeObject *self, unsigned long mask, unsigned long flags); +extern int _PyType_AddMethod(PyTypeObject *, PyMethodDef *); // Like _PyType_SetFlags(), but apply the operation to self and any of its // subclasses without Py_TPFLAGS_IMMUTABLETYPE set. 
diff --git a/Lib/test/test_posix.py b/Lib/test/test_posix.py index 35016b83a477fc5..ef9d617f66feec3 100644 --- a/Lib/test/test_posix.py +++ b/Lib/test/test_posix.py @@ -6,12 +6,14 @@ from test.support import warnings_helper from test.support.script_helper import assert_python_ok +import copy import errno import sys import signal import time import os import platform +import pickle import stat import tempfile import unittest @@ -1317,6 +1319,25 @@ def test_get_and_set_scheduler_and_param(self): param = posix.sched_param(sched_priority=-large) self.assertRaises(OverflowError, posix.sched_setparam, 0, param) + @requires_sched + def test_sched_param(self): + param = posix.sched_param(1) + for proto in range(pickle.HIGHEST_PROTOCOL+1): + newparam = pickle.loads(pickle.dumps(param, proto)) + self.assertEqual(newparam, param) + newparam = copy.copy(param) + self.assertIsNot(newparam, param) + self.assertEqual(newparam, param) + newparam = copy.deepcopy(param) + self.assertIsNot(newparam, param) + self.assertEqual(newparam, param) + newparam = copy.replace(param) + self.assertIsNot(newparam, param) + self.assertEqual(newparam, param) + newparam = copy.replace(param, sched_priority=0) + self.assertNotEqual(newparam, param) + self.assertEqual(newparam.sched_priority, 0) + @unittest.skipUnless(hasattr(posix, "sched_rr_get_interval"), "no function") def test_sched_rr_get_interval(self): try: diff --git a/Misc/NEWS.d/next/Library/2024-11-02-19-20-44.gh-issue-126303.yVvyWB.rst b/Misc/NEWS.d/next/Library/2024-11-02-19-20-44.gh-issue-126303.yVvyWB.rst new file mode 100644 index 000000000000000..0072c97338c2518 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-11-02-19-20-44.gh-issue-126303.yVvyWB.rst @@ -0,0 +1 @@ +Fix pickling and copying of :class:`os.sched_param` objects. 
diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index bb5077cc7f0f097..1ce2baecb8a9649 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -24,6 +24,7 @@ #include "pycore_pystate.h" // _PyInterpreterState_GET() #include "pycore_signal.h" // Py_NSIG #include "pycore_time.h" // _PyLong_FromTime_t() +#include "pycore_typeobject.h" // _PyType_AddMethod() #ifdef HAVE_UNISTD_H # include // symlink() @@ -8210,6 +8211,16 @@ os_sched_param_impl(PyTypeObject *type, PyObject *sched_priority) return res; } +static PyObject * +os_sched_param_reduce(PyObject *self, PyObject *Py_UNUSED(ignored)) +{ + return Py_BuildValue("(O(O))", Py_TYPE(self), PyStructSequence_GetItem(self, 0)); /* "O", not "N": PyStructSequence_GetItem() returns a borrowed reference */ +} + +static PyMethodDef os_sched_param_reduce_method = { + "__reduce__", (PyCFunction)os_sched_param_reduce, METH_NOARGS|METH_COEXIST, NULL, +}; + PyDoc_VAR(os_sched_param__doc__); static PyStructSequence_Field sched_param_fields[] = { @@ -18033,6 +18044,12 @@ posixmodule_exec(PyObject *m) return -1; } ((PyTypeObject *)state->SchedParamType)->tp_new = os_sched_param; + if (_PyType_AddMethod((PyTypeObject *)state->SchedParamType, + &os_sched_param_reduce_method) < 0) + { + return -1; + } + PyType_Modified((PyTypeObject *)state->SchedParamType); #endif /* initialize TerminalSize_info */ diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 40225313a8a33b9..88db29e14b0d441 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -7656,6 +7656,12 @@ type_add_method(PyTypeObject *type, PyMethodDef *meth) return 0; } +int +_PyType_AddMethod(PyTypeObject *type, PyMethodDef *meth) +{ + return type_add_method(type, meth); +} + /* Add the methods from tp_methods to the __dict__ in a type object */ static int From 4a0d574273819b2b5006decb661da05b3baa8a4b Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 5 Nov 2024 08:43:34 +0100 Subject: [PATCH 10/54] gh-120057: Add os.reload_environ() function (#126268) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit Replace the os.environ.refresh() method with a new os.reload_environ() function. Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com> --- Doc/library/os.rst | 33 ++++++++++++++----- Doc/whatsnew/3.14.rst | 7 ++-- Lib/os.py | 25 +++++++------- Lib/test/test_os.py | 16 ++++----- ...-11-01-10-35-49.gh-issue-120057.YWy81Q.rst | 2 ++ 5 files changed, 52 insertions(+), 31 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-11-01-10-35-49.gh-issue-120057.YWy81Q.rst diff --git a/Doc/library/os.rst b/Doc/library/os.rst index f9cded40c2c7556..c0354b2280c45c0 100644 --- a/Doc/library/os.rst +++ b/Doc/library/os.rst @@ -193,10 +193,6 @@ process and user. to the environment made after this time are not reflected in :data:`os.environ`, except for changes made by modifying :data:`os.environ` directly. - The :meth:`!os.environ.refresh` method updates :data:`os.environ` with - changes to the environment made by :func:`os.putenv`, by - :func:`os.unsetenv`, or made outside Python in the same process. - This mapping may be used to modify the environment as well as query the environment. :func:`putenv` will be called automatically when the mapping is modified. @@ -226,12 +222,13 @@ process and user. :data:`os.environ`, and when one of the :meth:`pop` or :meth:`clear` methods is called. + .. seealso:: + + The :func:`os.reload_environ` function. + .. versionchanged:: 3.9 Updated to support :pep:`584`'s merge (``|``) and update (``|=``) operators. - .. versionchanged:: 3.14 - Added the :meth:`!os.environ.refresh` method. - .. data:: environb @@ -249,6 +246,24 @@ process and user. Updated to support :pep:`584`'s merge (``|``) and update (``|=``) operators. +.. function:: reload_environ() + + The :data:`os.environ` and :data:`os.environb` mappings are a cache of + environment variables at the time that Python started. 
+ As such, changes to the current process environment are not reflected + if made outside Python, or by :func:`os.putenv` or :func:`os.unsetenv`. + Use :func:`!os.reload_environ` to update :data:`os.environ` and :data:`os.environb` + with any such changes to the current process environment. + + .. warning:: + This function is not thread-safe. Calling it while the environment is + being modified in another thread is undefined behavior. Reading from + :data:`os.environ` or :data:`os.environb`, or calling :func:`os.getenv` + while reloading, may return an empty result. + + .. versionadded:: next + + .. function:: chdir(path) fchdir(fd) getcwd() @@ -568,7 +583,7 @@ process and user. of :data:`os.environ`. This also applies to :func:`getenv` and :func:`getenvb`, which respectively use :data:`os.environ` and :data:`os.environb` in their implementations. - See also the :data:`os.environ.refresh() ` method. + See also the :func:`os.reload_environ` function. .. note:: @@ -818,7 +833,7 @@ process and user. don't update :data:`os.environ`, so it is actually preferable to delete items of :data:`os.environ`. - See also the :data:`os.environ.refresh() ` method. + See also the :func:`os.reload_environ` function. .. audit-event:: os.unsetenv key os.unsetenv diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 80c1a93b95a6afa..9300de440cdc305 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -365,9 +365,10 @@ operator os -- -* Add the :data:`os.environ.refresh() ` method to update - :data:`os.environ` with changes to the environment made by :func:`os.putenv`, - by :func:`os.unsetenv`, or made outside Python in the same process. +* Add the :func:`os.reload_environ` function to update :data:`os.environ` and + :data:`os.environb` with changes to the environment made by + :func:`os.putenv`, by :func:`os.unsetenv`, or made outside Python in the + same process. (Contributed by Victor Stinner in :gh:`120057`.) 
diff --git a/Lib/os.py b/Lib/os.py index aaa758d955fe4c2..9c2258e6ccf5bab 100644 --- a/Lib/os.py +++ b/Lib/os.py @@ -765,17 +765,6 @@ def __ror__(self, other): new.update(self) return new - if _exists("_create_environ"): - def refresh(self): - data = _create_environ() - if name == 'nt': - data = {self.encodekey(key): value - for key, value in data.items()} - - # modify in-place to keep os.environb in sync - self._data.clear() - self._data.update(data) - def _create_environ_mapping(): if name == 'nt': # Where Env Var Names Must Be UPPERCASE @@ -810,6 +799,20 @@ def decode(value): del _create_environ_mapping +if _exists("_create_environ"): + def reload_environ(): + data = _create_environ() + if name == 'nt': + encodekey = environ.encodekey + data = {encodekey(key): value + for key, value in data.items()} + + # modify in-place to keep os.environb in sync + env_data = environ._data + env_data.clear() + env_data.update(data) + + def getenv(key, default=None): """Get an environment variable, return None if it doesn't exist. The optional second argument can specify an alternate default. 
diff --git a/Lib/test/test_os.py b/Lib/test/test_os.py index 307f0f11ddc33fe..9a4be78556c648a 100644 --- a/Lib/test/test_os.py +++ b/Lib/test/test_os.py @@ -1298,8 +1298,8 @@ def test_ror_operator(self): self._test_underlying_process_env('_A_', '') self._test_underlying_process_env(overridden_key, original_value) - def test_refresh(self): - # Test os.environ.refresh() + def test_reload_environ(self): + # Test os.reload_environ() has_environb = hasattr(os, 'environb') # Test with putenv() which doesn't update os.environ @@ -1309,7 +1309,7 @@ def test_refresh(self): if has_environb: self.assertEqual(os.environb[b'test_env'], b'python_value') - os.environ.refresh() + os.reload_environ() self.assertEqual(os.environ['test_env'], 'new_value') if has_environb: self.assertEqual(os.environb[b'test_env'], b'new_value') @@ -1320,28 +1320,28 @@ def test_refresh(self): if has_environb: self.assertEqual(os.environb[b'test_env'], b'new_value') - os.environ.refresh() + os.reload_environ() self.assertNotIn('test_env', os.environ) if has_environb: self.assertNotIn(b'test_env', os.environb) if has_environb: - # test os.environb.refresh() with putenv() + # test reload_environ() on os.environb with putenv() os.environb[b'test_env'] = b'python_value2' os.putenv("test_env", "new_value2") self.assertEqual(os.environb[b'test_env'], b'python_value2') self.assertEqual(os.environ['test_env'], 'python_value2') - os.environb.refresh() + os.reload_environ() self.assertEqual(os.environb[b'test_env'], b'new_value2') self.assertEqual(os.environ['test_env'], 'new_value2') - # test os.environb.refresh() with unsetenv() + # test reload_environ() on os.environb with unsetenv() os.unsetenv('test_env') self.assertEqual(os.environb[b'test_env'], b'new_value2') self.assertEqual(os.environ['test_env'], 'new_value2') - os.environb.refresh() + os.reload_environ() self.assertNotIn(b'test_env', os.environb) self.assertNotIn('test_env', os.environ) diff --git 
a/Misc/NEWS.d/next/Library/2024-11-01-10-35-49.gh-issue-120057.YWy81Q.rst b/Misc/NEWS.d/next/Library/2024-11-01-10-35-49.gh-issue-120057.YWy81Q.rst new file mode 100644 index 000000000000000..ded60a3f57bca3b --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-11-01-10-35-49.gh-issue-120057.YWy81Q.rst @@ -0,0 +1,2 @@ +Replace the ``os.environ.refresh()`` method with a new +:func:`os.reload_environ` function. Patch by Victor Stinner. From 3d4fda2165e7c97116e69d6efef187873b57d01f Mon Sep 17 00:00:00 2001 From: Lukas Geiger Date: Tue, 5 Nov 2024 07:53:32 +0000 Subject: [PATCH 11/54] gh-119793: Prefer `map(..., strict=True)` over starmap/zip in examples (#126407) --- Doc/library/math.rst | 2 +- Modules/clinic/mathmodule.c.h | 4 ++-- Modules/mathmodule.c | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Doc/library/math.rst b/Doc/library/math.rst index 2ecee89a7db165b..5ce2ad2d6aec475 100644 --- a/Doc/library/math.rst +++ b/Doc/library/math.rst @@ -406,7 +406,7 @@ Number-theoretic and representation functions Roughly equivalent to:: - sum(itertools.starmap(operator.mul, zip(p, q, strict=True))) + sum(map(operator.mul, p, q, strict=True)) For float and mixed int/float inputs, the intermediate products and sums are computed with extended precision. 
diff --git a/Modules/clinic/mathmodule.c.h b/Modules/clinic/mathmodule.c.h index 7d0b98d5502267e..e4bda8a3e62aba4 100644 --- a/Modules/clinic/mathmodule.c.h +++ b/Modules/clinic/mathmodule.c.h @@ -457,7 +457,7 @@ PyDoc_STRVAR(math_sumprod__doc__, "\n" "Roughly equivalent to:\n" "\n" -" sum(itertools.starmap(operator.mul, zip(p, q, strict=True)))\n" +" sum(map(operator.mul, p, q, strict=True))\n" "\n" "For float and mixed int/float inputs, the intermediate products\n" "and sums are computed with extended precision."); @@ -1109,4 +1109,4 @@ math_ulp(PyObject *module, PyObject *arg) exit: return return_value; } -/*[clinic end generated code: output=ee0a2f6bd1220061 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=ff99a737c18d9210 input=a9049054013a1b77]*/ diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c index 7e8d8b3f5bafa23..77f50a2001634b6 100644 --- a/Modules/mathmodule.c +++ b/Modules/mathmodule.c @@ -2710,7 +2710,7 @@ Return the sum of products of values from two iterables p and q. Roughly equivalent to: - sum(itertools.starmap(operator.mul, zip(p, q, strict=True))) + sum(map(operator.mul, p, q, strict=True)) For float and mixed int/float inputs, the intermediate products and sums are computed with extended precision. @@ -2718,7 +2718,7 @@ and sums are computed with extended precision. 
static PyObject * math_sumprod_impl(PyObject *module, PyObject *p, PyObject *q) -/*[clinic end generated code: output=6722dbfe60664554 input=82be54fe26f87e30]*/ +/*[clinic end generated code: output=6722dbfe60664554 input=a2880317828c61d2]*/ { PyObject *p_i = NULL, *q_i = NULL, *term_i = NULL, *new_total = NULL; PyObject *p_it, *q_it, *total; From 407c0366d9ccd2a36c6cc8bf92324856b16fd604 Mon Sep 17 00:00:00 2001 From: Richard Hansen Date: Tue, 5 Nov 2024 04:48:46 -0500 Subject: [PATCH 12/54] Doc: C API: Delete claim that `PyObject_Init` is GC-aware (#126418) --- Doc/c-api/allocation.rst | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/Doc/c-api/allocation.rst b/Doc/c-api/allocation.rst index 0d53b18ea87d5e2..b7e0f22b52c57e9 100644 --- a/Doc/c-api/allocation.rst +++ b/Doc/c-api/allocation.rst @@ -15,10 +15,8 @@ Allocating Objects on the Heap .. c:function:: PyObject* PyObject_Init(PyObject *op, PyTypeObject *type) Initialize a newly allocated object *op* with its type and initial - reference. Returns the initialized object. If *type* indicates that the - object participates in the cyclic garbage detector, it is added to the - detector's set of observed objects. Other fields of the object are not - affected. + reference. Returns the initialized object. Other fields of the object are + not affected. .. 
c:function:: PyVarObject* PyObject_InitVar(PyVarObject *op, PyTypeObject *type, Py_ssize_t size) From 1371295e678f00a7c89dc5bb2ab61ede9adbc094 Mon Sep 17 00:00:00 2001 From: Peter Bierma Date: Tue, 5 Nov 2024 04:56:36 -0500 Subject: [PATCH 13/54] gh-126366: Fix crash if `__iter__` raises an exception during `yield from` (#126369) --- Lib/test/test_yield_from.py | 13 +++++++++++++ .../2024-11-03-15-15-36.gh-issue-126366.8BBdGU.rst | 2 ++ Python/bytecodes.c | 5 +++-- Python/executor_cases.c.h | 5 +++-- Python/generated_cases.c.h | 5 +++-- Tools/jit/ignore-tests-emulated-linux.txt | 1 + 6 files changed, 25 insertions(+), 6 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2024-11-03-15-15-36.gh-issue-126366.8BBdGU.rst diff --git a/Lib/test/test_yield_from.py b/Lib/test/test_yield_from.py index 1a60357a1bcd629..b90e15e20027dcd 100644 --- a/Lib/test/test_yield_from.py +++ b/Lib/test/test_yield_from.py @@ -1576,6 +1576,19 @@ def outer(): self.assertIsNone(caught.exception.__context__) self.assert_stop_iteration(g) + def test_throws_in_iter(self): + # See GH-126366: NULL pointer dereference if __iter__ + # threw an exception. + class Silly: + def __iter__(self): + raise RuntimeError("nobody expects the spanish inquisition") + + def my_generator(): + yield from Silly() + + with self.assertRaisesRegex(RuntimeError, "nobody expects the spanish inquisition"): + next(iter(my_generator())) + if __name__ == '__main__': unittest.main() diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-11-03-15-15-36.gh-issue-126366.8BBdGU.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-11-03-15-15-36.gh-issue-126366.8BBdGU.rst new file mode 100644 index 000000000000000..a47233602e4efff --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-11-03-15-15-36.gh-issue-126366.8BBdGU.rst @@ -0,0 +1,2 @@ +Fix crash when using ``yield from`` on an object that raises an exception in +its ``__iter__``. 
diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 81b527e8c050b95..8c52db6ab684369 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -2811,11 +2811,12 @@ dummy_func( } else { /* `iterable` is not a generator. */ - iter = PyStackRef_FromPyObjectSteal(PyObject_GetIter(iterable_o)); + PyObject *iter_o = PyObject_GetIter(iterable_o); DEAD(iterable); - if (PyStackRef_IsNull(iter)) { + if (iter_o == NULL) { ERROR_NO_POP(); } + iter = PyStackRef_FromPyObjectSteal(iter_o); DECREF_INPUTS(); } } diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 9fac4e881b81e25..1d63402214db5d2 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -3437,11 +3437,12 @@ else { /* `iterable` is not a generator. */ _PyFrame_SetStackPointer(frame, stack_pointer); - iter = PyStackRef_FromPyObjectSteal(PyObject_GetIter(iterable_o)); + PyObject *iter_o = PyObject_GetIter(iterable_o); stack_pointer = _PyFrame_GetStackPointer(frame); - if (PyStackRef_IsNull(iter)) { + if (iter_o == NULL) { JUMP_TO_ERROR(); } + iter = PyStackRef_FromPyObjectSteal(iter_o); PyStackRef_CLOSE(iterable); } } diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index c6b8fbc50f388af..d346875ea4455f0 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -4304,11 +4304,12 @@ else { /* `iterable` is not a generator. 
*/ _PyFrame_SetStackPointer(frame, stack_pointer); - iter = PyStackRef_FromPyObjectSteal(PyObject_GetIter(iterable_o)); + PyObject *iter_o = PyObject_GetIter(iterable_o); stack_pointer = _PyFrame_GetStackPointer(frame); - if (PyStackRef_IsNull(iter)) { + if (iter_o == NULL) { goto error; } + iter = PyStackRef_FromPyObjectSteal(iter_o); PyStackRef_CLOSE(iterable); } } diff --git a/Tools/jit/ignore-tests-emulated-linux.txt b/Tools/jit/ignore-tests-emulated-linux.txt index e379e39def0eaf4..080a569574470cb 100644 --- a/Tools/jit/ignore-tests-emulated-linux.txt +++ b/Tools/jit/ignore-tests-emulated-linux.txt @@ -71,6 +71,7 @@ test.test_socket.RecvmsgSCMRightsStreamTest.testCmsgTruncLen1 test.test_socket.RecvmsgSCMRightsStreamTest.testCmsgTruncLen2Minus1 test.test_subprocess.POSIXProcessTestCase.test_exception_bad_args_0 test.test_subprocess.POSIXProcessTestCase.test_exception_bad_executable +test.test_subprocess.POSIXProcessTestCase.test_vfork_used_when_expected test.test_subprocess.ProcessTestCase.test_cwd_with_relative_arg test.test_subprocess.ProcessTestCase.test_cwd_with_relative_executable test.test_subprocess.ProcessTestCase.test_empty_env From 78842e4a98994a218a93992a2a1e3ca3eaa28e79 Mon Sep 17 00:00:00 2001 From: Stephen Morton Date: Tue, 5 Nov 2024 02:05:45 -0800 Subject: [PATCH 14/54] gh-126417: Register multiprocessing proxy types to an appropriate collections.abc class (#126419) --- Lib/multiprocessing/managers.py | 6 +++++- Lib/test/_test_multiprocessing.py | 9 +++++++++ Misc/ACKS | 1 + .../2024-11-04-16-40-02.gh-issue-126417.OWPqn0.rst | 3 +++ 4 files changed, 18 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Library/2024-11-04-16-40-02.gh-issue-126417.OWPqn0.rst diff --git a/Lib/multiprocessing/managers.py b/Lib/multiprocessing/managers.py index 0f5f9f64c2de9e0..a5d2f53613952e1 100644 --- a/Lib/multiprocessing/managers.py +++ b/Lib/multiprocessing/managers.py @@ -18,6 +18,7 @@ import threading import signal import array +import 
collections.abc import queue import time import types @@ -1167,8 +1168,9 @@ def __imul__(self, value): __class_getitem__ = classmethod(types.GenericAlias) +collections.abc.MutableSequence.register(BaseListProxy) -_BaseDictProxy = MakeProxyType('DictProxy', ( +_BaseDictProxy = MakeProxyType('_BaseDictProxy', ( '__contains__', '__delitem__', '__getitem__', '__ior__', '__iter__', '__len__', '__or__', '__reversed__', '__ror__', '__setitem__', 'clear', 'copy', 'fromkeys', 'get', 'items', @@ -1184,6 +1186,8 @@ def __ior__(self, value): __class_getitem__ = classmethod(types.GenericAlias) +collections.abc.MutableMapping.register(_BaseDictProxy) + ArrayProxy = MakeProxyType('ArrayProxy', ( '__len__', '__getitem__', '__setitem__' )) diff --git a/Lib/test/_test_multiprocessing.py b/Lib/test/_test_multiprocessing.py index 065fc27b7704383..77b618c684475a5 100644 --- a/Lib/test/_test_multiprocessing.py +++ b/Lib/test/_test_multiprocessing.py @@ -16,6 +16,7 @@ import functools import signal import array +import collections.abc import socket import random import logging @@ -2331,6 +2332,10 @@ def test_list(self): a.append('hello') self.assertEqual(f[0][:], [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 'hello']) + def test_list_isinstance(self): + a = self.list() + self.assertIsInstance(a, collections.abc.MutableSequence) + def test_list_iter(self): a = self.list(list(range(10))) it = iter(a) @@ -2371,6 +2376,10 @@ def test_dict(self): self.assertEqual(sorted(d.values()), [chr(i) for i in indices]) self.assertEqual(sorted(d.items()), [(i, chr(i)) for i in indices]) + def test_dict_isinstance(self): + a = self.dict() + self.assertIsInstance(a, collections.abc.MutableMapping) + def test_dict_iter(self): d = self.dict() indices = list(range(65, 70)) diff --git a/Misc/ACKS b/Misc/ACKS index 5e36eda554af0f8..d03c70f6db87bfe 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -1273,6 +1273,7 @@ Emily Morehouse Derek Morr James A Morrison Martin Morrison +Stephen Morton Derek McTavish Mounce Alessandro Moura 
Pablo Mouzo diff --git a/Misc/NEWS.d/next/Library/2024-11-04-16-40-02.gh-issue-126417.OWPqn0.rst b/Misc/NEWS.d/next/Library/2024-11-04-16-40-02.gh-issue-126417.OWPqn0.rst new file mode 100644 index 000000000000000..c4a366343382f3b --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-11-04-16-40-02.gh-issue-126417.OWPqn0.rst @@ -0,0 +1,3 @@ +Register the :class:`!multiprocessing.managers.DictProxy` and :class:`!multiprocessing.managers.ListProxy` types in +:mod:`multiprocessing.managers` to :class:`collections.abc.MutableMapping` and +:class:`collections.abc.MutableSequence`, respectively. From 75872605aa78dbdfc5c4f025b0f90a7f37ba10c3 Mon Sep 17 00:00:00 2001 From: sobolevn Date: Tue, 5 Nov 2024 15:23:24 +0300 Subject: [PATCH 15/54] gh-126425: Refactor `_lsprof_Profiler_enable` (#126426) - Explicit memory management for `None` objects (since we still try to treat immortal objects as regular objects) - Respect possible errors of `sys.monitoring.register_callback` call --- Modules/_lsprof.c | 37 +++++++++++++++++++++++++------------ 1 file changed, 25 insertions(+), 12 deletions(-) diff --git a/Modules/_lsprof.c b/Modules/_lsprof.c index 4f996c7230e16d5..51ad9fc7da84924 100644 --- a/Modules/_lsprof.c +++ b/Modules/_lsprof.c @@ -780,34 +780,47 @@ _lsprof_Profiler_enable_impl(ProfilerObject *self, int subcalls, return NULL; } - if (PyObject_CallMethod(monitoring, "use_tool_id", "is", self->tool_id, "cProfile") == NULL) { + PyObject *check = PyObject_CallMethod(monitoring, + "use_tool_id", "is", + self->tool_id, "cProfile"); + if (check == NULL) { PyErr_Format(PyExc_ValueError, "Another profiling tool is already active"); - Py_DECREF(monitoring); - return NULL; + goto error; } + Py_DECREF(check); for (int i = 0; callback_table[i].callback_method; i++) { + int event = (1 << callback_table[i].event); PyObject* callback = PyObject_GetAttrString((PyObject*)self, callback_table[i].callback_method); if (!callback) { - Py_DECREF(monitoring); - return NULL; + goto error; } - 
Py_XDECREF(PyObject_CallMethod(monitoring, "register_callback", "iiO", self->tool_id, - (1 << callback_table[i].event), - callback)); + PyObject *register_result = PyObject_CallMethod(monitoring, "register_callback", + "iiO", self->tool_id, + event, callback); Py_DECREF(callback); - all_events |= (1 << callback_table[i].event); + if (register_result == NULL) { + goto error; + } + Py_DECREF(register_result); + all_events |= event; } - if (!PyObject_CallMethod(monitoring, "set_events", "ii", self->tool_id, all_events)) { - Py_DECREF(monitoring); - return NULL; + PyObject *event_result = PyObject_CallMethod(monitoring, "set_events", "ii", + self->tool_id, all_events); + if (event_result == NULL) { + goto error; } + Py_DECREF(event_result); Py_DECREF(monitoring); self->flags |= POF_ENABLED; Py_RETURN_NONE; + +error: + Py_DECREF(monitoring); + return NULL; } static void From bbfd9c92fa3e3d77a86c7858617eb3d09de44fd1 Mon Sep 17 00:00:00 2001 From: "T. Wouters" Date: Tue, 5 Nov 2024 15:49:27 +0100 Subject: [PATCH 16/54] gh-115999: Fix gdb support for libpython.so after thread-local bytecode change (#126440) Fix the gdb pretty printer in the face of --enable-shared by delaying the attempt to load the _PyInterpreterFrame definition until after .so files are loaded. 
--- Tools/gdb/libpython.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/Tools/gdb/libpython.py b/Tools/gdb/libpython.py index ed254152d7da41f..698ecbd3b549aa5 100755 --- a/Tools/gdb/libpython.py +++ b/Tools/gdb/libpython.py @@ -77,10 +77,14 @@ def _managed_dict_offset(): else: return -3 * _sizeof_void_p() -def _interp_frame_has_tlbc_index(): - interp_frame = gdb.lookup_type("_PyInterpreterFrame") - return any(field.name == "tlbc_index" for field in interp_frame.fields()) - +_INTERP_FRAME_HAS_TLBC_INDEX = None +def interp_frame_has_tlbc_index(): + global _INTERP_FRAME_HAS_TLBC_INDEX + if _INTERP_FRAME_HAS_TLBC_INDEX is None: + interp_frame = gdb.lookup_type("_PyInterpreterFrame") + _INTERP_FRAME_HAS_TLBC_INDEX = any(field.name == "tlbc_index" + for field in interp_frame.fields()) + return _INTERP_FRAME_HAS_TLBC_INDEX Py_TPFLAGS_INLINE_VALUES = (1 << 2) Py_TPFLAGS_MANAGED_DICT = (1 << 4) @@ -109,7 +113,6 @@ def _interp_frame_has_tlbc_index(): UNABLE_READ_INFO_PYTHON_FRAME = 'Unable to read information on python frame' EVALFRAME = '_PyEval_EvalFrameDefault' -INTERP_FRAME_HAS_TLBC_INDEX = _interp_frame_has_tlbc_index() class NullPyObjectPtr(RuntimeError): pass @@ -1101,7 +1104,7 @@ def _f_nlocalsplus(self): def _f_lasti(self): codeunit_p = gdb.lookup_type("_Py_CODEUNIT").pointer() instr_ptr = self._gdbval["instr_ptr"] - if INTERP_FRAME_HAS_TLBC_INDEX: + if interp_frame_has_tlbc_index(): tlbc_index = self._gdbval["tlbc_index"] code_arr = PyCodeArrayPtr(self._f_code().field("co_tlbc")) first_instr = code_arr.get_entry(tlbc_index).cast(codeunit_p) From 0b67ce930a56c4ffd597b1a658ddcbacfb40e798 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 5 Nov 2024 16:05:13 +0100 Subject: [PATCH 17/54] gh-126433: Fix compiler warnings on 32-bit Windows (#126444) --- Modules/_interpchannelsmodule.c | 2 +- Modules/_ssl.c | 6 ++++-- Modules/_winapi.c | 6 +++--- Modules/blake2module.c | 6 +++--- PC/venvlauncher.c | 2 +- 5 files changed, 12 
insertions(+), 10 deletions(-) diff --git a/Modules/_interpchannelsmodule.c b/Modules/_interpchannelsmodule.c index 8e6b21db76e01c8..68ee429a9e1dfe5 100644 --- a/Modules/_interpchannelsmodule.c +++ b/Modules/_interpchannelsmodule.c @@ -2061,7 +2061,7 @@ _channel_get_info(_channels *channels, int64_t cid, struct channel_info *info) if (interp == NULL) { return -1; } - Py_ssize_t interpid = PyInterpreterState_GetID(interp); + int64_t interpid = PyInterpreterState_GetID(interp); // Hold the global lock until we're done. PyThread_acquire_lock(channels->mutex, WAIT_LOCK); diff --git a/Modules/_ssl.c b/Modules/_ssl.c index 54bac28e5beccf1..5223e21b5cdb11f 100644 --- a/Modules/_ssl.c +++ b/Modules/_ssl.c @@ -4923,7 +4923,9 @@ static unsigned int psk_client_callback(SSL *s, goto error; } - if (identity_len_ + 1 > max_identity_len || psk_len_ > max_psk_len) { + if ((size_t)identity_len_ + 1 > max_identity_len + || (size_t)psk_len_ > max_psk_len) + { Py_DECREF(result); goto error; } @@ -5036,7 +5038,7 @@ static unsigned int psk_server_callback(SSL *s, goto error; } - if (psk_len_ > max_psk_len) { + if ((size_t)psk_len_ > max_psk_len) { Py_DECREF(result); goto error; } diff --git a/Modules/_winapi.c b/Modules/_winapi.c index a330b3ff68db622..4ce689fe30e6df5 100644 --- a/Modules/_winapi.c +++ b/Modules/_winapi.c @@ -2317,7 +2317,7 @@ _winapi_BatchedWaitForMultipleObjects_impl(PyObject *module, BOOL wait_all, DWORD milliseconds) /*[clinic end generated code: output=d21c1a4ad0a252fd input=7e196f29005dc77b]*/ { - Py_ssize_t thread_count = 0, handle_count = 0, i, j; + Py_ssize_t thread_count = 0, handle_count = 0, i; Py_ssize_t nhandles; BatchedWaitData *thread_data[MAXIMUM_WAIT_OBJECTS]; HANDLE handles[MAXIMUM_WAIT_OBJECTS]; @@ -2378,7 +2378,7 @@ _winapi_BatchedWaitForMultipleObjects_impl(PyObject *module, if (data->handle_count > MAXIMUM_WAIT_OBJECTS - 1) { data->handle_count = MAXIMUM_WAIT_OBJECTS - 1; } - for (j = 0; j < data->handle_count; ++i, ++j) { + for (DWORD j = 0; j < 
data->handle_count; ++i, ++j) { PyObject *v = PySequence_GetItem(handle_seq, i); if (!v || !PyArg_Parse(v, F_HANDLE, &data->handles[j])) { Py_XDECREF(v); @@ -2526,7 +2526,7 @@ _winapi_BatchedWaitForMultipleObjects_impl(PyObject *module, if (triggered_indices) { for (i = 0; i < thread_count; ++i) { Py_ssize_t triggered = (Py_ssize_t)thread_data[i]->result - WAIT_OBJECT_0; - if (triggered >= 0 && triggered < thread_data[i]->handle_count - 1) { + if (triggered >= 0 && (size_t)triggered < thread_data[i]->handle_count - 1) { PyObject *v = PyLong_FromSsize_t(thread_data[i]->handle_base + triggered); if (!v || PyList_Append(triggered_indices, v) < 0) { Py_XDECREF(v); diff --git a/Modules/blake2module.c b/Modules/blake2module.c index 1ec676c34c6128b..94cdfe7fd2e962d 100644 --- a/Modules/blake2module.c +++ b/Modules/blake2module.c @@ -474,7 +474,7 @@ py_blake2b_or_s_new(PyTypeObject *type, PyObject *data, int digest_size, /* Validate salt parameter. */ if ((salt->obj != NULL) && salt->len) { - if (salt->len > (is_blake2b(self->impl) ? HACL_HASH_BLAKE2B_SALT_BYTES : HACL_HASH_BLAKE2S_SALT_BYTES)) { + if ((size_t)salt->len > (is_blake2b(self->impl) ? HACL_HASH_BLAKE2B_SALT_BYTES : HACL_HASH_BLAKE2S_SALT_BYTES)) { PyErr_Format(PyExc_ValueError, "maximum salt length is %d bytes", (is_blake2b(self->impl) ? HACL_HASH_BLAKE2B_SALT_BYTES : HACL_HASH_BLAKE2S_SALT_BYTES)); @@ -485,7 +485,7 @@ py_blake2b_or_s_new(PyTypeObject *type, PyObject *data, int digest_size, /* Validate personalization parameter. */ if ((person->obj != NULL) && person->len) { - if (person->len > (is_blake2b(self->impl) ? HACL_HASH_BLAKE2B_PERSONAL_BYTES : HACL_HASH_BLAKE2S_PERSONAL_BYTES)) { + if ((size_t)person->len > (is_blake2b(self->impl) ? HACL_HASH_BLAKE2B_PERSONAL_BYTES : HACL_HASH_BLAKE2S_PERSONAL_BYTES)) { PyErr_Format(PyExc_ValueError, "maximum person length is %d bytes", (is_blake2b(self->impl) ? 
HACL_HASH_BLAKE2B_PERSONAL_BYTES : HACL_HASH_BLAKE2S_PERSONAL_BYTES)); @@ -534,7 +534,7 @@ py_blake2b_or_s_new(PyTypeObject *type, PyObject *data, int digest_size, /* Set key length. */ if ((key->obj != NULL) && key->len) { - if (key->len > (is_blake2b(self->impl) ? HACL_HASH_BLAKE2B_KEY_BYTES : HACL_HASH_BLAKE2S_KEY_BYTES)) { + if ((size_t)key->len > (is_blake2b(self->impl) ? HACL_HASH_BLAKE2B_KEY_BYTES : HACL_HASH_BLAKE2S_KEY_BYTES)) { PyErr_Format(PyExc_ValueError, "maximum key length is %d bytes", (is_blake2b(self->impl) ? HACL_HASH_BLAKE2B_KEY_BYTES : HACL_HASH_BLAKE2S_KEY_BYTES)); diff --git a/PC/venvlauncher.c b/PC/venvlauncher.c index b1c8d0763d8c76a..b6bb0218236ae9d 100644 --- a/PC/venvlauncher.c +++ b/PC/venvlauncher.c @@ -223,7 +223,7 @@ find_home_value(const char *buffer, DWORD maxlen, const char **start, DWORD *len return 0; } for (const char *s = strstr(buffer, "home"); - s && ((ptrdiff_t)s - (ptrdiff_t)buffer) < maxlen; + s && (size_t)((ptrdiff_t)s - (ptrdiff_t)buffer) < maxlen; s = strstr(s + 1, "\nhome") ) { if (*s == '\n') { From 8525c9375f25e6ec0c0b5dfcab464703f6e78082 Mon Sep 17 00:00:00 2001 From: Valery Fedorenko Date: Tue, 5 Nov 2024 20:34:33 +0300 Subject: [PATCH 18/54] gh-126238: Fix possible null pointer dereference of freevars in _PyCompile_LookupArg (#126239) * Replace Py_DECREF by Py_XDECREF Co-authored-by: blurb-it[bot] <43283697+blurb-it[bot]@users.noreply.github.com> Co-authored-by: Peter Bierma --- Python/compile.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/compile.c b/Python/compile.c index 4dcb9a1b5acdb3d..ecca9b0b06ecf73 100644 --- a/Python/compile.c +++ b/Python/compile.c @@ -901,7 +901,7 @@ _PyCompile_LookupArg(compiler *c, PyCodeObject *co, PyObject *name) c->u->u_metadata.u_name, co->co_name, freevars); - Py_DECREF(freevars); + Py_XDECREF(freevars); return ERROR; } return arg; From ff8349979c2ca4e442afc583e1217519611c6c48 Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Tue, 5 Nov 2024 18:43:43 
+0000 Subject: [PATCH 19/54] GH-124985: Document that `pathlib.Path.copy()` uses copy-on-write. (#125861) --- Doc/library/pathlib.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index b6fb36554f7cec7..a42ac1f8bcdf717 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -1592,6 +1592,11 @@ Copying, moving and deleting This argument has no effect when copying files on Windows (where metadata is always preserved). + .. note:: + Where supported by the operating system and file system, this method + performs a lightweight copy, where data blocks are only copied when + modified. This is known as copy-on-write. + .. versionadded:: 3.14 From f51fd84034e2cbf458321c25ba6fd085a39d6f6f Mon Sep 17 00:00:00 2001 From: Steve Dower Date: Tue, 5 Nov 2024 20:43:52 +0000 Subject: [PATCH 20/54] gh-126074: Removes unnecessary DLLs from embeddable package (GH-126143) --- .../2024-10-29-20-09-52.gh-issue-126074.83ZzZs.rst | 1 + PC/layout/main.py | 8 +++++++- 2 files changed, 8 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Windows/2024-10-29-20-09-52.gh-issue-126074.83ZzZs.rst diff --git a/Misc/NEWS.d/next/Windows/2024-10-29-20-09-52.gh-issue-126074.83ZzZs.rst b/Misc/NEWS.d/next/Windows/2024-10-29-20-09-52.gh-issue-126074.83ZzZs.rst new file mode 100644 index 000000000000000..d4d06b090b59229 --- /dev/null +++ b/Misc/NEWS.d/next/Windows/2024-10-29-20-09-52.gh-issue-126074.83ZzZs.rst @@ -0,0 +1 @@ +Removed unnecessary DLLs from Windows embeddable package diff --git a/PC/layout/main.py b/PC/layout/main.py index 0350ed7af3f9b58..8bd435456c635a4 100644 --- a/PC/layout/main.py +++ b/PC/layout/main.py @@ -31,11 +31,13 @@ from .support.nuspec import * TEST_PYDS_ONLY = FileStemSet("xxlimited", "xxlimited_35", "_ctypes_test", "_test*") +TEST_DLLS_ONLY = set() TEST_DIRS_ONLY = FileNameSet("test", "tests") IDLE_DIRS_ONLY = FileNameSet("idlelib") -TCLTK_PYDS_ONLY = FileStemSet("tcl*", "tk*", "_tkinter", 
"zlib1") +TCLTK_PYDS_ONLY = FileStemSet("_tkinter") +TCLTK_DLLS_ONLY = FileStemSet("tcl*", "tk*", "zlib1") TCLTK_DIRS_ONLY = FileNameSet("tkinter", "turtledemo") TCLTK_FILES_ONLY = FileNameSet("turtle.py") @@ -226,6 +228,10 @@ def in_build(f, dest="", new_name=None, no_lib=False): continue if src in EXCLUDE_FROM_DLLS: continue + if src in TEST_DLLS_ONLY and not ns.include_tests: + continue + if src in TCLTK_DLLS_ONLY and not ns.include_tcltk: + continue yield from in_build(src.name, dest=dest, no_lib=True) if ns.zip_lib: From 5e9168492f12c579b2481f3f3e0ae11f9d986857 Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Tue, 5 Nov 2024 21:19:36 +0000 Subject: [PATCH 21/54] pathlib ABCs: defer path joining (#126409) Defer joining of path segments in the private `PurePathBase` ABC. The new behaviour matches how the public `PurePath` class handles path segments. This removes a hard-to-grok difference between the ABCs and the main classes. It also slightly reduces the size of `PurePath` objects by eliminating a `_raw_path` slot. --- Lib/pathlib/_abc.py | 59 ++++++++++++++--------- Lib/pathlib/_local.py | 25 +++------- Lib/test/test_pathlib/test_pathlib_abc.py | 5 -- 3 files changed, 43 insertions(+), 46 deletions(-) diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index f5eed6f025c2505..43e6624934b045b 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -99,7 +99,7 @@ class PathGlobber(_GlobberBase): @staticmethod def concat_path(path, text): """Appends text to the given path.""" - return path.with_segments(path._raw_path + text) + return path.with_segments(str(path) + text) class PurePathBase: @@ -112,9 +112,9 @@ class PurePathBase: """ __slots__ = ( - # The `_raw_path` slot store a joined string path. This is set in the - # `__init__()` method. - '_raw_path', + # The `_raw_paths` slot stores unjoined string paths. This is set in + # the `__init__()` method. 
+ '_raw_paths', # The '_resolving' slot stores a boolean indicating whether the path # is being processed by `PathBase.resolve()`. This prevents duplicate @@ -124,11 +124,14 @@ class PurePathBase: parser = ParserBase() _globber = PathGlobber - def __init__(self, path, *paths): - self._raw_path = self.parser.join(path, *paths) if paths else path - if not isinstance(self._raw_path, str): - raise TypeError( - f"path should be a str, not {type(self._raw_path).__name__!r}") + def __init__(self, arg, *args): + paths = [arg] + paths.extend(args) + for path in paths: + if not isinstance(path, str): + raise TypeError( + f"path should be a str, not {type(path).__name__!r}") + self._raw_paths = paths self._resolving = False def with_segments(self, *pathsegments): @@ -141,7 +144,19 @@ def with_segments(self, *pathsegments): def __str__(self): """Return the string representation of the path, suitable for passing to system calls.""" - return self._raw_path + paths = self._raw_paths + if len(paths) == 1: + return paths[0] + elif paths: + # Join path segments from the initializer. + path = self.parser.join(*paths) + # Cache the joined path. 
+ paths.clear() + paths.append(path) + return path + else: + paths.append('') + return '' def as_posix(self): """Return the string representation of the path with forward (/) @@ -166,7 +181,7 @@ def anchor(self): @property def name(self): """The final path component, if any.""" - return self.parser.split(self._raw_path)[1] + return self.parser.split(str(self))[1] @property def suffix(self): @@ -202,7 +217,7 @@ def with_name(self, name): split = self.parser.split if split(name)[0]: raise ValueError(f"Invalid name {name!r}") - return self.with_segments(split(self._raw_path)[0], name) + return self.with_segments(split(str(self))[0], name) def with_stem(self, stem): """Return a new path with the stem changed.""" @@ -242,7 +257,7 @@ def relative_to(self, other, *, walk_up=False): anchor0, parts0 = self._stack anchor1, parts1 = other._stack if anchor0 != anchor1: - raise ValueError(f"{self._raw_path!r} and {other._raw_path!r} have different anchors") + raise ValueError(f"{str(self)!r} and {str(other)!r} have different anchors") while parts0 and parts1 and parts0[-1] == parts1[-1]: parts0.pop() parts1.pop() @@ -250,9 +265,9 @@ def relative_to(self, other, *, walk_up=False): if not part or part == '.': pass elif not walk_up: - raise ValueError(f"{self._raw_path!r} is not in the subpath of {other._raw_path!r}") + raise ValueError(f"{str(self)!r} is not in the subpath of {str(other)!r}") elif part == '..': - raise ValueError(f"'..' segment in {other._raw_path!r} cannot be walked") + raise ValueError(f"'..' segment in {str(other)!r} cannot be walked") else: parts0.append('..') return self.with_segments('', *reversed(parts0)) @@ -289,17 +304,17 @@ def joinpath(self, *pathsegments): paths) or a totally different path (if one of the arguments is anchored). 
""" - return self.with_segments(self._raw_path, *pathsegments) + return self.with_segments(*self._raw_paths, *pathsegments) def __truediv__(self, key): try: - return self.with_segments(self._raw_path, key) + return self.with_segments(*self._raw_paths, key) except TypeError: return NotImplemented def __rtruediv__(self, key): try: - return self.with_segments(key, self._raw_path) + return self.with_segments(key, *self._raw_paths) except TypeError: return NotImplemented @@ -311,7 +326,7 @@ def _stack(self): *parts* is a reversed list of parts following the anchor. """ split = self.parser.split - path = self._raw_path + path = str(self) parent, name = split(path) names = [] while path != parent: @@ -323,7 +338,7 @@ def _stack(self): @property def parent(self): """The logical parent of the path.""" - path = self._raw_path + path = str(self) parent = self.parser.split(path)[0] if path != parent: parent = self.with_segments(parent) @@ -335,7 +350,7 @@ def parent(self): def parents(self): """A sequence of this path's logical parents.""" split = self.parser.split - path = self._raw_path + path = str(self) parent = split(path)[0] parents = [] while path != parent: @@ -347,7 +362,7 @@ def parents(self): def is_absolute(self): """True if the path is absolute (has both a root and, if applicable, a drive).""" - return self.parser.isabs(self._raw_path) + return self.parser.isabs(str(self)) @property def _pattern_str(self): diff --git a/Lib/pathlib/_local.py b/Lib/pathlib/_local.py index 99474e1f71a3073..b27f456d3752252 100644 --- a/Lib/pathlib/_local.py +++ b/Lib/pathlib/_local.py @@ -68,10 +68,6 @@ class PurePath(PurePathBase): """ __slots__ = ( - # The `_raw_paths` slot stores unnormalized string paths. This is set - # in the `__init__()` method. - '_raw_paths', - # The `_drv`, `_root` and `_tail_cached` slots store parsed and # normalized parts of the path. They are set when any of the `drive`, # `root` or `_tail` properties are accessed for the first time. 
The @@ -299,25 +295,14 @@ def _parse_pattern(cls, pattern): parts.append('') return parts - @property - def _raw_path(self): - """The joined but unnormalized path.""" - paths = self._raw_paths - if len(paths) == 0: - path = '' - elif len(paths) == 1: - path = paths[0] - else: - path = self.parser.join(*paths) - return path - @property def drive(self): """The drive prefix (letter or UNC path), if any.""" try: return self._drv except AttributeError: - self._drv, self._root, self._tail_cached = self._parse_path(self._raw_path) + raw_path = PurePathBase.__str__(self) + self._drv, self._root, self._tail_cached = self._parse_path(raw_path) return self._drv @property @@ -326,7 +311,8 @@ def root(self): try: return self._root except AttributeError: - self._drv, self._root, self._tail_cached = self._parse_path(self._raw_path) + raw_path = PurePathBase.__str__(self) + self._drv, self._root, self._tail_cached = self._parse_path(raw_path) return self._root @property @@ -334,7 +320,8 @@ def _tail(self): try: return self._tail_cached except AttributeError: - self._drv, self._root, self._tail_cached = self._parse_path(self._raw_path) + raw_path = PurePathBase.__str__(self) + self._drv, self._root, self._tail_cached = self._parse_path(raw_path) return self._tail_cached @property diff --git a/Lib/test/test_pathlib/test_pathlib_abc.py b/Lib/test/test_pathlib/test_pathlib_abc.py index 4ab804850e9c3e4..d155e7c5bb99350 100644 --- a/Lib/test/test_pathlib/test_pathlib_abc.py +++ b/Lib/test/test_pathlib/test_pathlib_abc.py @@ -86,11 +86,6 @@ def test_unsupported_operation_pure(self): p.suffix with self.assertRaises(e): p.suffixes - with self.assertRaises(e): - p / 'bar' - with self.assertRaises(e): - 'bar' / p - self.assertRaises(e, p.joinpath, 'bar') self.assertRaises(e, p.with_name, 'bar') self.assertRaises(e, p.with_stem, 'bar') self.assertRaises(e, p.with_suffix, '.txt') From 478a1c09c4cdb082c84f9102e3e452e6911b84f1 Mon Sep 17 00:00:00 2001 From: Damien 
<81557462+Damien-Chen@users.noreply.github.com> Date: Wed, 6 Nov 2024 07:10:12 +0800 Subject: [PATCH 22/54] gh-122544: Change OS image in Azure pipeline to Ubuntu 24.04 (#125344) --- .azure-pipelines/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.azure-pipelines/ci.yml b/.azure-pipelines/ci.yml index d3e842d9f31d01b..7490dd947e1504e 100644 --- a/.azure-pipelines/ci.yml +++ b/.azure-pipelines/ci.yml @@ -5,7 +5,7 @@ jobs: displayName: Pre-build checks pool: - vmImage: ubuntu-22.04 + vmImage: ubuntu-24.04 steps: - template: ./prebuild-checks.yml From c3a12ae13ee0212a096f570064407f8ba954e6aa Mon Sep 17 00:00:00 2001 From: Savannah Ostrowski Date: Tue, 5 Nov 2024 15:26:46 -0800 Subject: [PATCH 23/54] GH-125911: Rename big trampoline to "shim" (GH-126339) --- Python/jit.c | 12 +++++------- Tools/jit/_targets.py | 4 ++-- Tools/jit/_writer.py | 4 ++-- Tools/jit/{trampoline.c => shim.c} | 0 4 files changed, 9 insertions(+), 11 deletions(-) rename Tools/jit/{trampoline.c => shim.c} (100%) diff --git a/Python/jit.c b/Python/jit.c index 135daeb1b1da808..90f693dfb7c41b9 100644 --- a/Python/jit.c +++ b/Python/jit.c @@ -470,7 +470,7 @@ _PyJIT_Compile(_PyExecutorObject *executor, const _PyUOpInstruction trace[], siz size_t code_size = 0; size_t data_size = 0; jit_state state = {0}; - group = &trampoline; + group = &shim; code_size += group->code_size; data_size += group->data_size; combine_symbol_mask(group->trampoline_mask, state.trampolines.mask); @@ -507,12 +507,10 @@ _PyJIT_Compile(_PyExecutorObject *executor, const _PyUOpInstruction trace[], siz unsigned char *code = memory; unsigned char *data = memory + code_size; state.trampolines.mem = memory + code_size + data_size; - // Compile the trampoline, which handles converting between the native + // Compile the shim, which handles converting between the native // calling convention and the calling convention used by jitted code - // (which may be different for efficiency reasons). 
On platforms where - // we don't change calling conventions, the trampoline is empty and - // nothing is emitted here: - group = &trampoline; + // (which may be different for efficiency reasons). + group = &shim; group->emit(code, data, executor, NULL, &state); code += group->code_size; data += group->data_size; @@ -536,7 +534,7 @@ _PyJIT_Compile(_PyExecutorObject *executor, const _PyUOpInstruction trace[], siz return -1; } executor->jit_code = memory; - executor->jit_side_entry = memory + trampoline.code_size; + executor->jit_side_entry = memory + shim.code_size; executor->jit_size = total_size; return 0; } diff --git a/Tools/jit/_targets.py b/Tools/jit/_targets.py index 634208da3c8157a..d8dce0a905c0f85 100644 --- a/Tools/jit/_targets.py +++ b/Tools/jit/_targets.py @@ -154,8 +154,8 @@ async def _build_stencils(self) -> dict[str, _stencils.StencilGroup]: with tempfile.TemporaryDirectory() as tempdir: work = pathlib.Path(tempdir).resolve() async with asyncio.TaskGroup() as group: - coro = self._compile("trampoline", TOOLS_JIT / "trampoline.c", work) - tasks.append(group.create_task(coro, name="trampoline")) + coro = self._compile("shim", TOOLS_JIT / "shim.c", work) + tasks.append(group.create_task(coro, name="shim")) template = TOOLS_JIT_TEMPLATE_C.read_text() for case, opname in cases_and_opnames: # Write out a copy of the template with *only* this case diff --git a/Tools/jit/_writer.py b/Tools/jit/_writer.py index f33d8ef322f0736..81a9f08db31703a 100644 --- a/Tools/jit/_writer.py +++ b/Tools/jit/_writer.py @@ -22,11 +22,11 @@ def _dump_footer( yield " symbol_mask trampoline_mask;" yield "} StencilGroup;" yield "" - yield f"static const StencilGroup trampoline = {groups['trampoline'].as_c('trampoline')};" + yield f"static const StencilGroup shim = {groups['shim'].as_c('shim')};" yield "" yield "static const StencilGroup stencil_groups[MAX_UOP_ID + 1] = {" for opname, group in sorted(groups.items()): - if opname == "trampoline": + if opname == "shim": continue yield 
f" [{opname}] = {group.as_c(opname)}," yield "};" diff --git a/Tools/jit/trampoline.c b/Tools/jit/shim.c similarity index 100% rename from Tools/jit/trampoline.c rename to Tools/jit/shim.c From fc233f46d3761b4e808be2c44fda0b843179004e Mon Sep 17 00:00:00 2001 From: Damien <81557462+Damien-Chen@users.noreply.github.com> Date: Wed, 6 Nov 2024 07:29:21 +0800 Subject: [PATCH 24/54] gh-122544: Change OS image in GitHub Actions to Ubuntu 24.04 (#122566) --- .github/workflows/build.yml | 8 ++++---- .github/workflows/posix-deps-apt.sh | 1 - .github/workflows/reusable-tsan.yml | 2 +- .github/workflows/reusable-ubuntu.yml | 2 +- .github/workflows/reusable-wasi.yml | 2 +- 5 files changed, 7 insertions(+), 8 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 083b07156674df0..f63c4606220494f 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -88,7 +88,7 @@ jobs: name: 'Check if generated files are up to date' # Don't use ubuntu-latest but a specific version to make the job # reproducible: to get the same tools versions (autoconf, aclocal, ...) 
- runs-on: ubuntu-22.04 + runs-on: ubuntu-24.04 timeout-minutes: 60 needs: check_source if: needs.check_source.outputs.run_tests == 'true' @@ -237,7 +237,7 @@ jobs: strategy: fail-fast: false matrix: - os: [ubuntu-22.04] + os: [ubuntu-24.04] openssl_ver: [3.0.15, 3.1.7, 3.2.3, 3.3.2] env: OPENSSL_VER: ${{ matrix.openssl_ver }} @@ -297,7 +297,7 @@ jobs: test_hypothesis: name: "Hypothesis tests on Ubuntu" - runs-on: ubuntu-22.04 + runs-on: ubuntu-24.04 timeout-minutes: 60 needs: check_source if: needs.check_source.outputs.run_tests == 'true' && needs.check_source.outputs.run_hypothesis == 'true' @@ -417,7 +417,7 @@ jobs: if: needs.check_source.outputs.run_tests == 'true' strategy: matrix: - os: [ubuntu-22.04] + os: [ubuntu-24.04] env: OPENSSL_VER: 3.0.15 PYTHONSTRICTEXTENSIONBUILD: 1 diff --git a/.github/workflows/posix-deps-apt.sh b/.github/workflows/posix-deps-apt.sh index bfc5a0874281bd8..d5538cd9367ec60 100755 --- a/.github/workflows/posix-deps-apt.sh +++ b/.github/workflows/posix-deps-apt.sh @@ -13,7 +13,6 @@ apt-get -yq install \ libgdbm-dev \ libgdbm-compat-dev \ liblzma-dev \ - libmpdec-dev \ libncurses5-dev \ libreadline6-dev \ libsqlite3-dev \ diff --git a/.github/workflows/reusable-tsan.yml b/.github/workflows/reusable-tsan.yml index 65072efa8e90234..7a4d81f0bdcad18 100644 --- a/.github/workflows/reusable-tsan.yml +++ b/.github/workflows/reusable-tsan.yml @@ -21,7 +21,7 @@ on: jobs: build_tsan_reusable: name: 'Thread sanitizer' - runs-on: ubuntu-22.04 + runs-on: ubuntu-24.04 timeout-minutes: 60 steps: - uses: actions/checkout@v4 diff --git a/.github/workflows/reusable-ubuntu.yml b/.github/workflows/reusable-ubuntu.yml index f0ca6a9e7ed7930..ec39025504efd12 100644 --- a/.github/workflows/reusable-ubuntu.yml +++ b/.github/workflows/reusable-ubuntu.yml @@ -20,7 +20,7 @@ jobs: strategy: fail-fast: false matrix: - os: [ubuntu-22.04] + os: [ubuntu-24.04] env: FORCE_COLOR: 1 OPENSSL_VER: 3.0.15 diff --git a/.github/workflows/reusable-wasi.yml 
b/.github/workflows/reusable-wasi.yml index abc617a317cc0f1..85af793c342c518 100644 --- a/.github/workflows/reusable-wasi.yml +++ b/.github/workflows/reusable-wasi.yml @@ -11,7 +11,7 @@ jobs: build_wasi_reusable: name: 'build and test' timeout-minutes: 60 - runs-on: ubuntu-22.04 + runs-on: ubuntu-24.04 env: WASMTIME_VERSION: 22.0.0 WASI_SDK_VERSION: 24 From 83ba8c2bba834c0b92de669cac16fcda17485e0e Mon Sep 17 00:00:00 2001 From: blhsing Date: Wed, 6 Nov 2024 07:53:54 +0800 Subject: [PATCH 25/54] gh-70764: inspect.getclosurevars now identifies global variables with LOAD_GLOBAL (#120143) --- Lib/inspect.py | 14 +++++++++----- Lib/test/test_inspect/test_inspect.py | 13 +++++++++++++ .../2024-06-06-04-06-05.gh-issue-70764.6511hw.rst | 1 + 3 files changed, 23 insertions(+), 5 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-06-06-04-06-05.gh-issue-70764.6511hw.rst diff --git a/Lib/inspect.py b/Lib/inspect.py index 08718d82e915828..e3f74e9f047eafc 100644 --- a/Lib/inspect.py +++ b/Lib/inspect.py @@ -1507,11 +1507,15 @@ def getclosurevars(func): global_vars = {} builtin_vars = {} unbound_names = set() - for name in code.co_names: - if name in ("None", "True", "False"): - # Because these used to be builtins instead of keywords, they - # may still show up as name references. We ignore them. 
- continue + global_names = set() + for instruction in dis.get_instructions(code): + opname = instruction.opname + name = instruction.argval + if opname == "LOAD_ATTR": + unbound_names.add(name) + elif opname == "LOAD_GLOBAL": + global_names.add(name) + for name in global_names: try: global_vars[name] = global_ns[name] except KeyError: diff --git a/Lib/test/test_inspect/test_inspect.py b/Lib/test/test_inspect/test_inspect.py index a4430a868676e22..a92627a4d60f687 100644 --- a/Lib/test/test_inspect/test_inspect.py +++ b/Lib/test/test_inspect/test_inspect.py @@ -1960,6 +1960,19 @@ def g(local_ref): builtin_vars, unbound_names) self.assertEqual(inspect.getclosurevars(C().f(_arg)), expected) + def test_attribute_same_name_as_global_var(self): + class C: + _global_ref = object() + def f(): + print(C._global_ref, _global_ref) + nonlocal_vars = {"C": C} + global_vars = {"_global_ref": _global_ref} + builtin_vars = {"print": print} + unbound_names = {"_global_ref"} + expected = inspect.ClosureVars(nonlocal_vars, global_vars, + builtin_vars, unbound_names) + self.assertEqual(inspect.getclosurevars(f), expected) + def test_nonlocal_vars(self): # More complex tests of nonlocal resolution def _nonlocal_vars(f): diff --git a/Misc/NEWS.d/next/Library/2024-06-06-04-06-05.gh-issue-70764.6511hw.rst b/Misc/NEWS.d/next/Library/2024-06-06-04-06-05.gh-issue-70764.6511hw.rst new file mode 100644 index 000000000000000..4cfb66a6ccc6ee5 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-06-06-04-06-05.gh-issue-70764.6511hw.rst @@ -0,0 +1 @@ +Fixed an issue where :func:`inspect.getclosurevars` would incorrectly classify an attribute name as a global variable when the name exists both as an attribute name and a global variable. 
From a204c63919ca7ce528d8e3ab4196a4aa1a2b6ac4 Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Tue, 5 Nov 2024 19:09:04 -0800 Subject: [PATCH 26/54] GH-126464: Temporarily disable `aarch64-apple-darwin` JIT CI jobs (gh-126465) * Temporarily disable aarch64-apple-darwin JIT CI jobs * Also up here --- .github/workflows/jit.yml | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/.github/workflows/jit.yml b/.github/workflows/jit.yml index 48f05818a38f969..897c692118e9a4d 100644 --- a/.github/workflows/jit.yml +++ b/.github/workflows/jit.yml @@ -52,7 +52,7 @@ jobs: - x86_64-pc-windows-msvc/msvc - aarch64-pc-windows-msvc/msvc - x86_64-apple-darwin/clang - - aarch64-apple-darwin/clang + # - aarch64-apple-darwin/clang - x86_64-unknown-linux-gnu/gcc - x86_64-unknown-linux-gnu/clang - aarch64-unknown-linux-gnu/gcc @@ -79,10 +79,11 @@ jobs: architecture: x86_64 runner: macos-13 compiler: clang - - target: aarch64-apple-darwin/clang - architecture: aarch64 - runner: macos-14 - compiler: clang + # GH-126464: A recent change to either GHA or LLVM broke this job: + # - target: aarch64-apple-darwin/clang + # architecture: aarch64 + # runner: macos-14 + # compiler: clang - target: x86_64-unknown-linux-gnu/gcc architecture: x86_64 runner: ubuntu-22.04 From 4ea214ea982944b59ff543a5c6f4ec782a47588c Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Wed, 6 Nov 2024 12:35:10 +0900 Subject: [PATCH 27/54] gh-115999: Add free-threaded specialization for CONTAINS_OP (gh-126450) - The specialization logic determines the appropriate specialization using only the operand's type, which is safe to read non-atomically (changing it requires stopping the world). We are guaranteed that the type will not change in between when it is checked and when we specialize the bytecode because the types involved are immutable (you cannot assign to `__class__` for exact instances of `dict`, `set`, or `frozenset`). The bytecode is mutated atomically using helpers. 
- The specialized instructions rely on the operand type not changing in between the `DEOPT_IF` checks and the calls to the appropriate type-specific helpers (e.g. `_PySet_Contains`). This is a correctness requirement in the default builds and there are no changes to the opcodes in the free-threaded builds that would invalidate this. --- Lib/test/test_dis.py | 21 +++++++++++++++++++++ Python/bytecodes.c | 2 +- Python/generated_cases.c.h | 2 +- Python/specialize.c | 10 ++++++---- 4 files changed, 29 insertions(+), 6 deletions(-) diff --git a/Lib/test/test_dis.py b/Lib/test/test_dis.py index a991c67fca46bea..337ee3bbb05136b 100644 --- a/Lib/test/test_dis.py +++ b/Lib/test/test_dis.py @@ -1335,6 +1335,27 @@ def test_call_specialize(self): got = self.get_disassembly(co, adaptive=True) self.do_disassembly_compare(got, call_quicken) + @cpython_only + @requires_specialization_ft + def test_contains_specialize(self): + contains_op_quicken = """\ + 0 RESUME_CHECK 0 + + 1 LOAD_NAME 0 (a) + LOAD_NAME 1 (b) + %s + RETURN_VALUE +""" + co_dict = compile('a in b', "", "eval") + self.code_quicken(lambda: exec(co_dict, {}, {'a': 1, 'b': {1: 5}})) + got = self.get_disassembly(co_dict, adaptive=True) + self.do_disassembly_compare(got, contains_op_quicken % "CONTAINS_OP_DICT 0 (in)") + + co_set = compile('a in b', "", "eval") + self.code_quicken(lambda: exec(co_set, {}, {'a': 1.0, 'b': {1, 2, 3}})) + got = self.get_disassembly(co_set, adaptive=True) + self.do_disassembly_compare(got, contains_op_quicken % "CONTAINS_OP_SET 0 (in)") + @cpython_only @requires_specialization def test_loop_quicken(self): diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 8c52db6ab684369..7ae0f20369641a7 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -2508,7 +2508,7 @@ dummy_func( } specializing op(_SPECIALIZE_CONTAINS_OP, (counter/1, left, right -- left, right)) { - #if ENABLE_SPECIALIZATION + #if ENABLE_SPECIALIZATION_FT if (ADAPTIVE_COUNTER_TRIGGERS(counter)) { next_instr = 
this_instr; _Py_Specialize_ContainsOp(right, next_instr); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index d346875ea4455f0..03b4d2224922f0e 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -3395,7 +3395,7 @@ right = stack_pointer[-1]; uint16_t counter = read_u16(&this_instr[1].cache); (void)counter; - #if ENABLE_SPECIALIZATION + #if ENABLE_SPECIALIZATION_FT if (ADAPTIVE_COUNTER_TRIGGERS(counter)) { next_instr = this_instr; _PyFrame_SetStackPointer(frame, stack_pointer); diff --git a/Python/specialize.c b/Python/specialize.c index 86cb997ca2ced3a..17e661b2bd3c769 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -2747,25 +2747,27 @@ _Py_Specialize_ContainsOp(_PyStackRef value_st, _Py_CODEUNIT *instr) { PyObject *value = PyStackRef_AsPyObjectBorrow(value_st); - assert(ENABLE_SPECIALIZATION); + assert(ENABLE_SPECIALIZATION_FT); assert(_PyOpcode_Caches[CONTAINS_OP] == INLINE_CACHE_ENTRIES_COMPARE_OP); + uint8_t specialized_op; _PyContainsOpCache *cache = (_PyContainsOpCache *)(instr + 1); if (PyDict_CheckExact(value)) { - instr->op.code = CONTAINS_OP_DICT; + specialized_op = CONTAINS_OP_DICT; goto success; } if (PySet_CheckExact(value) || PyFrozenSet_CheckExact(value)) { - instr->op.code = CONTAINS_OP_SET; + specialized_op = CONTAINS_OP_SET; goto success; } SPECIALIZATION_FAIL(CONTAINS_OP, containsop_fail_kind(value)); STAT_INC(CONTAINS_OP, failure); - instr->op.code = CONTAINS_OP; + SET_OPCODE_OR_RETURN(instr, CONTAINS_OP); cache->counter = adaptive_counter_backoff(cache->counter); return; success: STAT_INC(CONTAINS_OP, success); + SET_OPCODE_OR_RETURN(instr, specialized_op); cache->counter = adaptive_counter_cooldown(); } From 6431f379b86c2f41a9a9f5a54e77d4e8d556489d Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Tue, 5 Nov 2024 22:54:40 -0800 Subject: [PATCH 28/54] gh-120754: Add to `io` optimization to what's new (#126466) --- Doc/whatsnew/3.14.rst | 9 +++++++++ 1 file changed, 9 insertions(+) diff 
--git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 9300de440cdc305..b9d2c27eb9a3214 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -471,6 +471,15 @@ asyncio reduces memory usage. (Contributed by Kumar Aditya in :gh:`107803`.) +io +--- +* :mod:`io` which provides the built-in :func:`open` makes less system calls + when opening regular files as well as reading whole files. Reading a small + operating system cached file in full is up to 15% faster. + :func:`pathlib.Path.read_bytes` has the most optimizations for reading a + file's bytes in full. (Contributed by Cody Maloney and Victor Stinner in + :gh:`120754` and :gh:`90102`.) + Deprecated ========== From 09d7083962062acfef7e7a9a309a01fb70ad8276 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 6 Nov 2024 11:20:17 +0100 Subject: [PATCH 29/54] gh-126433: Change channel_info.count to int64_t (#126447) Fix compiler warnings on 32-bit Windows: change channel_info.count type from Py_ssize_t to int64_t in _interpchannelsmodule.c. --- Modules/_interpchannelsmodule.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Modules/_interpchannelsmodule.c b/Modules/_interpchannelsmodule.c index 68ee429a9e1dfe5..5dc032b46cac9af 100644 --- a/Modules/_interpchannelsmodule.c +++ b/Modules/_interpchannelsmodule.c @@ -2047,7 +2047,7 @@ struct channel_info { int recv; } cur; } status; - Py_ssize_t count; + int64_t count; }; static int From b1c4ffc20573befb4db66bbbdd569b9bd13bb127 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 6 Nov 2024 11:59:39 +0100 Subject: [PATCH 30/54] gh-126455: Disallow _ssl.SSLSocket instantiation (#126481) Prevent creation of incomplete/invalid _ssl.SSLSocket objects when created directly. 
--- Modules/_ssl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Modules/_ssl.c b/Modules/_ssl.c index 5223e21b5cdb11f..b6b5ebf094c938a 100644 --- a/Modules/_ssl.c +++ b/Modules/_ssl.c @@ -2979,7 +2979,7 @@ static PyType_Spec PySSLSocket_spec = { .name = "_ssl._SSLSocket", .basicsize = sizeof(PySSLSocket), .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE | - Py_TPFLAGS_HAVE_GC), + Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_DISALLOW_INSTANTIATION), .slots = PySSLSocket_slots, }; From 9cba47d9f151734815a61e32391ea7fca877ea55 Mon Sep 17 00:00:00 2001 From: Kamil Turek Date: Wed, 6 Nov 2024 13:16:13 +0100 Subject: [PATCH 31/54] gh-122838: Document missing opcodes (#123073) --- Doc/library/dis.rst | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/Doc/library/dis.rst b/Doc/library/dis.rst index ecbe0fae8cd74cd..e2926f2440af6da 100644 --- a/Doc/library/dis.rst +++ b/Doc/library/dis.rst @@ -1395,6 +1395,13 @@ iterations of the loop. This opcode is now only used in situations where the local variable is guaranteed to be initialized. It cannot raise :exc:`UnboundLocalError`. +.. opcode:: LOAD_FAST_LOAD_FAST (var_nums) + + Pushes references to ``co_varnames[var_nums >> 4]`` and + ``co_varnames[var_nums & 15]`` onto the stack. + + .. versionadded:: 3.13 + .. opcode:: LOAD_FAST_CHECK (var_num) Pushes a reference to the local ``co_varnames[var_num]`` onto the stack, @@ -1415,6 +1422,20 @@ iterations of the loop. Stores ``STACK.pop()`` into the local ``co_varnames[var_num]``. +.. opcode:: STORE_FAST_STORE_FAST (var_nums) + + Stores ``STACK[-1]`` into ``co_varnames[var_nums >> 4]`` + and ``STACK[-2]`` into ``co_varnames[var_nums & 15]``. + + .. versionadded:: 3.13 + +.. opcode:: STORE_FAST_LOAD_FAST (var_nums) + + Stores ``STACK.pop()`` into the local ``co_varnames[var_nums >> 4]`` + and pushes a reference to the local ``co_varnames[var_nums & 15]`` + onto the stack. + + .. versionadded:: 3.13 .. 
opcode:: DELETE_FAST (var_num) From a1c57bcfd2bcbc55ff858407e09c1d8d8cee44e6 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 6 Nov 2024 14:24:46 +0100 Subject: [PATCH 32/54] gh-126461: Fix _Unpickler_ReadFromFile() error handling (#126485) Handle _Unpickler_SetStringInput() failure. --- Modules/_pickle.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Modules/_pickle.c b/Modules/_pickle.c index b2bd9545c1b1304..863da6878409f32 100644 --- a/Modules/_pickle.c +++ b/Modules/_pickle.c @@ -1288,6 +1288,10 @@ _Unpickler_ReadFromFile(UnpicklerObject *self, Py_ssize_t n) else { read_size = _Unpickler_SetStringInput(self, data); Py_DECREF(data); + if (read_size < 0) { + return -1; + } + self->prefetched_idx = 0; if (n <= read_size) return n; From 9ce4fa0719d291070b6a66fe25716ef1e81448fc Mon Sep 17 00:00:00 2001 From: mpage Date: Wed, 6 Nov 2024 12:04:04 -0800 Subject: [PATCH 33/54] gh-115999: Introduce helpers for (un)specializing instructions (#126414) Introduce helpers for (un)specializing instructions Consolidate the code to specialize/unspecialize instructions into two helper functions and use them in `_Py_Specialize_BinaryOp`. The resulting code is more concise and keeps all of the logic at the point where we decide to specialize/unspecialize an instruction. 
--- Python/specialize.c | 132 ++++++++++++++++++++++++++++---------------- 1 file changed, 84 insertions(+), 48 deletions(-) diff --git a/Python/specialize.c b/Python/specialize.c index 17e661b2bd3c769..2673e16e596a1a1 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -24,25 +24,6 @@ extern const char *_PyUOpName(int index); * ./adaptive.md */ -#ifdef Py_GIL_DISABLED -#define SET_OPCODE_OR_RETURN(instr, opcode) \ - do { \ - uint8_t old_op = _Py_atomic_load_uint8_relaxed(&(instr)->op.code); \ - if (old_op >= MIN_INSTRUMENTED_OPCODE) { \ - /* Lost race with instrumentation */ \ - return; \ - } \ - if (!_Py_atomic_compare_exchange_uint8(&(instr)->op.code, &old_op, \ - (opcode))) { \ - /* Lost race with instrumentation */ \ - assert(old_op >= MIN_INSTRUMENTED_OPCODE); \ - return; \ - } \ - } while (0) -#else -#define SET_OPCODE_OR_RETURN(instr, opcode) (instr)->op.code = (opcode) -#endif - #ifdef Py_STATS GCStats _py_gc_stats[NUM_GENERATIONS] = { 0 }; static PyStats _Py_stats_struct = { .gc_stats = _py_gc_stats }; @@ -687,6 +668,73 @@ _PyCode_Quicken(_Py_CODEUNIT *instructions, Py_ssize_t size, PyObject *consts, #define SPEC_FAIL_CONTAINS_OP_LIST 11 #define SPEC_FAIL_CONTAINS_OP_USER_CLASS 12 +static inline int +set_opcode(_Py_CODEUNIT *instr, uint8_t opcode) +{ +#ifdef Py_GIL_DISABLED + uint8_t old_op = _Py_atomic_load_uint8_relaxed(&instr->op.code); + if (old_op >= MIN_INSTRUMENTED_OPCODE) { + /* Lost race with instrumentation */ + return 0; + } + if (!_Py_atomic_compare_exchange_uint8(&instr->op.code, &old_op, opcode)) { + /* Lost race with instrumentation */ + assert(old_op >= MIN_INSTRUMENTED_OPCODE); + return 0; + } + return 1; +#else + instr->op.code = opcode; + return 1; +#endif +} + +static inline void +set_counter(_Py_BackoffCounter *counter, _Py_BackoffCounter value) +{ + FT_ATOMIC_STORE_UINT16_RELAXED(counter->value_and_backoff, + value.value_and_backoff); +} + +static inline _Py_BackoffCounter +load_counter(_Py_BackoffCounter *counter) +{ + 
_Py_BackoffCounter result = { + .value_and_backoff = + FT_ATOMIC_LOAD_UINT16_RELAXED(counter->value_and_backoff)}; + return result; +} + +static inline void +specialize(_Py_CODEUNIT *instr, uint8_t specialized_opcode) +{ + assert(!PyErr_Occurred()); + if (!set_opcode(instr, specialized_opcode)) { + STAT_INC(_PyOpcode_Deopt[specialized_opcode], failure); + SPECIALIZATION_FAIL(_PyOpcode_Deopt[specialized_opcode], + SPEC_FAIL_OTHER); + return; + } + set_counter((_Py_BackoffCounter *)instr + 1, adaptive_counter_cooldown()); +} + +static inline void +unspecialize(_Py_CODEUNIT *instr, int reason) +{ + assert(!PyErr_Occurred()); + uint8_t opcode = FT_ATOMIC_LOAD_UINT8_RELAXED(instr->op.code); + uint8_t generic_opcode = _PyOpcode_Deopt[opcode]; + STAT_INC(generic_opcode, failure); + if (!set_opcode(instr, generic_opcode)) { + SPECIALIZATION_FAIL(generic_opcode, SPEC_FAIL_OTHER); + return; + } + SPECIALIZATION_FAIL(generic_opcode, reason); + _Py_BackoffCounter *counter = (_Py_BackoffCounter *)instr + 1; + _Py_BackoffCounter cur = load_counter(counter); + set_counter(counter, adaptive_counter_backoff(cur)); +} + static int function_kind(PyCodeObject *code); static bool function_check_args(PyObject *o, int expected_argcount, int opcode); static uint32_t function_get_version(PyObject *o, int opcode); @@ -2195,7 +2243,6 @@ _Py_Specialize_CallKw(_PyStackRef callable_st, _Py_CODEUNIT *instr, int nargs) } } -#ifdef Py_STATS static int binary_op_fail_kind(int oparg, PyObject *lhs, PyObject *rhs) { @@ -2263,7 +2310,6 @@ binary_op_fail_kind(int oparg, PyObject *lhs, PyObject *rhs) } Py_UNREACHABLE(); } -#endif // Py_STATS void _Py_Specialize_BinaryOp(_PyStackRef lhs_st, _PyStackRef rhs_st, _Py_CODEUNIT *instr, @@ -2273,8 +2319,6 @@ _Py_Specialize_BinaryOp(_PyStackRef lhs_st, _PyStackRef rhs_st, _Py_CODEUNIT *in PyObject *rhs = PyStackRef_AsPyObjectBorrow(rhs_st); assert(ENABLE_SPECIALIZATION_FT); assert(_PyOpcode_Caches[BINARY_OP] == INLINE_CACHE_ENTRIES_BINARY_OP); - 
_PyBinaryOpCache *cache = (_PyBinaryOpCache *)(instr + 1); - uint8_t specialized_op; switch (oparg) { case NB_ADD: case NB_INPLACE_ADD: @@ -2285,19 +2329,19 @@ _Py_Specialize_BinaryOp(_PyStackRef lhs_st, _PyStackRef rhs_st, _Py_CODEUNIT *in _Py_CODEUNIT next = instr[INLINE_CACHE_ENTRIES_BINARY_OP + 1]; bool to_store = (next.op.code == STORE_FAST); if (to_store && PyStackRef_AsPyObjectBorrow(locals[next.op.arg]) == lhs) { - specialized_op = BINARY_OP_INPLACE_ADD_UNICODE; - goto success; + specialize(instr, BINARY_OP_INPLACE_ADD_UNICODE); + return; } - specialized_op = BINARY_OP_ADD_UNICODE; - goto success; + specialize(instr, BINARY_OP_ADD_UNICODE); + return; } if (PyLong_CheckExact(lhs)) { - specialized_op = BINARY_OP_ADD_INT; - goto success; + specialize(instr, BINARY_OP_ADD_INT); + return; } if (PyFloat_CheckExact(lhs)) { - specialized_op = BINARY_OP_ADD_FLOAT; - goto success; + specialize(instr, BINARY_OP_ADD_FLOAT); + return; } break; case NB_MULTIPLY: @@ -2306,12 +2350,12 @@ _Py_Specialize_BinaryOp(_PyStackRef lhs_st, _PyStackRef rhs_st, _Py_CODEUNIT *in break; } if (PyLong_CheckExact(lhs)) { - specialized_op = BINARY_OP_MULTIPLY_INT; - goto success; + specialize(instr, BINARY_OP_MULTIPLY_INT); + return; } if (PyFloat_CheckExact(lhs)) { - specialized_op = BINARY_OP_MULTIPLY_FLOAT; - goto success; + specialize(instr, BINARY_OP_MULTIPLY_FLOAT); + return; } break; case NB_SUBTRACT: @@ -2320,24 +2364,16 @@ _Py_Specialize_BinaryOp(_PyStackRef lhs_st, _PyStackRef rhs_st, _Py_CODEUNIT *in break; } if (PyLong_CheckExact(lhs)) { - specialized_op = BINARY_OP_SUBTRACT_INT; - goto success; + specialize(instr, BINARY_OP_SUBTRACT_INT); + return; } if (PyFloat_CheckExact(lhs)) { - specialized_op = BINARY_OP_SUBTRACT_FLOAT; - goto success; + specialize(instr, BINARY_OP_SUBTRACT_FLOAT); + return; } break; } - SPECIALIZATION_FAIL(BINARY_OP, binary_op_fail_kind(oparg, lhs, rhs)); - STAT_INC(BINARY_OP, failure); - SET_OPCODE_OR_RETURN(instr, BINARY_OP); - cache->counter = 
adaptive_counter_backoff(cache->counter); - return; -success: - STAT_INC(BINARY_OP, success); - SET_OPCODE_OR_RETURN(instr, specialized_op); - cache->counter = adaptive_counter_cooldown(); + unspecialize(instr, binary_op_fail_kind(oparg, lhs, rhs)); } From e56fd449fbb9ac099f389806d4c494fa66fca248 Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Thu, 7 Nov 2024 05:23:47 +0900 Subject: [PATCH 34/54] gh-115999: Move specializer test from test_dis to test_opcache (gh-126498) --- Lib/test/test_dis.py | 45 +----------------------------- Lib/test/test_opcache.py | 59 +++++++++++++++++++++++++++++++++++++++- 2 files changed, 59 insertions(+), 45 deletions(-) diff --git a/Lib/test/test_dis.py b/Lib/test/test_dis.py index 337ee3bbb05136b..f26411ace8fa732 100644 --- a/Lib/test/test_dis.py +++ b/Lib/test/test_dis.py @@ -10,8 +10,7 @@ import types import unittest from test.support import (captured_stdout, requires_debug_ranges, - requires_specialization, requires_specialization_ft, - cpython_only) + requires_specialization, cpython_only) from test.support.bytecode_helper import BytecodeTestCase import opcode @@ -1261,27 +1260,6 @@ def test_super_instructions(self): got = self.get_disassembly(load_test, adaptive=True) self.do_disassembly_compare(got, dis_load_test_quickened_code) - @cpython_only - @requires_specialization_ft - def test_binary_specialize(self): - binary_op_quicken = """\ - 0 RESUME_CHECK 0 - - 1 LOAD_NAME 0 (a) - LOAD_NAME 1 (b) - %s - RETURN_VALUE -""" - co_int = compile('a + b', "", "eval") - self.code_quicken(lambda: exec(co_int, {}, {'a': 1, 'b': 2})) - got = self.get_disassembly(co_int, adaptive=True) - self.do_disassembly_compare(got, binary_op_quicken % "BINARY_OP_ADD_INT 0 (+)") - - co_unicode = compile('a + b', "", "eval") - self.code_quicken(lambda: exec(co_unicode, {}, {'a': 'a', 'b': 'b'})) - got = self.get_disassembly(co_unicode, adaptive=True) - self.do_disassembly_compare(got, binary_op_quicken % "BINARY_OP_ADD_UNICODE 0 (+)") - @cpython_only 
@requires_specialization def test_binary_subscr_specialize(self): @@ -1335,27 +1313,6 @@ def test_call_specialize(self): got = self.get_disassembly(co, adaptive=True) self.do_disassembly_compare(got, call_quicken) - @cpython_only - @requires_specialization_ft - def test_contains_specialize(self): - contains_op_quicken = """\ - 0 RESUME_CHECK 0 - - 1 LOAD_NAME 0 (a) - LOAD_NAME 1 (b) - %s - RETURN_VALUE -""" - co_dict = compile('a in b', "", "eval") - self.code_quicken(lambda: exec(co_dict, {}, {'a': 1, 'b': {1: 5}})) - got = self.get_disassembly(co_dict, adaptive=True) - self.do_disassembly_compare(got, contains_op_quicken % "CONTAINS_OP_DICT 0 (in)") - - co_set = compile('a in b', "", "eval") - self.code_quicken(lambda: exec(co_set, {}, {'a': 1.0, 'b': {1, 2, 3}})) - got = self.get_disassembly(co_set, adaptive=True) - self.do_disassembly_compare(got, contains_op_quicken % "CONTAINS_OP_SET 0 (in)") - @cpython_only @requires_specialization def test_loop_quicken(self): diff --git a/Lib/test/test_opcache.py b/Lib/test/test_opcache.py index cdcddb0d717f23a..78e4bf44f7ea0c6 100644 --- a/Lib/test/test_opcache.py +++ b/Lib/test/test_opcache.py @@ -4,7 +4,9 @@ import threading import types import unittest -from test.support import threading_helper, check_impl_detail, requires_specialization +from test.support import (threading_helper, check_impl_detail, + requires_specialization, requires_specialization_ft, + cpython_only) from test.support.import_helper import import_module # Skip this module on other interpreters, it is cpython specific: @@ -34,6 +36,11 @@ def assert_specialized(self, f, opname): opnames = {instruction.opname for instruction in instructions} self.assertIn(opname, opnames) + def assert_no_opcode(self, f, opname): + instructions = dis.get_instructions(f, adaptive=True) + opnames = {instruction.opname for instruction in instructions} + self.assertNotIn(opname, opnames) + class TestLoadSuperAttrCache(unittest.TestCase): def 
test_descriptor_not_double_executed_on_spec_fail(self): @@ -1200,5 +1207,55 @@ def f(o, n): self.assertEqual(test_obj.b, 0) +class TestSpecializer(TestBase): + + @cpython_only + @requires_specialization_ft + def test_binary_op(self): + def f(): + for _ in range(100): + a, b = 1, 2 + c = a + b + self.assertEqual(c, 3) + + f() + self.assert_specialized(f, "BINARY_OP_ADD_INT") + self.assert_no_opcode(f, "BINARY_OP") + + def g(): + for _ in range(100): + a, b = "foo", "bar" + c = a + b + self.assertEqual(c, "foobar") + + g() + self.assert_specialized(g, "BINARY_OP_ADD_UNICODE") + self.assert_no_opcode(g, "BINARY_OP") + + @cpython_only + @requires_specialization_ft + def test_contain_op(self): + def f(): + for _ in range(100): + a, b = 1, {1: 2, 2: 5} + self.assertTrue(a in b) + self.assertFalse(3 in b) + + f() + self.assert_specialized(f, "CONTAINS_OP_DICT") + self.assert_no_opcode(f, "CONTAINS_OP") + + def g(): + for _ in range(100): + a, b = 1, {1, 2} + self.assertTrue(a in b) + self.assertFalse(3 in b) + + g() + self.assert_specialized(g, "CONTAINS_OP_SET") + self.assert_no_opcode(g, "CONTAINS_OP") + + + if __name__ == "__main__": unittest.main() From 8fa4dc4ba8646c59f945f2451c53e2919f066065 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Wed, 6 Nov 2024 22:25:14 +0200 Subject: [PATCH 35/54] gh-126489: Do not call persistent_id() for a persistent id in Python pickle (GH-126490) --- Lib/pickle.py | 9 +++++---- Lib/test/test_pickle.py | 6 ++++++ .../2024-11-06-13-41-38.gh-issue-126489.toaf-0.rst | 3 +++ 3 files changed, 14 insertions(+), 4 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-11-06-13-41-38.gh-issue-126489.toaf-0.rst diff --git a/Lib/pickle.py b/Lib/pickle.py index 965e1952fb8c5ea..25dadb3f75a5731 100644 --- a/Lib/pickle.py +++ b/Lib/pickle.py @@ -548,10 +548,11 @@ def save(self, obj, save_persistent_id=True): self.framer.commit_frame() # Check for persistent id (defined by a subclass) - pid = self.persistent_id(obj) - if pid is not None 
and save_persistent_id: - self.save_pers(pid) - return + if save_persistent_id: + pid = self.persistent_id(obj) + if pid is not None: + self.save_pers(pid) + return # Check the memo x = self.memo.get(id(obj)) diff --git a/Lib/test/test_pickle.py b/Lib/test/test_pickle.py index c84e507cdf645f5..9ec2eb97147fae7 100644 --- a/Lib/test/test_pickle.py +++ b/Lib/test/test_pickle.py @@ -224,25 +224,31 @@ def persistent_load(pid): def test_pickler_super(self): class PersPickler(self.pickler): def persistent_id(subself, obj): + called.append(obj) self.assertIsNone(super().persistent_id(obj)) return obj for proto in range(pickle.HIGHEST_PROTOCOL + 1): f = io.BytesIO() pickler = PersPickler(f, proto) + called = [] pickler.dump('abc') + self.assertEqual(called, ['abc']) self.assertEqual(self.loads(f.getvalue()), 'abc') def test_unpickler_super(self): class PersUnpickler(self.unpickler): def persistent_load(subself, pid): + called.append(pid) with self.assertRaises(self.persistent_load_error): super().persistent_load(pid) return pid for proto in range(pickle.HIGHEST_PROTOCOL + 1): unpickler = PersUnpickler(io.BytesIO(self.dumps('abc', proto))) + called = [] self.assertEqual(unpickler.load(), 'abc') + self.assertEqual(called, ['abc']) class PyPicklerUnpicklerObjectTests(AbstractPicklerUnpicklerObjectTests, unittest.TestCase): diff --git a/Misc/NEWS.d/next/Library/2024-11-06-13-41-38.gh-issue-126489.toaf-0.rst b/Misc/NEWS.d/next/Library/2024-11-06-13-41-38.gh-issue-126489.toaf-0.rst new file mode 100644 index 000000000000000..8a6573cdea7b42f --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-11-06-13-41-38.gh-issue-126489.toaf-0.rst @@ -0,0 +1,3 @@ +The Python implementation of :mod:`pickle` no longer calls +:meth:`pickle.Pickler.persistent_id` for the result of +:meth:`!persistent_id`. 
From 6e03ff2419a7faf514ad833dc513175c4f7e9bc7 Mon Sep 17 00:00:00 2001 From: Kirill Podoprigora Date: Wed, 6 Nov 2024 23:52:15 +0200 Subject: [PATCH 36/54] gh-126513: Use helpers for `_Py_Specialize_ConstainsOp` (#126517) * Use helpers for _Py_Specialize_ConstainsOp * Remove unnecessary variable --- Python/specialize.c | 24 +++++++----------------- 1 file changed, 7 insertions(+), 17 deletions(-) diff --git a/Python/specialize.c b/Python/specialize.c index 2673e16e596a1a1..0699e7be5e6b9c4 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -2760,8 +2760,8 @@ _Py_Specialize_ToBool(_PyStackRef value_o, _Py_CODEUNIT *instr) cache->counter = adaptive_counter_cooldown(); } -#ifdef Py_STATS -static int containsop_fail_kind(PyObject *value) { +static int +containsop_fail_kind(PyObject *value) { if (PyUnicode_CheckExact(value)) { return SPEC_FAIL_CONTAINS_OP_STR; } @@ -2776,7 +2776,6 @@ static int containsop_fail_kind(PyObject *value) { } return SPEC_FAIL_OTHER; } -#endif // Py_STATS void _Py_Specialize_ContainsOp(_PyStackRef value_st, _Py_CODEUNIT *instr) @@ -2785,26 +2784,17 @@ _Py_Specialize_ContainsOp(_PyStackRef value_st, _Py_CODEUNIT *instr) assert(ENABLE_SPECIALIZATION_FT); assert(_PyOpcode_Caches[CONTAINS_OP] == INLINE_CACHE_ENTRIES_COMPARE_OP); - uint8_t specialized_op; - _PyContainsOpCache *cache = (_PyContainsOpCache *)(instr + 1); if (PyDict_CheckExact(value)) { - specialized_op = CONTAINS_OP_DICT; - goto success; + specialize(instr, CONTAINS_OP_DICT); + return; } if (PySet_CheckExact(value) || PyFrozenSet_CheckExact(value)) { - specialized_op = CONTAINS_OP_SET; - goto success; + specialize(instr, CONTAINS_OP_SET); + return; } - SPECIALIZATION_FAIL(CONTAINS_OP, containsop_fail_kind(value)); - STAT_INC(CONTAINS_OP, failure); - SET_OPCODE_OR_RETURN(instr, CONTAINS_OP); - cache->counter = adaptive_counter_backoff(cache->counter); + unspecialize(instr, containsop_fail_kind(value)); return; -success: - STAT_INC(CONTAINS_OP, success); - 
SET_OPCODE_OR_RETURN(instr, specialized_op); - cache->counter = adaptive_counter_cooldown(); } /* Code init cleanup. From b9082958ef7dfb57d0fef745a5bf2521546a0dd6 Mon Sep 17 00:00:00 2001 From: Valerii <81074936+valerii-chirkov@users.noreply.github.com> Date: Thu, 7 Nov 2024 03:11:48 +0500 Subject: [PATCH 37/54] gh-126509: Update link to CPython's grammar docs in InternalDocs/parser.md (#126510) --- InternalDocs/parser.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/InternalDocs/parser.md b/InternalDocs/parser.md index 6398ba6cd2838f2..a0c70b46087d1ad 100644 --- a/InternalDocs/parser.md +++ b/InternalDocs/parser.md @@ -17,7 +17,7 @@ Therefore, changes to the Python language are made by modifying the [grammar file](../Grammar/python.gram). Developers rarely need to modify the generator itself. -See the devguide's [Changing CPython's grammar](https://devguide.python.org/developer-workflow/grammar/#grammar) +See [Changing CPython's grammar](./changing_grammar.md) for a detailed description of the grammar and the process for changing it. 
How PEG parsers work From 5dc36dc5658f6ba9cfd9d7a2771baaf17d2ee23a Mon Sep 17 00:00:00 2001 From: Stephen Morton Date: Wed, 6 Nov 2024 14:12:45 -0800 Subject: [PATCH 38/54] gh-126451: Register contextvars.Context to collections.abc.Mapping (#126452) Co-authored-by: sobolevn Co-authored-by: Alex Waygood Co-authored-by: Peter Bierma --- Lib/contextvars.py | 4 ++++ Lib/test/test_context.py | 14 ++++++++++++++ .../2024-11-05-11-28-45.gh-issue-126451.XJMtqz.rst | 2 ++ 3 files changed, 20 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2024-11-05-11-28-45.gh-issue-126451.XJMtqz.rst diff --git a/Lib/contextvars.py b/Lib/contextvars.py index d78c80dfe6f99ce..14514f185e069dd 100644 --- a/Lib/contextvars.py +++ b/Lib/contextvars.py @@ -1,4 +1,8 @@ +import _collections_abc from _contextvars import Context, ContextVar, Token, copy_context __all__ = ('Context', 'ContextVar', 'Token', 'copy_context') + + +_collections_abc.Mapping.register(Context) diff --git a/Lib/test/test_context.py b/Lib/test/test_context.py index b06b9df9f5b0b8a..82d1797ab3b79e2 100644 --- a/Lib/test/test_context.py +++ b/Lib/test/test_context.py @@ -1,3 +1,4 @@ +import collections.abc import concurrent.futures import contextvars import functools @@ -350,6 +351,19 @@ def ctx2_fun(): ctx1.run(ctx1_fun) + def test_context_isinstance(self): + ctx = contextvars.Context() + self.assertIsInstance(ctx, collections.abc.Mapping) + self.assertTrue(issubclass(contextvars.Context, collections.abc.Mapping)) + + mapping_methods = ( + '__contains__', '__eq__', '__getitem__', '__iter__', '__len__', + '__ne__', 'get', 'items', 'keys', 'values', + ) + for name in mapping_methods: + with self.subTest(name=name): + self.assertTrue(callable(getattr(ctx, name))) + @isolated_context @threading_helper.requires_working_threading() def test_context_threads_1(self): diff --git a/Misc/NEWS.d/next/Library/2024-11-05-11-28-45.gh-issue-126451.XJMtqz.rst b/Misc/NEWS.d/next/Library/2024-11-05-11-28-45.gh-issue-126451.XJMtqz.rst 
new file mode 100644 index 000000000000000..563cb2515eca60e --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-11-05-11-28-45.gh-issue-126451.XJMtqz.rst @@ -0,0 +1,2 @@ +Register the :class:`contextvars.Context` type to +:class:`collections.abc.Mapping`. From 2a6b6b33dfe0f3c435abf2829b62ef3f1ef12cd3 Mon Sep 17 00:00:00 2001 From: Brett Cannon Date: Wed, 6 Nov 2024 14:33:46 -0800 Subject: [PATCH 39/54] GH-126458: disable SIMD for HACL under WASI (#126512) Requires an extra `-msimd128` flag and the `*mmintrin.h` header files are exclusive to x86-family CPUs. --- .../Build/2024-11-06-11-12-04.gh-issue-126458.7vzHtx.rst | 1 + configure | 6 ++++-- configure.ac | 6 ++++-- 3 files changed, 9 insertions(+), 4 deletions(-) create mode 100644 Misc/NEWS.d/next/Build/2024-11-06-11-12-04.gh-issue-126458.7vzHtx.rst diff --git a/Misc/NEWS.d/next/Build/2024-11-06-11-12-04.gh-issue-126458.7vzHtx.rst b/Misc/NEWS.d/next/Build/2024-11-06-11-12-04.gh-issue-126458.7vzHtx.rst new file mode 100644 index 000000000000000..cc06dd8a30e30aa --- /dev/null +++ b/Misc/NEWS.d/next/Build/2024-11-06-11-12-04.gh-issue-126458.7vzHtx.rst @@ -0,0 +1 @@ +Disable SIMD support for HACL under WASI. diff --git a/configure b/configure index e529527214da291..e0ab304570dfd48 100755 --- a/configure +++ b/configure @@ -30770,7 +30770,8 @@ esac # The SIMD files use aligned_alloc, which is not available on older versions of # Android. -if test "$ac_sys_system" != "Linux-android" || test "$ANDROID_API_LEVEL" -ge 28; then +# The *mmintrin.h headers are x86-family-specific, so can't be used on WASI. +if test "$ac_sys_system" != "Linux-android" -a "$ac_sys_system" != "WASI" || test "$ANDROID_API_LEVEL" -ge 28; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -msse -msse2 -msse3 -msse4.1 -msse4.2" >&5 printf %s "checking whether C compiler accepts -msse -msse2 -msse3 -msse4.1 -msse4.2... 
" >&6; } if test ${ax_cv_check_cflags__Werror__msse__msse2__msse3__msse4_1__msse4_2+y} @@ -30837,11 +30838,12 @@ fi # The SIMD files use aligned_alloc, which is not available on older versions of # Android. +# The *mmintrin.h headers are x86-family-specific, so can't be used on WASI. # # Although AVX support is not guaranteed on Android # (https://developer.android.com/ndk/guides/abis#86-64), this is safe because we do a # runtime CPUID check. -if test "$ac_sys_system" != "Linux-android" || test "$ANDROID_API_LEVEL" -ge 28; then +if test "$ac_sys_system" != "Linux-android" -a "$ac_sys_system" != "WASI" || test "$ANDROID_API_LEVEL" -ge 28; then { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -mavx2" >&5 printf %s "checking whether C compiler accepts -mavx2... " >&6; } if test ${ax_cv_check_cflags__Werror__mavx2+y} diff --git a/configure.ac b/configure.ac index bc67a0596ac2b43..da7d1ef68eefa89 100644 --- a/configure.ac +++ b/configure.ac @@ -7853,7 +7853,8 @@ AC_SUBST([LIBHACL_CFLAGS]) # The SIMD files use aligned_alloc, which is not available on older versions of # Android. -if test "$ac_sys_system" != "Linux-android" || test "$ANDROID_API_LEVEL" -ge 28; then +# The *mmintrin.h headers are x86-family-specific, so can't be used on WASI. +if test "$ac_sys_system" != "Linux-android" -a "$ac_sys_system" != "WASI" || test "$ANDROID_API_LEVEL" -ge 28; then dnl This can be extended here to detect e.g. Power8, which HACL* should also support. AX_CHECK_COMPILE_FLAG([-msse -msse2 -msse3 -msse4.1 -msse4.2],[ [LIBHACL_SIMD128_FLAGS="-msse -msse2 -msse3 -msse4.1 -msse4.2"] @@ -7879,11 +7880,12 @@ AC_SUBST([LIBHACL_SIMD128_OBJS]) # The SIMD files use aligned_alloc, which is not available on older versions of # Android. +# The *mmintrin.h headers are x86-family-specific, so can't be used on WASI. 
# # Although AVX support is not guaranteed on Android # (https://developer.android.com/ndk/guides/abis#86-64), this is safe because we do a # runtime CPUID check. -if test "$ac_sys_system" != "Linux-android" || test "$ANDROID_API_LEVEL" -ge 28; then +if test "$ac_sys_system" != "Linux-android" -a "$ac_sys_system" != "WASI" || test "$ANDROID_API_LEVEL" -ge 28; then AX_CHECK_COMPILE_FLAG([-mavx2],[ [LIBHACL_SIMD256_FLAGS="-mavx2"] AC_DEFINE([HACL_CAN_COMPILE_SIMD256], [1], [HACL* library can compile SIMD256 implementations]) From 223d3dc554dde45f185f7f465753824c6f698b9b Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 7 Nov 2024 08:53:02 +0200 Subject: [PATCH 40/54] gh-125631: Enable setting persistent_id and persistent_load of pickler and unpickler (GH-125752) pickle.Pickler.persistent_id and pickle.Unpickler.persistent_load can again be overridden as instance attributes. --- Lib/test/test_pickle.py | 82 ++++++++++++++++++- ...-10-19-11-06-06.gh-issue-125631.BlhVvR.rst | 4 + Modules/_pickle.c | 62 ++++++++++++++ 3 files changed, 146 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-10-19-11-06-06.gh-issue-125631.BlhVvR.rst diff --git a/Lib/test/test_pickle.py b/Lib/test/test_pickle.py index 9ec2eb97147fae7..4ec966d83514901 100644 --- a/Lib/test/test_pickle.py +++ b/Lib/test/test_pickle.py @@ -250,6 +250,84 @@ def persistent_load(subself, pid): self.assertEqual(unpickler.load(), 'abc') self.assertEqual(called, ['abc']) + def test_pickler_instance_attribute(self): + def persistent_id(obj): + called.append(obj) + return obj + + for proto in range(pickle.HIGHEST_PROTOCOL + 1): + f = io.BytesIO() + pickler = self.pickler(f, proto) + called = [] + old_persistent_id = pickler.persistent_id + pickler.persistent_id = persistent_id + self.assertEqual(pickler.persistent_id, persistent_id) + pickler.dump('abc') + self.assertEqual(called, ['abc']) + self.assertEqual(self.loads(f.getvalue()), 'abc') + del pickler.persistent_id + 
self.assertEqual(pickler.persistent_id, old_persistent_id) + + def test_unpickler_instance_attribute(self): + def persistent_load(pid): + called.append(pid) + return pid + + for proto in range(pickle.HIGHEST_PROTOCOL + 1): + unpickler = self.unpickler(io.BytesIO(self.dumps('abc', proto))) + called = [] + old_persistent_load = unpickler.persistent_load + unpickler.persistent_load = persistent_load + self.assertEqual(unpickler.persistent_load, persistent_load) + self.assertEqual(unpickler.load(), 'abc') + self.assertEqual(called, ['abc']) + del unpickler.persistent_load + self.assertEqual(unpickler.persistent_load, old_persistent_load) + + def test_pickler_super_instance_attribute(self): + class PersPickler(self.pickler): + def persistent_id(subself, obj): + raise AssertionError('should never be called') + def _persistent_id(subself, obj): + called.append(obj) + self.assertIsNone(super().persistent_id(obj)) + return obj + + for proto in range(pickle.HIGHEST_PROTOCOL + 1): + f = io.BytesIO() + pickler = PersPickler(f, proto) + called = [] + old_persistent_id = pickler.persistent_id + pickler.persistent_id = pickler._persistent_id + self.assertEqual(pickler.persistent_id, pickler._persistent_id) + pickler.dump('abc') + self.assertEqual(called, ['abc']) + self.assertEqual(self.loads(f.getvalue()), 'abc') + del pickler.persistent_id + self.assertEqual(pickler.persistent_id, old_persistent_id) + + def test_unpickler_super_instance_attribute(self): + class PersUnpickler(self.unpickler): + def persistent_load(subself, pid): + raise AssertionError('should never be called') + def _persistent_load(subself, pid): + called.append(pid) + with self.assertRaises(self.persistent_load_error): + super().persistent_load(pid) + return pid + + for proto in range(pickle.HIGHEST_PROTOCOL + 1): + unpickler = PersUnpickler(io.BytesIO(self.dumps('abc', proto))) + called = [] + old_persistent_load = unpickler.persistent_load + unpickler.persistent_load = unpickler._persistent_load + 
self.assertEqual(unpickler.persistent_load, unpickler._persistent_load) + self.assertEqual(unpickler.load(), 'abc') + self.assertEqual(called, ['abc']) + del unpickler.persistent_load + self.assertEqual(unpickler.persistent_load, old_persistent_load) + + class PyPicklerUnpicklerObjectTests(AbstractPicklerUnpicklerObjectTests, unittest.TestCase): pickler_class = pickle._Pickler @@ -373,7 +451,7 @@ class SizeofTests(unittest.TestCase): check_sizeof = support.check_sizeof def test_pickler(self): - basesize = support.calcobjsize('6P2n3i2n3i2P') + basesize = support.calcobjsize('7P2n3i2n3i2P') p = _pickle.Pickler(io.BytesIO()) self.assertEqual(object.__sizeof__(p), basesize) MT_size = struct.calcsize('3nP0n') @@ -390,7 +468,7 @@ def test_pickler(self): 0) # Write buffer is cleared after every dump(). def test_unpickler(self): - basesize = support.calcobjsize('2P2nP 2P2n2i5P 2P3n8P2n2i') + basesize = support.calcobjsize('2P2n2P 2P2n2i5P 2P3n8P2n2i') unpickler = _pickle.Unpickler P = struct.calcsize('P') # Size of memo table entry. n = struct.calcsize('n') # Size of mark table entry. diff --git a/Misc/NEWS.d/next/Library/2024-10-19-11-06-06.gh-issue-125631.BlhVvR.rst b/Misc/NEWS.d/next/Library/2024-10-19-11-06-06.gh-issue-125631.BlhVvR.rst new file mode 100644 index 000000000000000..e870abbf87803a9 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-19-11-06-06.gh-issue-125631.BlhVvR.rst @@ -0,0 +1,4 @@ +Restore ability to set :attr:`~pickle.Pickler.persistent_id` and +:attr:`~pickle.Unpickler.persistent_load` attributes of instances of the +:class:`!Pickler` and :class:`!Unpickler` classes in the :mod:`pickle` +module. diff --git a/Modules/_pickle.c b/Modules/_pickle.c index 863da6878409f32..5837cd41a40cd48 100644 --- a/Modules/_pickle.c +++ b/Modules/_pickle.c @@ -613,6 +613,7 @@ typedef struct PicklerObject { objects to support self-referential objects pickling. 
*/ PyObject *persistent_id; /* persistent_id() method, can be NULL */ + PyObject *persistent_id_attr; /* instance attribute, can be NULL */ PyObject *dispatch_table; /* private dispatch_table, can be NULL */ PyObject *reducer_override; /* hook for invoking user-defined callbacks instead of save_global when pickling @@ -655,6 +656,7 @@ typedef struct UnpicklerObject { size_t memo_len; /* Number of objects in the memo */ PyObject *persistent_load; /* persistent_load() method, can be NULL. */ + PyObject *persistent_load_attr; /* instance attribute, can be NULL. */ Py_buffer buffer; char *input_buffer; @@ -1108,6 +1110,7 @@ _Pickler_New(PickleState *st) self->memo = memo; self->persistent_id = NULL; + self->persistent_id_attr = NULL; self->dispatch_table = NULL; self->reducer_override = NULL; self->write = NULL; @@ -1606,6 +1609,7 @@ _Unpickler_New(PyObject *module) self->memo_size = MEMO_SIZE; self->memo_len = 0; self->persistent_load = NULL; + self->persistent_load_attr = NULL; memset(&self->buffer, 0, sizeof(Py_buffer)); self->input_buffer = NULL; self->input_line = NULL; @@ -5092,6 +5096,33 @@ Pickler_set_memo(PicklerObject *self, PyObject *obj, void *Py_UNUSED(ignored)) return -1; } +static PyObject * +Pickler_getattr(PyObject *self, PyObject *name) +{ + if (PyUnicode_Check(name) + && PyUnicode_EqualToUTF8(name, "persistent_id") + && ((PicklerObject *)self)->persistent_id_attr) + { + return Py_NewRef(((PicklerObject *)self)->persistent_id_attr); + } + + return PyObject_GenericGetAttr(self, name); +} + +static int +Pickler_setattr(PyObject *self, PyObject *name, PyObject *value) +{ + if (PyUnicode_Check(name) + && PyUnicode_EqualToUTF8(name, "persistent_id")) + { + Py_XINCREF(value); + Py_XSETREF(((PicklerObject *)self)->persistent_id_attr, value); + return 0; + } + + return PyObject_GenericSetAttr(self, name, value); +} + static PyMemberDef Pickler_members[] = { {"bin", Py_T_INT, offsetof(PicklerObject, bin)}, {"fast", Py_T_INT, offsetof(PicklerObject, fast)}, @@ 
-5107,6 +5138,8 @@ static PyGetSetDef Pickler_getsets[] = { static PyType_Slot pickler_type_slots[] = { {Py_tp_dealloc, Pickler_dealloc}, + {Py_tp_getattro, Pickler_getattr}, + {Py_tp_setattro, Pickler_setattr}, {Py_tp_methods, Pickler_methods}, {Py_tp_members, Pickler_members}, {Py_tp_getset, Pickler_getsets}, @@ -7566,6 +7599,33 @@ Unpickler_set_memo(UnpicklerObject *self, PyObject *obj, void *Py_UNUSED(ignored return -1; } +static PyObject * +Unpickler_getattr(PyObject *self, PyObject *name) +{ + if (PyUnicode_Check(name) + && PyUnicode_EqualToUTF8(name, "persistent_load") + && ((UnpicklerObject *)self)->persistent_load_attr) + { + return Py_NewRef(((UnpicklerObject *)self)->persistent_load_attr); + } + + return PyObject_GenericGetAttr(self, name); +} + +static int +Unpickler_setattr(PyObject *self, PyObject *name, PyObject *value) +{ + if (PyUnicode_Check(name) + && PyUnicode_EqualToUTF8(name, "persistent_load")) + { + Py_XINCREF(value); + Py_XSETREF(((UnpicklerObject *)self)->persistent_load_attr, value); + return 0; + } + + return PyObject_GenericSetAttr(self, name, value); +} + static PyGetSetDef Unpickler_getsets[] = { {"memo", (getter)Unpickler_get_memo, (setter)Unpickler_set_memo}, {NULL} @@ -7574,6 +7634,8 @@ static PyGetSetDef Unpickler_getsets[] = { static PyType_Slot unpickler_type_slots[] = { {Py_tp_dealloc, Unpickler_dealloc}, {Py_tp_doc, (char *)_pickle_Unpickler___init____doc__}, + {Py_tp_getattro, Unpickler_getattr}, + {Py_tp_setattro, Unpickler_setattr}, {Py_tp_traverse, Unpickler_traverse}, {Py_tp_clear, Unpickler_clear}, {Py_tp_methods, Unpickler_methods}, From dbb6e22cb1f533bba00a61a5b63ec68af9d48836 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 7 Nov 2024 09:09:59 +0200 Subject: [PATCH 41/54] gh-125926: Fix urllib.parse.urljoin() for base URI with undefined authority (GH-125989) Although this goes beyond the application of RFC 3986, urljoin() should support relative base URIs for backward compatibility. 
--- Lib/test/test_urlparse.py | 72 +++++++++++++++++++ Lib/urllib/parse.py | 4 +- ...-10-25-20-52-15.gh-issue-125926.pp8rtZ.rst | 4 ++ 3 files changed, 78 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-10-25-20-52-15.gh-issue-125926.pp8rtZ.rst diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py index d49e4388696ab41..297fb4831c16bfc 100644 --- a/Lib/test/test_urlparse.py +++ b/Lib/test/test_urlparse.py @@ -623,6 +623,78 @@ def test_urljoins(self): self.checkJoin(RFC1808_BASE, 'https:;', 'https:;') self.checkJoin(RFC1808_BASE, 'https:;x', 'https:;x') + def test_urljoins_relative_base(self): + # According to RFC 3986, Section 5.1, a base URI must conform to + # the absolute-URI syntax rule (Section 4.3). But urljoin() lacks + # a context to establish missed components of the relative base URI. + # It still has to return a sensible result for backwards compatibility. + # The following tests are figments of the imagination and artifacts + # of the current implementation that are not based on any standard. 
+ self.checkJoin('', '', '') + self.checkJoin('', '//', '//', relroundtrip=False) + self.checkJoin('', '//v', '//v') + self.checkJoin('', '//v/w', '//v/w') + self.checkJoin('', '/w', '/w') + self.checkJoin('', '///w', '///w', relroundtrip=False) + self.checkJoin('', 'w', 'w') + + self.checkJoin('//', '', '//') + self.checkJoin('//', '//', '//') + self.checkJoin('//', '//v', '//v') + self.checkJoin('//', '//v/w', '//v/w') + self.checkJoin('//', '/w', '///w') + self.checkJoin('//', '///w', '///w') + self.checkJoin('//', 'w', '///w') + + self.checkJoin('//a', '', '//a') + self.checkJoin('//a', '//', '//a') + self.checkJoin('//a', '//v', '//v') + self.checkJoin('//a', '//v/w', '//v/w') + self.checkJoin('//a', '/w', '//a/w') + self.checkJoin('//a', '///w', '//a/w') + self.checkJoin('//a', 'w', '//a/w') + + for scheme in '', 'http:': + self.checkJoin('http:', scheme + '', 'http:') + self.checkJoin('http:', scheme + '//', 'http:') + self.checkJoin('http:', scheme + '//v', 'http://v') + self.checkJoin('http:', scheme + '//v/w', 'http://v/w') + self.checkJoin('http:', scheme + '/w', 'http:/w') + self.checkJoin('http:', scheme + '///w', 'http:/w') + self.checkJoin('http:', scheme + 'w', 'http:/w') + + self.checkJoin('http://', scheme + '', 'http://') + self.checkJoin('http://', scheme + '//', 'http://') + self.checkJoin('http://', scheme + '//v', 'http://v') + self.checkJoin('http://', scheme + '//v/w', 'http://v/w') + self.checkJoin('http://', scheme + '/w', 'http:///w') + self.checkJoin('http://', scheme + '///w', 'http:///w') + self.checkJoin('http://', scheme + 'w', 'http:///w') + + self.checkJoin('http://a', scheme + '', 'http://a') + self.checkJoin('http://a', scheme + '//', 'http://a') + self.checkJoin('http://a', scheme + '//v', 'http://v') + self.checkJoin('http://a', scheme + '//v/w', 'http://v/w') + self.checkJoin('http://a', scheme + '/w', 'http://a/w') + self.checkJoin('http://a', scheme + '///w', 'http://a/w') + self.checkJoin('http://a', scheme + 'w', 
'http://a/w') + + self.checkJoin('/b/c', '', '/b/c') + self.checkJoin('/b/c', '//', '/b/c') + self.checkJoin('/b/c', '//v', '//v') + self.checkJoin('/b/c', '//v/w', '//v/w') + self.checkJoin('/b/c', '/w', '/w') + self.checkJoin('/b/c', '///w', '/w') + self.checkJoin('/b/c', 'w', '/b/w') + + self.checkJoin('///b/c', '', '///b/c') + self.checkJoin('///b/c', '//', '///b/c') + self.checkJoin('///b/c', '//v', '//v') + self.checkJoin('///b/c', '//v/w', '//v/w') + self.checkJoin('///b/c', '/w', '///w') + self.checkJoin('///b/c', '///w', '///w') + self.checkJoin('///b/c', 'w', '///b/w') + def test_RFC2732(self): str_cases = [ ('http://Test.python.org:5432/foo/', 'test.python.org', 5432), diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py index 5b00ab25c6b4cad..a721d777c82f828 100644 --- a/Lib/urllib/parse.py +++ b/Lib/urllib/parse.py @@ -577,9 +577,9 @@ def urljoin(base, url, allow_fragments=True): if scheme is None: scheme = bscheme - if scheme != bscheme or scheme not in uses_relative: + if scheme != bscheme or (scheme and scheme not in uses_relative): return _coerce_result(url) - if scheme in uses_netloc: + if not scheme or scheme in uses_netloc: if netloc: return _coerce_result(_urlunsplit(scheme, netloc, path, query, fragment)) diff --git a/Misc/NEWS.d/next/Library/2024-10-25-20-52-15.gh-issue-125926.pp8rtZ.rst b/Misc/NEWS.d/next/Library/2024-10-25-20-52-15.gh-issue-125926.pp8rtZ.rst new file mode 100644 index 000000000000000..7f98bcdc38e5662 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-25-20-52-15.gh-issue-125926.pp8rtZ.rst @@ -0,0 +1,4 @@ +Fix :func:`urllib.parse.urljoin` for base URI with undefined authority. +Although :rfc:`3986` only specify reference resolution for absolute base +URI, :func:`!urljoin` should continue to return sensible result for relative +base URI. From d46d3f2ec783004f0927c9f5e6211a570360cf3b Mon Sep 17 00:00:00 2001 From: "Gregory P. 
Smith" Date: Thu, 7 Nov 2024 00:06:14 -0800 Subject: [PATCH 42/54] Cleanup multiprocessing comment and unusual import error message (#126532) Define constants as constants rather than calling `list(range(2))`. Explain which values must remain in sync via comments. --- Lib/multiprocessing/synchronize.py | 15 +++++++-------- Modules/_multiprocessing/semaphore.c | 1 + 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/Lib/multiprocessing/synchronize.py b/Lib/multiprocessing/synchronize.py index 3ccbfe311c71f37..1917a8bd51dcabb 100644 --- a/Lib/multiprocessing/synchronize.py +++ b/Lib/multiprocessing/synchronize.py @@ -21,22 +21,21 @@ from . import process from . import util -# Try to import the mp.synchronize module cleanly, if it fails -# raise ImportError for platforms lacking a working sem_open implementation. -# See issue 3770 +# TODO: Do any platforms still lack a functioning sem_open? try: from _multiprocessing import SemLock, sem_unlink -except (ImportError): +except ImportError: raise ImportError("This platform lacks a functioning sem_open" + - " implementation, therefore, the required" + - " synchronization primitives needed will not" + - " function, see issue 3770.") + " implementation. 
https://github.com/python/cpython/issues/48020.") # # Constants # -RECURSIVE_MUTEX, SEMAPHORE = list(range(2)) +# These match the enum in Modules/_multiprocessing/semaphore.c +RECURSIVE_MUTEX = 0 +SEMAPHORE = 1 + SEM_VALUE_MAX = _multiprocessing.SemLock.SEM_VALUE_MAX # diff --git a/Modules/_multiprocessing/semaphore.c b/Modules/_multiprocessing/semaphore.c index 4de4ee6c78fbd1d..9eef7c25636899c 100644 --- a/Modules/_multiprocessing/semaphore.c +++ b/Modules/_multiprocessing/semaphore.c @@ -15,6 +15,7 @@ #ifdef HAVE_MP_SEMAPHORE +// These match the values in Lib/multiprocessing/synchronize.py enum { RECURSIVE_MUTEX, SEMAPHORE }; typedef struct { From 75f7cf91ec5afc6091a0fd442a1f0435c19300b2 Mon Sep 17 00:00:00 2001 From: Duprat Date: Thu, 7 Nov 2024 09:10:57 +0100 Subject: [PATCH 43/54] gh-125679: multiprocessing Lock and RLock - fix invalid representation string on MacOSX. (#125680) --- Lib/multiprocessing/synchronize.py | 4 +- Lib/test/_test_multiprocessing.py | 122 ++++++++++++++++++ ...-11-06-23-40-28.gh-issue-125679.Qq9xF5.rst | 2 + 3 files changed, 126 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-11-06-23-40-28.gh-issue-125679.Qq9xF5.rst diff --git a/Lib/multiprocessing/synchronize.py b/Lib/multiprocessing/synchronize.py index 1917a8bd51dcabb..4f72373c951abc5 100644 --- a/Lib/multiprocessing/synchronize.py +++ b/Lib/multiprocessing/synchronize.py @@ -173,7 +173,7 @@ def __repr__(self): name = process.current_process().name if threading.current_thread().name != 'MainThread': name += '|' + threading.current_thread().name - elif self._semlock._get_value() == 1: + elif not self._semlock._is_zero(): name = 'None' elif self._semlock._count() > 0: name = 'SomeOtherThread' @@ -199,7 +199,7 @@ def __repr__(self): if threading.current_thread().name != 'MainThread': name += '|' + threading.current_thread().name count = self._semlock._count() - elif self._semlock._get_value() == 1: + elif not self._semlock._is_zero(): name, count = 
'None', 0 elif self._semlock._count() > 0: name, count = 'SomeOtherThread', 'nonzero' diff --git a/Lib/test/_test_multiprocessing.py b/Lib/test/_test_multiprocessing.py index 77b618c684475a5..38ddb62c693fc03 100644 --- a/Lib/test/_test_multiprocessing.py +++ b/Lib/test/_test_multiprocessing.py @@ -1363,6 +1363,66 @@ def test_closed_queue_put_get_exceptions(self): class _TestLock(BaseTestCase): + @staticmethod + def _acquire(lock, l=None): + lock.acquire() + if l is not None: + l.append(repr(lock)) + + @staticmethod + def _acquire_event(lock, event): + lock.acquire() + event.set() + time.sleep(1.0) + + def test_repr_lock(self): + if self.TYPE != 'processes': + self.skipTest('test not appropriate for {}'.format(self.TYPE)) + + lock = self.Lock() + self.assertEqual(f'<Lock(owner=None)>', repr(lock)) + + lock.acquire() + self.assertEqual(f'<Lock(owner=MainProcess)>', repr(lock)) + lock.release() + + tname = 'T1' + l = [] + t = threading.Thread(target=self._acquire, + args=(lock, l), + name=tname) + t.start() + time.sleep(0.1) + self.assertEqual(f'<Lock(owner=MainProcess|{tname})>', l[0]) + lock.release() + + t = threading.Thread(target=self._acquire, + args=(lock,), + name=tname) + t.start() + time.sleep(0.1) + self.assertEqual('<Lock(owner=SomeOtherThread)>', repr(lock)) + lock.release() + + pname = 'P1' + l = multiprocessing.Manager().list() + p = self.Process(target=self._acquire, + args=(lock, l), + name=pname) + p.start() + p.join() + self.assertEqual(f'<Lock(owner={pname})>', l[0]) + + lock = self.Lock() + event = self.Event() + p = self.Process(target=self._acquire_event, + args=(lock, event), + name='P2') + p.start() + event.wait() + self.assertEqual(f'<Lock(owner=SomeOtherProcess)>', repr(lock)) + p.terminate() + def test_lock(self): lock = self.Lock() self.assertEqual(lock.acquire(), True) @@ -1370,6 +1430,68 @@ def test_lock(self): self.assertEqual(lock.release(), None) self.assertRaises((ValueError, threading.ThreadError), lock.release) + @staticmethod + def _acquire_release(lock, timeout, l=None, n=1): + for _ in range(n): + lock.acquire() + if l is not None: + l.append(repr(lock)) + time.sleep(timeout) + for
_ in range(n): + lock.release() + + def test_repr_rlock(self): + if self.TYPE != 'processes': + self.skipTest('test not appropriate for {}'.format(self.TYPE)) + + lock = self.RLock() + self.assertEqual('<RLock(None, 0)>', repr(lock)) + + n = 3 + for _ in range(n): + lock.acquire() + self.assertEqual(f'<RLock(MainProcess, {n})>', repr(lock)) + for _ in range(n): + lock.release() + + t, l = [], [] + for i in range(n): + t.append(threading.Thread(target=self._acquire_release, + args=(lock, 0.1, l, i+1), + name=f'T{i+1}')) + t[-1].start() + for t_ in t: + t_.join() + for i in range(n): + self.assertIn(f'<RLock(MainProcess|T{i+1}, {i+1})>', l) + + + t = threading.Thread(target=self._acquire_release, + args=(lock, 0.2), + name=f'T1') + t.start() + time.sleep(0.1) + self.assertEqual('<RLock(SomeOtherThread, nonzero)>', repr(lock)) + time.sleep(0.2) + + pname = 'P1' + l = multiprocessing.Manager().list() + p = self.Process(target=self._acquire_release, + args=(lock, 0.1, l), + name=pname) + p.start() + p.join() + self.assertEqual(f'<RLock({pname}, 1)>', l[0]) + + event = self.Event() + lock = self.RLock() + p = self.Process(target=self._acquire_event, + args=(lock, event)) + p.start() + event.wait() + self.assertEqual('<RLock(SomeOtherProcess, nonzero)>', repr(lock)) + p.join() + def test_rlock(self): lock = self.RLock() self.assertEqual(lock.acquire(), True) diff --git a/Misc/NEWS.d/next/Library/2024-11-06-23-40-28.gh-issue-125679.Qq9xF5.rst b/Misc/NEWS.d/next/Library/2024-11-06-23-40-28.gh-issue-125679.Qq9xF5.rst new file mode 100644 index 000000000000000..ac6851e26896920 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-11-06-23-40-28.gh-issue-125679.Qq9xF5.rst @@ -0,0 +1,2 @@ +The :class:`multiprocessing.Lock` and :class:`multiprocessing.RLock` +``repr`` values no longer say "unknown" on macOS. From a5b94d066016be63d632cccee0ec2a2eb24536dc Mon Sep 17 00:00:00 2001 From: "Erlend E.
Aasland" Date: Thu, 7 Nov 2024 10:49:58 +0100 Subject: [PATCH 44/54] gh-96398: Improve accuracy of compiler checks in configure.ac (#117815) The following variables are now used in compiler checks: - $ac_cv_gcc_compat is set to 'yes' for GCC compatible compilers (the C preprocessor defines the __GNUC__ macro) - for compiler basename checks, use $CC_BASENAME (may contain platform triplets) - for the rest, use $ac_cv_cc_name (does not contain platform triplets) --- configure | 167 +++++++++++++++++++++------------------------------ configure.ac | 163 ++++++++++++++++++------------------------------- 2 files changed, 127 insertions(+), 203 deletions(-) diff --git a/configure b/configure index e0ab304570dfd48..1d5c0941247c306 100755 --- a/configure +++ b/configure @@ -6193,6 +6193,8 @@ printf "%s\n" "$ac_cv_path_EGREP" >&6; } +CC_BASENAME=$(expr "//$CC" : '.*/\(.*\)') + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for CC compiler name" >&5 printf %s "checking for CC compiler name... " >&6; } if test ${ac_cv_cc_name+y} @@ -6220,9 +6222,10 @@ EOF if $CPP $CPPFLAGS conftest.c >conftest.out 2>/dev/null; then ac_cv_cc_name=`grep -v '^#' conftest.out | grep -v '^ *$' | tr -d ' '` - if test $(expr "//$CC" : '.*/\(.*\)') = "mpicc"; then - ac_cv_cc_name="mpicc" - fi + if test "x$CC_BASENAME" = xmpicc +then : + ac_cv_cc_name=mpicc +fi else ac_cv_cc_name="unknown" fi @@ -6440,7 +6443,7 @@ printf "%s\n" "$ac_cv_gcc_compat" >&6; } preset_cxx="$CXX" if test -z "$CXX" then - case "$CC" in + case "$ac_cv_cc_name" in gcc) if test -n "$ac_tool_prefix"; then # Extract the first word of "${ac_tool_prefix}g++", so it can be a program name with args. set dummy ${ac_tool_prefix}g++; ac_word=$2 @@ -6657,7 +6660,7 @@ else CXX="$ac_cv_path_CXX" fi ;; - clang|*/clang) if test -n "$ac_tool_prefix"; then + clang) if test -n "$ac_tool_prefix"; then # Extract the first word of "${ac_tool_prefix}clang++", so it can be a program name with args. 
set dummy ${ac_tool_prefix}clang++; ac_word=$2 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 @@ -6765,7 +6768,7 @@ else CXX="$ac_cv_path_CXX" fi ;; - icc|*/icc) if test -n "$ac_tool_prefix"; then + icc) if test -n "$ac_tool_prefix"; then # Extract the first word of "${ac_tool_prefix}icpc", so it can be a program name with args. set dummy ${ac_tool_prefix}icpc; ac_word=$2 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 @@ -7374,7 +7377,7 @@ rmdir CaseSensitiveTestDir case $ac_sys_system in hp*|HP*) - case $CC in + case $ac_cv_cc_name in cc|*/cc) CC="$CC -Ae";; esac;; esac @@ -7467,7 +7470,7 @@ printf "%s\n" "$EXPORTSYMS" >&6; } { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for GNU ld" >&5 printf %s "checking for GNU ld... " >&6; } ac_prog=ld -if test "$GCC" = yes; then +if test "$ac_cv_cc_name" = "gcc"; then ac_prog=`$CC -print-prog-name=ld` fi case `"$ac_prog" -V 2>&1 < /dev/null` in @@ -8338,8 +8341,9 @@ if test "$Py_OPT" = 'true' ; then DEF_MAKE_ALL_RULE="profile-opt" REQUIRE_PGO="yes" DEF_MAKE_RULE="build_all" - case $CC in - *gcc*) + if test "x$ac_cv_gcc_compat" = xyes +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -fno-semantic-interposition" >&5 printf %s "checking whether C compiler accepts -fno-semantic-interposition... 
" >&6; } if test ${ax_cv_check_cflags__Werror__fno_semantic_interposition+y} @@ -8381,8 +8385,8 @@ else $as_nop : fi - ;; - esac + +fi elif test "$ac_sys_system" = "Emscripten" -o "$ac_sys_system" = "WASI"; then DEF_MAKE_ALL_RULE="build_wasm" REQUIRE_PGO="no" @@ -8409,7 +8413,7 @@ printf "%s\n" "$PROFILE_TASK" >&6; } llvm_bin_dir='' llvm_path="${PATH}" -if test "${CC}" = "clang" +if test "${ac_cv_cc_name}" = "clang" then clang_bin=`which clang` # Some systems install clang elsewhere as a symlink to the real path @@ -8467,8 +8471,8 @@ printf "%s\n" "no" >&6; } fi if test "$Py_LTO" = 'true' ; then - case $CC in - *clang*) + case $ac_cv_cc_name in + clang) LDFLAGS_NOLTO="-fno-lto" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether C compiler accepts -flto=thin" >&5 printf %s "checking whether C compiler accepts -flto=thin... " >&6; } @@ -8748,14 +8752,14 @@ fi ;; esac ;; - *emcc*) + emcc) if test "$Py_LTO_POLICY" != "default"; then as_fn_error $? "emcc supports only default lto." "$LINENO" 5 fi LTOFLAGS="-flto" LTOCFLAGS="-flto" ;; - *gcc*) + gcc) if test $Py_LTO_POLICY = thin then as_fn_error $? "thin lto is not supported under gcc compiler." "$LINENO" 5 @@ -8921,10 +8925,8 @@ printf "%s\n" "$as_me: llvm-profdata found via xcrun: ${LLVM_PROFDATA}" >&6;} fi LLVM_PROF_ERR=no -# GNU Autoconf recommends the use of expr instead of basename. 
-CC_BASENAME=$(expr "//$CC" : '.*/\(.*\)') -case "$CC_BASENAME" in - *clang*) +case "$ac_cv_cc_name" in + clang) # Any changes made here should be reflected in the GCC+Darwin case below PGO_PROF_GEN_FLAG="-fprofile-instr-generate" PGO_PROF_USE_FLAG="-fprofile-instr-use=\"\$(shell pwd)/code.profclangd\"" @@ -8939,31 +8941,13 @@ case "$CC_BASENAME" in fi fi ;; - *gcc*) - case $ac_sys_system in - Darwin*) - PGO_PROF_GEN_FLAG="-fprofile-instr-generate" - PGO_PROF_USE_FLAG="-fprofile-instr-use=\"\$(shell pwd)/code.profclangd\"" - LLVM_PROF_MERGER=" ${LLVM_PROFDATA} merge -output=\"\$(shell pwd)/code.profclangd\" \"\$(shell pwd)\"/*.profclangr " - LLVM_PROF_FILE="LLVM_PROFILE_FILE=\"\$(shell pwd)/code-%p.profclangr\"" - if test "${LLVM_PROF_FOUND}" = "not-found" - then - LLVM_PROF_ERR=yes - if test "${REQUIRE_PGO}" = "yes" - then - as_fn_error $? "llvm-profdata is required for a --enable-optimizations build but could not be found." "$LINENO" 5 - fi - fi - ;; - *) - PGO_PROF_GEN_FLAG="-fprofile-generate" - PGO_PROF_USE_FLAG="-fprofile-use -fprofile-correction" - LLVM_PROF_MERGER="true" - LLVM_PROF_FILE="" - ;; - esac + gcc) + PGO_PROF_GEN_FLAG="-fprofile-generate" + PGO_PROF_USE_FLAG="-fprofile-use -fprofile-correction" + LLVM_PROF_MERGER="true" + LLVM_PROF_FILE="" ;; - *icc*) + icc) PGO_PROF_GEN_FLAG="-prof-gen" PGO_PROF_USE_FLAG="-prof-use" LLVM_PROF_MERGER="true" @@ -9329,19 +9313,6 @@ printf "%s\n" "$BOLT_APPLY_FLAGS" >&6; } # compiler and platform. BASECFLAGS tweaks need to be made even if the # user set OPT. 
-case $CC in - *clang*) - cc_is_clang=1 - ;; - *) - if $CC --version 2>&1 | grep -q clang - then - cc_is_clang=1 - else - cc_is_clang= - fi -esac - save_CFLAGS=$CFLAGS CFLAGS="-fstrict-overflow -fno-strict-overflow" { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if $CC supports -fstrict-overflow and -fno-strict-overflow" >&5 @@ -9465,7 +9436,7 @@ if test "${OPT-unset}" = "unset" then case $GCC in yes) - if test -n "${cc_is_clang}" + if test "${ac_cv_cc_name}" != "clang" then # bpo-30104: disable strict aliasing to compile correctly dtoa.c, # see Makefile.pre.in for more information @@ -9964,8 +9935,9 @@ fi fi -case $GCC in -yes) +if test "x$ac_cv_gcc_compat" = xyes +then : + CFLAGS_NODIST="$CFLAGS_NODIST -std=c11" @@ -10083,8 +10055,8 @@ fi # ICC doesn't recognize the option, but only emits a warning ## XXX does it emit an unused result warning and can it be disabled? - case "$CC_BASENAME" in #( - *icc*) : + case "$ac_cv_cc_name" in #( + icc) : ac_cv_disable_unused_result_warning=no @@ -10489,22 +10461,19 @@ fi Darwin*) # -Wno-long-double, -no-cpp-precomp, and -mno-fused-madd # used to be here, but non-Apple gcc doesn't accept them. - if test "${CC}" = gcc - then - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking which compiler should be used" >&5 + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking which compiler should be used" >&5 printf %s "checking which compiler should be used... 
" >&6; } - case "${UNIVERSALSDK}" in - */MacOSX10.4u.sdk) - # Build using 10.4 SDK, force usage of gcc when the - # compiler is gcc, otherwise the user will get very - # confusing error messages when building on OSX 10.6 - CC=gcc-4.0 - CPP=cpp-4.0 - ;; - esac - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 + case "${UNIVERSALSDK}" in + */MacOSX10.4u.sdk) + # Build using 10.4 SDK, force usage of gcc when the + # compiler is gcc, otherwise the user will get very + # confusing error messages when building on OSX 10.6 + CC=gcc-4.0 + CPP=cpp-4.0 + ;; + esac + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 printf "%s\n" "$CC" >&6; } - fi LIPO_INTEL64_FLAGS="" if test "${enable_universalsdk}" @@ -10650,9 +10619,9 @@ rm -f core conftest.err conftest.$ac_objext conftest.beam \ # end of Darwin* tests ;; esac - ;; -*) +else $as_nop + case $ac_sys_system in OpenUNIX*|UnixWare*) BASECFLAGS="$BASECFLAGS -K pentium,host,inline,loop_unroll,alloca " @@ -10661,18 +10630,18 @@ rm -f core conftest.err conftest.$ac_objext conftest.beam \ BASECFLAGS="$BASECFLAGS -belf -Ki486 -DSCO5" ;; esac - ;; -esac -case "$CC_BASENAME" in -*mpicc*) +fi + +case "$ac_cv_cc_name" in +mpicc) CFLAGS_NODIST="$CFLAGS_NODIST" ;; -*icc*) +icc) # ICC needs -fp-model strict or floats behave badly CFLAGS_NODIST="$CFLAGS_NODIST -fp-model strict" ;; -*xlc*) +xlc) CFLAGS_NODIST="$CFLAGS_NODIST -qalias=noansi -qmaxmem=-1" ;; esac @@ -13195,7 +13164,7 @@ then LDSHARED="\$(LIBPL)/ld_so_aix \$(CC) -bI:\$(LIBPL)/python.exp" ;; SunOS/5*) - if test "$GCC" = "yes" ; then + if test "$ac_cv_gcc_compat" = "yes" ; then LDSHARED='$(CC) -shared' LDCXXSHARED='$(CXX) -shared' else @@ -13203,7 +13172,7 @@ then LDCXXSHARED='$(CXX) -G' fi ;; hp*|HP*) - if test "$GCC" = "yes" ; then + if test "$ac_cv_gcc_compat" = "yes" ; then LDSHARED='$(CC) -shared' LDCXXSHARED='$(CXX) -shared' else @@ -13296,7 +13265,7 @@ then LDSHARED='$(CC) -shared' LDCXXSHARED='$(CXX) -shared';; OpenUNIX*|UnixWare*) - if test 
"$GCC" = "yes" ; then + if test "$ac_cv_gcc_compat" = "yes" ; then LDSHARED='$(CC) -shared' LDCXXSHARED='$(CXX) -shared' else @@ -13340,13 +13309,13 @@ printf %s "checking CCSHARED... " >&6; } if test -z "$CCSHARED" then case $ac_sys_system/$ac_sys_release in - SunOS*) if test "$GCC" = yes; + SunOS*) if test "$ac_cv_gcc_compat" = "yes"; then CCSHARED="-fPIC"; elif test `uname -p` = sparc; then CCSHARED="-xcode=pic32"; else CCSHARED="-Kpic"; fi;; - hp*|HP*) if test "$GCC" = yes; + hp*|HP*) if test "$ac_cv_gcc_compat" = "yes"; then CCSHARED="-fPIC"; else CCSHARED="+z"; fi;; @@ -13361,12 +13330,12 @@ fi;; FreeBSD*|NetBSD*|OpenBSD*|DragonFly*) CCSHARED="-fPIC";; Haiku*) CCSHARED="-fPIC";; OpenUNIX*|UnixWare*) - if test "$GCC" = "yes" + if test "$ac_cv_gcc_compat" = "yes" then CCSHARED="-fPIC" else CCSHARED="-KPIC" fi;; SCO_SV*) - if test "$GCC" = "yes" + if test "$ac_cv_gcc_compat" = "yes" then CCSHARED="-fPIC" else CCSHARED="-Kpic -belf" fi;; @@ -13426,13 +13395,13 @@ printf "%s\n" "#define THREAD_STACK_SIZE 0x$stack_size" >>confdefs.h then LINKFORSHARED="-Wl,--export-dynamic" fi;; - SunOS/5*) case $CC in - *gcc*) + SunOS/5*) if test "$ac_cv_gcc_compat" = "yes"; then if $CC -Xlinker --help 2>&1 | grep export-dynamic >/dev/null then LINKFORSHARED="-Xlinker --export-dynamic" - fi;; - esac;; + fi + fi + ;; CYGWIN*) if test $enable_shared = "no" then @@ -15323,7 +15292,7 @@ esac fi elif test $ac_cv_sizeof_size_t -eq 4; then if test "$ac_cv_gcc_asm_for_x87" = yes -a "$libmpdec_system" != sunos; then - case $CC in #( + case $ac_cv_cc_name in #( *gcc*) : libmpdec_machine=ppro ;; #( *clang*) : @@ -28206,8 +28175,8 @@ if test "$ac_cv_gcc_asm_for_x87" = yes; then # Some versions of gcc miscompile inline asm: # http://gcc.gnu.org/bugzilla/show_bug.cgi?id=46491 # http://gcc.gnu.org/ml/gcc/2010-11/msg00366.html - case $CC in - *gcc*) + case $ac_cv_cc_name in + gcc) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for gcc ipa-pure-const bug" >&5 printf %s "checking for gcc 
ipa-pure-const bug... " >&6; } saved_cflags="$CFLAGS" diff --git a/configure.ac b/configure.ac index da7d1ef68eefa89..ce5a5eb9c2891fe 100644 --- a/configure.ac +++ b/configure.ac @@ -1048,6 +1048,9 @@ AC_PROG_GREP AC_PROG_SED AC_PROG_EGREP +dnl GNU Autoconf recommends the use of expr instead of basename. +AS_VAR_SET([CC_BASENAME], [$(expr "//$CC" : '.*/\(.*\)')]) + dnl detect compiler name dnl check for xlc before clang, newer xlc's can use clang as frontend. dnl check for GCC last, other compilers set __GNUC__, too. @@ -1073,9 +1076,7 @@ EOF if $CPP $CPPFLAGS conftest.c >conftest.out 2>/dev/null; then ac_cv_cc_name=`grep -v '^#' conftest.out | grep -v '^ *$' | tr -d ' '` - if test $(expr "//$CC" : '.*/\(.*\)') = "mpicc"; then - ac_cv_cc_name="mpicc" - fi + AS_VAR_IF([CC_BASENAME], [mpicc], [ac_cv_cc_name=mpicc]) else ac_cv_cc_name="unknown" fi @@ -1104,11 +1105,11 @@ AC_SUBST([CXX]) preset_cxx="$CXX" if test -z "$CXX" then - case "$CC" in + case "$ac_cv_cc_name" in gcc) AC_PATH_TOOL([CXX], [g++], [g++], [notfound]) ;; cc) AC_PATH_TOOL([CXX], [c++], [c++], [notfound]) ;; - clang|*/clang) AC_PATH_TOOL([CXX], [clang++], [clang++], [notfound]) ;; - icc|*/icc) AC_PATH_TOOL([CXX], [icpc], [icpc], [notfound]) ;; + clang) AC_PATH_TOOL([CXX], [clang++], [clang++], [notfound]) ;; + icc) AC_PATH_TOOL([CXX], [icpc], [icpc], [notfound]) ;; esac if test "$CXX" = "notfound" then @@ -1381,7 +1382,7 @@ rmdir CaseSensitiveTestDir case $ac_sys_system in hp*|HP*) - case $CC in + case $ac_cv_cc_name in cc|*/cc) CC="$CC -Ae";; esac;; esac @@ -1467,7 +1468,7 @@ AC_MSG_RESULT([$EXPORTSYMS]) AC_SUBST([GNULD]) AC_MSG_CHECKING([for GNU ld]) ac_prog=ld -if test "$GCC" = yes; then +if test "$ac_cv_cc_name" = "gcc"; then ac_prog=`$CC -print-prog-name=ld` fi case `"$ac_prog" -V 2>&1 < /dev/null` in @@ -1874,14 +1875,12 @@ if test "$Py_OPT" = 'true' ; then DEF_MAKE_ALL_RULE="profile-opt" REQUIRE_PGO="yes" DEF_MAKE_RULE="build_all" - case $CC in - *gcc*) + AS_VAR_IF([ac_cv_gcc_compat], [yes], [ 
AX_CHECK_COMPILE_FLAG([-fno-semantic-interposition],[ CFLAGS_NODIST="$CFLAGS_NODIST -fno-semantic-interposition" LDFLAGS_NODIST="$LDFLAGS_NODIST -fno-semantic-interposition" ], [], [-Werror]) - ;; - esac + ]) elif test "$ac_sys_system" = "Emscripten" -o "$ac_sys_system" = "WASI"; then dnl Emscripten does not support shared extensions yet. Build dnl "python.[js,wasm]", "pybuilddir.txt", and "platform" files. @@ -1908,7 +1907,7 @@ AC_MSG_RESULT([$PROFILE_TASK]) llvm_bin_dir='' llvm_path="${PATH}" -if test "${CC}" = "clang" +if test "${ac_cv_cc_name}" = "clang" then clang_bin=`which clang` # Some systems install clang elsewhere as a symlink to the real path @@ -1955,8 +1954,8 @@ esac ], [AC_MSG_RESULT([no])]) if test "$Py_LTO" = 'true' ; then - case $CC in - *clang*) + case $ac_cv_cc_name in + clang) LDFLAGS_NOLTO="-fno-lto" dnl Clang linker requires -flto in order to link objects with LTO information. dnl Thin LTO is faster and works for object files with full LTO information, too. @@ -2019,14 +2018,14 @@ if test "$Py_LTO" = 'true' ; then ;; esac ;; - *emcc*) + emcc) if test "$Py_LTO_POLICY" != "default"; then AC_MSG_ERROR([emcc supports only default lto.]) fi LTOFLAGS="-flto" LTOCFLAGS="-flto" ;; - *gcc*) + gcc) if test $Py_LTO_POLICY = thin then AC_MSG_ERROR([thin lto is not supported under gcc compiler.]) @@ -2085,10 +2084,8 @@ then fi LLVM_PROF_ERR=no -# GNU Autoconf recommends the use of expr instead of basename. 
-AS_VAR_SET([CC_BASENAME], [$(expr "//$CC" : '.*/\(.*\)')]) -case "$CC_BASENAME" in - *clang*) +case "$ac_cv_cc_name" in + clang) # Any changes made here should be reflected in the GCC+Darwin case below PGO_PROF_GEN_FLAG="-fprofile-instr-generate" PGO_PROF_USE_FLAG="-fprofile-instr-use=\"\$(shell pwd)/code.profclangd\"" @@ -2107,35 +2104,13 @@ case "$CC_BASENAME" in fi fi ;; - *gcc*) - case $ac_sys_system in - Darwin*) - PGO_PROF_GEN_FLAG="-fprofile-instr-generate" - PGO_PROF_USE_FLAG="-fprofile-instr-use=\"\$(shell pwd)/code.profclangd\"" - LLVM_PROF_MERGER=m4_normalize(" - ${LLVM_PROFDATA} merge - -output=\"\$(shell pwd)/code.profclangd\" - \"\$(shell pwd)\"/*.profclangr - ") - LLVM_PROF_FILE="LLVM_PROFILE_FILE=\"\$(shell pwd)/code-%p.profclangr\"" - if test "${LLVM_PROF_FOUND}" = "not-found" - then - LLVM_PROF_ERR=yes - if test "${REQUIRE_PGO}" = "yes" - then - AC_MSG_ERROR([llvm-profdata is required for a --enable-optimizations build but could not be found.]) - fi - fi - ;; - *) - PGO_PROF_GEN_FLAG="-fprofile-generate" - PGO_PROF_USE_FLAG="-fprofile-use -fprofile-correction" - LLVM_PROF_MERGER="true" - LLVM_PROF_FILE="" - ;; - esac + gcc) + PGO_PROF_GEN_FLAG="-fprofile-generate" + PGO_PROF_USE_FLAG="-fprofile-use -fprofile-correction" + LLVM_PROF_MERGER="true" + LLVM_PROF_FILE="" ;; - *icc*) + icc) PGO_PROF_GEN_FLAG="-prof-gen" PGO_PROF_USE_FLAG="-prof-use" LLVM_PROF_MERGER="true" @@ -2259,19 +2234,6 @@ AC_MSG_RESULT([$BOLT_APPLY_FLAGS]) # compiler and platform. BASECFLAGS tweaks need to be made even if the # user set OPT. -case $CC in - *clang*) - cc_is_clang=1 - ;; - *) - if $CC --version 2>&1 | grep -q clang - then - cc_is_clang=1 - else - cc_is_clang= - fi -esac - dnl Historically, some of our code assumed that signed integer overflow dnl is defined behaviour via twos-complement. dnl Set STRICT_OVERFLOW_CFLAGS and NO_STRICT_OVERFLOW_CFLAGS depending on compiler support. 
@@ -2346,7 +2308,7 @@ if test "${OPT-unset}" = "unset" then case $GCC in yes) - if test -n "${cc_is_clang}" + if test "${ac_cv_cc_name}" != "clang" then # bpo-30104: disable strict aliasing to compile correctly dtoa.c, # see Makefile.pre.in for more information @@ -2526,8 +2488,7 @@ then AX_CHECK_COMPILE_FLAG([-D_FORTIFY_SOURCE=3], [CFLAGS_NODIST="$CFLAGS_NODIST -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=3"], [AC_MSG_WARN([-D_FORTIFY_SOURCE=3 not supported])], [-Werror]) fi -case $GCC in -yes) +AS_VAR_IF([ac_cv_gcc_compat], [yes], [ CFLAGS_NODIST="$CFLAGS_NODIST -std=c11" PY_CHECK_CC_WARNING([enable], [extra], [if we can add -Wextra]) @@ -2568,8 +2529,8 @@ yes) # ICC doesn't recognize the option, but only emits a warning ## XXX does it emit an unused result warning and can it be disabled? - AS_CASE(["$CC_BASENAME"], - [*icc*], [ac_cv_disable_unused_result_warning=no] + AS_CASE(["$ac_cv_cc_name"], + [icc], [ac_cv_disable_unused_result_warning=no] [PY_CHECK_CC_WARNING([disable], [unused-result])]) AS_VAR_IF([ac_cv_disable_unused_result_warning], [yes], [BASECFLAGS="$BASECFLAGS -Wno-unused-result" @@ -2662,20 +2623,17 @@ yes) Darwin*) # -Wno-long-double, -no-cpp-precomp, and -mno-fused-madd # used to be here, but non-Apple gcc doesn't accept them. 
- if test "${CC}" = gcc - then - AC_MSG_CHECKING([which compiler should be used]) - case "${UNIVERSALSDK}" in - */MacOSX10.4u.sdk) - # Build using 10.4 SDK, force usage of gcc when the - # compiler is gcc, otherwise the user will get very - # confusing error messages when building on OSX 10.6 - CC=gcc-4.0 - CPP=cpp-4.0 - ;; - esac - AC_MSG_RESULT([$CC]) - fi + AC_MSG_CHECKING([which compiler should be used]) + case "${UNIVERSALSDK}" in + */MacOSX10.4u.sdk) + # Build using 10.4 SDK, force usage of gcc when the + # compiler is gcc, otherwise the user will get very + # confusing error messages when building on OSX 10.6 + CC=gcc-4.0 + CPP=cpp-4.0 + ;; + esac + AC_MSG_RESULT([$CC]) LIPO_INTEL64_FLAGS="" if test "${enable_universalsdk}" @@ -2800,9 +2758,7 @@ yes) # end of Darwin* tests ;; esac - ;; - -*) +], [ case $ac_sys_system in OpenUNIX*|UnixWare*) BASECFLAGS="$BASECFLAGS -K pentium,host,inline,loop_unroll,alloca " @@ -2811,18 +2767,17 @@ yes) BASECFLAGS="$BASECFLAGS -belf -Ki486 -DSCO5" ;; esac - ;; -esac +]) -case "$CC_BASENAME" in -*mpicc*) +case "$ac_cv_cc_name" in +mpicc) CFLAGS_NODIST="$CFLAGS_NODIST" ;; -*icc*) +icc) # ICC needs -fp-model strict or floats behave badly CFLAGS_NODIST="$CFLAGS_NODIST -fp-model strict" ;; -*xlc*) +xlc) CFLAGS_NODIST="$CFLAGS_NODIST -qalias=noansi -qmaxmem=-1" ;; esac @@ -3430,7 +3385,7 @@ then LDSHARED="\$(LIBPL)/ld_so_aix \$(CC) -bI:\$(LIBPL)/python.exp" ;; SunOS/5*) - if test "$GCC" = "yes" ; then + if test "$ac_cv_gcc_compat" = "yes" ; then LDSHARED='$(CC) -shared' LDCXXSHARED='$(CXX) -shared' else @@ -3438,7 +3393,7 @@ then LDCXXSHARED='$(CXX) -G' fi ;; hp*|HP*) - if test "$GCC" = "yes" ; then + if test "$ac_cv_gcc_compat" = "yes" ; then LDSHARED='$(CC) -shared' LDCXXSHARED='$(CXX) -shared' else @@ -3531,7 +3486,7 @@ then LDSHARED='$(CC) -shared' LDCXXSHARED='$(CXX) -shared';; OpenUNIX*|UnixWare*) - if test "$GCC" = "yes" ; then + if test "$ac_cv_gcc_compat" = "yes" ; then LDSHARED='$(CC) -shared' LDCXXSHARED='$(CXX) -shared' 
else @@ -3571,13 +3526,13 @@ AC_MSG_CHECKING([CCSHARED]) if test -z "$CCSHARED" then case $ac_sys_system/$ac_sys_release in - SunOS*) if test "$GCC" = yes; + SunOS*) if test "$ac_cv_gcc_compat" = "yes"; then CCSHARED="-fPIC"; elif test `uname -p` = sparc; then CCSHARED="-xcode=pic32"; else CCSHARED="-Kpic"; fi;; - hp*|HP*) if test "$GCC" = yes; + hp*|HP*) if test "$ac_cv_gcc_compat" = "yes"; then CCSHARED="-fPIC"; else CCSHARED="+z"; fi;; @@ -3589,12 +3544,12 @@ then FreeBSD*|NetBSD*|OpenBSD*|DragonFly*) CCSHARED="-fPIC";; Haiku*) CCSHARED="-fPIC";; OpenUNIX*|UnixWare*) - if test "$GCC" = "yes" + if test "$ac_cv_gcc_compat" = "yes" then CCSHARED="-fPIC" else CCSHARED="-KPIC" fi;; SCO_SV*) - if test "$GCC" = "yes" + if test "$ac_cv_gcc_compat" = "yes" then CCSHARED="-fPIC" else CCSHARED="-Kpic -belf" fi;; @@ -3652,13 +3607,13 @@ then then LINKFORSHARED="-Wl,--export-dynamic" fi;; - SunOS/5*) case $CC in - *gcc*) + SunOS/5*) if test "$ac_cv_gcc_compat" = "yes"; then if $CC -Xlinker --help 2>&1 | grep export-dynamic >/dev/null then LINKFORSHARED="-Xlinker --export-dynamic" - fi;; - esac;; + fi + fi + ;; CYGWIN*) if test $enable_shared = "no" then @@ -4228,7 +4183,7 @@ AS_VAR_IF( fi elif test $ac_cv_sizeof_size_t -eq 4; then if test "$ac_cv_gcc_asm_for_x87" = yes -a "$libmpdec_system" != sunos; then - AS_CASE([$CC], + AS_CASE([$ac_cv_cc_name], [*gcc*], [libmpdec_machine=ppro], [*clang*], [libmpdec_machine=ppro], [libmpdec_machine=ansi32] @@ -7150,8 +7105,8 @@ if test "$ac_cv_gcc_asm_for_x87" = yes; then # Some versions of gcc miscompile inline asm: # http://gcc.gnu.org/bugzilla/show_bug.cgi?id=46491 # http://gcc.gnu.org/ml/gcc/2010-11/msg00366.html - case $CC in - *gcc*) + case $ac_cv_cc_name in + gcc) AC_MSG_CHECKING([for gcc ipa-pure-const bug]) saved_cflags="$CFLAGS" CFLAGS="-O2" From 1fe67df8e373a5177143e4a310c83438e79f9b77 Mon Sep 17 00:00:00 2001 From: "Gregory P. 
Smith" Date: Thu, 7 Nov 2024 01:57:01 -0800 Subject: [PATCH 45/54] gh-48020: [docs] Remove the logging howto suggested future FileHandler multiprocessing support (GH-126531) Docs: Remove the logging howto potential promise of multiprocessing support in the future. Stick to the facts and suggestions, don't provide hope where we're not going to implement complexity that we'd rather the user implement themselves when needed. --- Doc/howto/logging-cookbook.rst | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/Doc/howto/logging-cookbook.rst b/Doc/howto/logging-cookbook.rst index 321ec0c0f738710..3cd2f1d96a7b34c 100644 --- a/Doc/howto/logging-cookbook.rst +++ b/Doc/howto/logging-cookbook.rst @@ -1267,11 +1267,8 @@ to adapt in your own applications. You could also write your own handler which uses the :class:`~multiprocessing.Lock` class from the :mod:`multiprocessing` module to serialize access to the -file from your processes. The existing :class:`FileHandler` and subclasses do -not make use of :mod:`multiprocessing` at present, though they may do so in the -future. Note that at present, the :mod:`multiprocessing` module does not provide -working lock functionality on all platforms (see -https://bugs.python.org/issue3770). +file from your processes. The stdlib :class:`FileHandler` and subclasses do +not make use of :mod:`multiprocessing`. .. currentmodule:: logging.handlers From 78ad7e632248dc989378cabeb797b9f3d940d9f2 Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Thu, 7 Nov 2024 11:06:27 +0100 Subject: [PATCH 46/54] gh-126499: test_ssl: Don't assume err.reason is a string (GH-126501) The skipping machinery called `getattr(err, "reason", "")` on an arbitrary exception. As intermittent Buildbot failures show, sometimes it's set to None. Convert it to string for this specific check. 
--- Lib/test/test_ssl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_ssl.py b/Lib/test/test_ssl.py index de5110a1cc4b6d8..ca9dac97c8e2131 100644 --- a/Lib/test/test_ssl.py +++ b/Lib/test/test_ssl.py @@ -5029,7 +5029,7 @@ def non_linux_skip_if_other_okay_error(self, err): return # Expect the full test setup to always work on Linux. if (isinstance(err, ConnectionResetError) or (isinstance(err, OSError) and err.errno == errno.EINVAL) or - re.search('wrong.version.number', getattr(err, "reason", ""), re.I)): + re.search('wrong.version.number', str(getattr(err, "reason", "")), re.I)): # On Windows the TCP RST leads to a ConnectionResetError # (ECONNRESET) which Linux doesn't appear to surface to userspace. # If wrap_socket() winds up on the "if connected:" path and doing From c9cda1608edf7664c10f4f467e24591062c2fe62 Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Thu, 7 Nov 2024 11:07:02 +0100 Subject: [PATCH 47/54] gh-126500: test_ssl: Don't stop ThreadedEchoServer on OSError in ConnectionHandler; rely on __exit__ (GH-126503) If `read()` in the ConnectionHandler thread raises `OSError` (except `ConnectionError`), the ConnectionHandler shuts down the entire ThreadedEchoServer, preventing further connections. It also does that for `EPROTOTYPE` in `wrap_conn`. As far as I can see, this is done to avoid the server thread getting stuck, forgotten, in its accept loop. However, since 2011 (5b95eb90a7167285b6544b50865227c584943c9a) the server is used as a context manager, and its `__exit__` does `stop()` and `join()`. (I'm not sure if we *always* used `with` since that commit, but currently we do.) Make sure that the context manager *is* used, and remove the `server.stop()` calls from ConnectionHandler. 
--- Lib/test/test_ssl.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/Lib/test/test_ssl.py b/Lib/test/test_ssl.py index ca9dac97c8e2131..59f37b3f9a75752 100644 --- a/Lib/test/test_ssl.py +++ b/Lib/test/test_ssl.py @@ -2299,7 +2299,6 @@ def wrap_conn(self): # See also http://erickt.github.io/blog/2014/11/19/adventures-in-debugging-a-potential-osx-kernel-bug/ if e.errno != errno.EPROTOTYPE and sys.platform != "darwin": self.running = False - self.server.stop() self.close() return False else: @@ -2436,10 +2435,6 @@ def run(self): self.close() self.running = False - # normally, we'd just stop here, but for the test - # harness, we want to stop the server - self.server.stop() - def __init__(self, certificate=None, ssl_version=None, certreqs=None, cacerts=None, chatty=True, connectionchatty=False, starttls_server=False, @@ -2473,21 +2468,33 @@ def __init__(self, certificate=None, ssl_version=None, self.conn_errors = [] threading.Thread.__init__(self) self.daemon = True + self._in_context = False def __enter__(self): + if self._in_context: + raise ValueError('Re-entering ThreadedEchoServer context') + self._in_context = True self.start(threading.Event()) self.flag.wait() return self def __exit__(self, *args): + assert self._in_context + self._in_context = False self.stop() self.join() def start(self, flag=None): + if not self._in_context: + raise ValueError( + 'ThreadedEchoServer must be used as a context manager') self.flag = flag threading.Thread.start(self) def run(self): + if not self._in_context: + raise ValueError( + 'ThreadedEchoServer must be used as a context manager') self.sock.settimeout(1.0) self.sock.listen(5) self.active = True From 85036c8d612007356d2118eb25b460505078b023 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Thu, 7 Nov 2024 10:48:27 +0000 Subject: [PATCH 48/54] GH-126222: Fix `_PyUop_num_popped` (GH-126507) --- Include/internal/pycore_uop_metadata.h | 112 +++++++++--------- Lib/test/test_capi/test_opt.py 
| 8 ++ ...-11-06-16-34-11.gh-issue-126222.9NBfTn.rst | 3 + .../cases_generator/uop_metadata_generator.py | 2 + 4 files changed, 69 insertions(+), 56 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2024-11-06-16-34-11.gh-issue-126222.9NBfTn.rst diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index ade297201f0ac29..98a41d1f23f569b 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -658,7 +658,7 @@ int _PyUop_num_popped(int opcode, int oparg) case _TO_BOOL: return 1; case _TO_BOOL_BOOL: - return 1; + return 0; case _TO_BOOL_INT: return 1; case _TO_BOOL_LIST: @@ -672,11 +672,11 @@ int _PyUop_num_popped(int opcode, int oparg) case _UNARY_INVERT: return 1; case _GUARD_BOTH_INT: - return 2; + return 0; case _GUARD_NOS_INT: - return 2; + return 0; case _GUARD_TOS_INT: - return 1; + return 0; case _BINARY_OP_MULTIPLY_INT: return 2; case _BINARY_OP_ADD_INT: @@ -684,11 +684,11 @@ int _PyUop_num_popped(int opcode, int oparg) case _BINARY_OP_SUBTRACT_INT: return 2; case _GUARD_BOTH_FLOAT: - return 2; + return 0; case _GUARD_NOS_FLOAT: - return 2; + return 0; case _GUARD_TOS_FLOAT: - return 1; + return 0; case _BINARY_OP_MULTIPLY_FLOAT: return 2; case _BINARY_OP_ADD_FLOAT: @@ -696,7 +696,7 @@ int _PyUop_num_popped(int opcode, int oparg) case _BINARY_OP_SUBTRACT_FLOAT: return 2; case _GUARD_BOTH_UNICODE: - return 2; + return 0; case _BINARY_OP_ADD_UNICODE: return 2; case _BINARY_OP_INPLACE_ADD_UNICODE: @@ -716,13 +716,13 @@ int _PyUop_num_popped(int opcode, int oparg) case _BINARY_SUBSCR_DICT: return 2; case _BINARY_SUBSCR_CHECK_FUNC: - return 2; + return 0; case _BINARY_SUBSCR_INIT_CALL: return 2; case _LIST_APPEND: - return 2 + (oparg-1); + return 1; case _SET_ADD: - return 2 + (oparg-1); + return 1; case _STORE_SUBSCR: return 3; case _STORE_SUBSCR_LIST_INT: @@ -740,11 +740,11 @@ int _PyUop_num_popped(int opcode, int oparg) case _GET_AITER: return 1; case _GET_ANEXT: 
- return 1; + return 0; case _GET_AWAITABLE: return 1; case _SEND_GEN_FRAME: - return 2; + return 1; case _YIELD_VALUE: return 1; case _POP_EXCEPT: @@ -812,9 +812,9 @@ int _PyUop_num_popped(int opcode, int oparg) case _BUILD_LIST: return oparg; case _LIST_EXTEND: - return 2 + (oparg-1); + return 1; case _SET_UPDATE: - return 2 + (oparg-1); + return 1; case _BUILD_SET: return oparg; case _BUILD_MAP: @@ -822,11 +822,11 @@ int _PyUop_num_popped(int opcode, int oparg) case _SETUP_ANNOTATIONS: return 0; case _DICT_UPDATE: - return 2 + (oparg - 1); + return 1; case _DICT_MERGE: - return 5 + (oparg - 1); + return 1; case _MAP_ADD: - return 3 + (oparg - 1); + return 2; case _LOAD_SUPER_ATTR_ATTR: return 3; case _LOAD_SUPER_ATTR_METHOD: @@ -834,9 +834,9 @@ int _PyUop_num_popped(int opcode, int oparg) case _LOAD_ATTR: return 1; case _GUARD_TYPE_VERSION: - return 1; + return 0; case _CHECK_MANAGED_OBJECT_HAS_VALUES: - return 1; + return 0; case _LOAD_ATTR_INSTANCE_VALUE_0: return 1; case _LOAD_ATTR_INSTANCE_VALUE_1: @@ -844,11 +844,11 @@ int _PyUop_num_popped(int opcode, int oparg) case _LOAD_ATTR_INSTANCE_VALUE: return 1; case _CHECK_ATTR_MODULE: - return 1; + return 0; case _LOAD_ATTR_MODULE: return 1; case _CHECK_ATTR_WITH_HINT: - return 1; + return 0; case _LOAD_ATTR_WITH_HINT: return 1; case _LOAD_ATTR_SLOT_0: @@ -858,7 +858,7 @@ int _PyUop_num_popped(int opcode, int oparg) case _LOAD_ATTR_SLOT: return 1; case _CHECK_ATTR_CLASS: - return 1; + return 0; case _LOAD_ATTR_CLASS_0: return 1; case _LOAD_ATTR_CLASS_1: @@ -868,7 +868,7 @@ int _PyUop_num_popped(int opcode, int oparg) case _LOAD_ATTR_PROPERTY_FRAME: return 1; case _GUARD_DORV_NO_DICT: - return 1; + return 0; case _STORE_ATTR_INSTANCE_VALUE: return 2; case _STORE_ATTR_WITH_HINT: @@ -894,59 +894,59 @@ int _PyUop_num_popped(int opcode, int oparg) case _CHECK_EG_MATCH: return 2; case _CHECK_EXC_MATCH: - return 2; + return 1; case _IMPORT_NAME: return 2; case _IMPORT_FROM: - return 1; + return 0; case _IS_NONE: return 
1; case _GET_LEN: - return 1; + return 0; case _MATCH_CLASS: return 3; case _MATCH_MAPPING: - return 1; + return 0; case _MATCH_SEQUENCE: - return 1; + return 0; case _MATCH_KEYS: - return 2; + return 0; case _GET_ITER: return 1; case _GET_YIELD_FROM_ITER: return 1; case _FOR_ITER_TIER_TWO: - return 1; + return 0; case _ITER_CHECK_LIST: - return 1; + return 0; case _GUARD_NOT_EXHAUSTED_LIST: - return 1; + return 0; case _ITER_NEXT_LIST: - return 1; + return 0; case _ITER_CHECK_TUPLE: - return 1; + return 0; case _GUARD_NOT_EXHAUSTED_TUPLE: - return 1; + return 0; case _ITER_NEXT_TUPLE: - return 1; + return 0; case _ITER_CHECK_RANGE: - return 1; + return 0; case _GUARD_NOT_EXHAUSTED_RANGE: - return 1; + return 0; case _ITER_NEXT_RANGE: - return 1; + return 0; case _FOR_ITER_GEN_FRAME: - return 1; + return 0; case _LOAD_SPECIAL: return 1; case _WITH_EXCEPT_START: - return 5; + return 0; case _PUSH_EXC_INFO: return 1; case _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT: - return 1; + return 0; case _GUARD_KEYS_VERSION: - return 1; + return 0; case _LOAD_ATTR_METHOD_WITH_VALUES: return 1; case _LOAD_ATTR_METHOD_NO_DICT: @@ -956,7 +956,7 @@ int _PyUop_num_popped(int opcode, int oparg) case _LOAD_ATTR_NONDESCRIPTOR_NO_DICT: return 1; case _CHECK_ATTR_METHOD_LAZY_DICT: - return 1; + return 0; case _LOAD_ATTR_METHOD_LAZY_DICT: return 1; case _MAYBE_EXPAND_METHOD: @@ -964,25 +964,25 @@ int _PyUop_num_popped(int opcode, int oparg) case _PY_FRAME_GENERAL: return 2 + oparg; case _CHECK_FUNCTION_VERSION: - return 2 + oparg; + return 0; case _CHECK_METHOD_VERSION: - return 2 + oparg; + return 0; case _EXPAND_METHOD: return 2 + oparg; case _CHECK_IS_NOT_PY_CALLABLE: - return 2 + oparg; + return 0; case _CALL_NON_PY_GENERAL: return 2 + oparg; case _CHECK_CALL_BOUND_METHOD_EXACT_ARGS: - return 2 + oparg; + return 0; case _INIT_CALL_BOUND_METHOD_EXACT_ARGS: return 2 + oparg; case _CHECK_PEP_523: return 0; case _CHECK_FUNCTION_EXACT_ARGS: - return 2 + oparg; + return 0; case 
_CHECK_STACK_SPACE: - return 2 + oparg; + return 0; case _INIT_CALL_PY_EXACT_ARGS_0: return 2 + oparg; case _INIT_CALL_PY_EXACT_ARGS_1: @@ -1036,17 +1036,17 @@ int _PyUop_num_popped(int opcode, int oparg) case _PY_FRAME_KW: return 3 + oparg; case _CHECK_FUNCTION_VERSION_KW: - return 3 + oparg; + return 0; case _CHECK_METHOD_VERSION_KW: - return 3 + oparg; + return 0; case _EXPAND_METHOD_KW: return 3 + oparg; case _CHECK_IS_NOT_PY_CALLABLE_KW: - return 3 + oparg; + return 0; case _CALL_KW_NON_PY: return 3 + oparg; case _MAKE_CALLARGS_A_TUPLE: - return 3 + (oparg & 1); + return 1 + (oparg & 1); case _MAKE_FUNCTION: return 1; case _SET_FUNCTION_ATTRIBUTE: @@ -1062,7 +1062,7 @@ int _PyUop_num_popped(int opcode, int oparg) case _FORMAT_WITH_SPEC: return 2; case _COPY: - return 1 + (oparg-1); + return 0; case _BINARY_OP: return 2; case _SWAP: diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index c352325ff3d08af..7b3d9e4fd1126fc 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -1486,6 +1486,14 @@ def fn(a): fn(A()) + def test_jit_error_pops(self): + """ + Tests that the correct number of pops are inserted into the + exit stub + """ + items = 17 * [None] + [[]] + with self.assertRaises(TypeError): + {item for item in items} if __name__ == "__main__": unittest.main() diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-11-06-16-34-11.gh-issue-126222.9NBfTn.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-11-06-16-34-11.gh-issue-126222.9NBfTn.rst new file mode 100644 index 000000000000000..ebf6673782f02c4 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-11-06-16-34-11.gh-issue-126222.9NBfTn.rst @@ -0,0 +1,3 @@ +Do not include count of "peek" items in ``_PyUop_num_popped``. This ensures +that the correct number of items are popped from the stack when a micro-op +exits with an error. 
diff --git a/Tools/cases_generator/uop_metadata_generator.py b/Tools/cases_generator/uop_metadata_generator.py index 7b3325ada4a49fe..6eb022899d6cae7 100644 --- a/Tools/cases_generator/uop_metadata_generator.py +++ b/Tools/cases_generator/uop_metadata_generator.py @@ -51,6 +51,8 @@ def generate_names_and_flags(analysis: Analysis, out: CWriter) -> None: if uop.is_viable() and uop.properties.tier != 1: stack = Stack() for var in reversed(uop.stack.inputs): + if var.peek: + break stack.pop(var) popped = (-stack.base_offset).to_c() out.emit(f"case {uop.name}:\n") From e3510bd3dd9ea8f2a30cb1128470aee3a48d8880 Mon Sep 17 00:00:00 2001 From: Richard Hansen Date: Thu, 7 Nov 2024 10:29:31 -0500 Subject: [PATCH 49/54] Doc: C API: Demote sections to subsections for consistency (#126535) The entire file should be a single section; the headings below the first heading should be subsections. --- Doc/c-api/typeobj.rst | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/Doc/c-api/typeobj.rst b/Doc/c-api/typeobj.rst index 8a185486fe44f1d..ba58cc1c26c70b4 100644 --- a/Doc/c-api/typeobj.rst +++ b/Doc/c-api/typeobj.rst @@ -2230,7 +2230,7 @@ This is done by filling a :c:type:`PyType_Spec` structure and calling .. _number-structs: Number Object Structures -======================== +------------------------ .. sectionauthor:: Amaury Forgeot d'Arc @@ -2344,7 +2344,7 @@ Number Object Structures .. _mapping-structs: Mapping Object Structures -========================= +------------------------- .. sectionauthor:: Amaury Forgeot d'Arc @@ -2381,7 +2381,7 @@ Mapping Object Structures .. _sequence-structs: Sequence Object Structures -========================== +-------------------------- .. sectionauthor:: Amaury Forgeot d'Arc @@ -2461,7 +2461,7 @@ Sequence Object Structures .. _buffer-structs: Buffer Object Structures -======================== +------------------------ .. sectionauthor:: Greg J. Stein .. 
sectionauthor:: Benjamin Peterson @@ -2556,7 +2556,7 @@ Buffer Object Structures Async Object Structures -======================= +----------------------- .. sectionauthor:: Yury Selivanov @@ -2624,7 +2624,7 @@ Async Object Structures .. _slot-typedefs: Slot Type typedefs -================== +------------------ .. c:type:: PyObject *(*allocfunc)(PyTypeObject *cls, Py_ssize_t nitems) @@ -2733,7 +2733,7 @@ Slot Type typedefs .. _typedef-examples: Examples -======== +-------- The following are simple examples of Python type definitions. They include common usage you may encounter. Some demonstrate tricky corner From 19c248185343dfad046bbe4046b2b900e7405666 Mon Sep 17 00:00:00 2001 From: Valerii <81074936+valerii-chirkov@users.noreply.github.com> Date: Thu, 7 Nov 2024 20:35:29 +0500 Subject: [PATCH 50/54] gh-126529: Update devguide links to relative filenames in InternalDocs (#126530) Update devguide links to relative filenames in InternalDocs/parser.md and InternalDocs/compiler.md. --- InternalDocs/compiler.md | 4 ++-- InternalDocs/parser.md | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/InternalDocs/compiler.md b/InternalDocs/compiler.md index 0da4670c792cb5f..37964bd99428df7 100644 --- a/InternalDocs/compiler.md +++ b/InternalDocs/compiler.md @@ -42,10 +42,10 @@ The definitions for literal tokens (such as `:`, numbers, etc.) can be found in See Also: -* [Guide to the parser](https://devguide.python.org/internals/parser/index.html) +* [Guide to the parser](parser.md) for a detailed description of the parser. -* [Changing CPython’s grammar](https://devguide.python.org/developer-workflow/grammar/#grammar) +* [Changing CPython’s grammar](changing_grammar.md) for a detailed description of the grammar. 
diff --git a/InternalDocs/parser.md b/InternalDocs/parser.md index a0c70b46087d1ad..348988b7c2f0039 100644 --- a/InternalDocs/parser.md +++ b/InternalDocs/parser.md @@ -17,7 +17,7 @@ Therefore, changes to the Python language are made by modifying the [grammar file](../Grammar/python.gram). Developers rarely need to modify the generator itself. -See [Changing CPython's grammar](./changing_grammar.md) +See [Changing CPython's grammar](changing_grammar.md) for a detailed description of the grammar and the process for changing it. How PEG parsers work From 3d9f9ae5a7c4739fe319aa436ab1834d6765b0ac Mon Sep 17 00:00:00 2001 From: Aditya Borikar Date: Thu, 7 Nov 2024 07:37:41 -0800 Subject: [PATCH 51/54] Chore: Fix typo in `pyarena.c` (#126527) --- Python/pyarena.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/pyarena.c b/Python/pyarena.c index 7ab370163b2b93f..28970f9d0670f43 100644 --- a/Python/pyarena.c +++ b/Python/pyarena.c @@ -4,7 +4,7 @@ /* A simple arena block structure. Measurements with standard library modules suggest the average - allocation is about 20 bytes and that most compiles use a single + allocation is about 20 bytes and that most compilers use a single block. */ From 9357fdcaf0b08dac9396c17e8695b420fad887f8 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Thu, 7 Nov 2024 09:32:42 -0700 Subject: [PATCH 52/54] gh-76785: Minor Cleanup of "Cross-interpreter" Code (gh-126457) The primary objective here is to allow some later changes to be cleaner. Mostly this involves renaming things and moving a few things around. 
* CrossInterpreterData -> XIData * crossinterpdatafunc -> xidatafunc * split out pycore_crossinterp_data_registry.h * add _PyXIData_lookup_t --- Include/internal/pycore_crossinterp.h | 114 ++++------ .../pycore_crossinterp_data_registry.h | 36 +++ Include/internal/pycore_runtime_init.h | 6 +- Makefile.pre.in | 1 + Modules/_interpchannelsmodule.c | 70 +++--- Modules/_interpqueuesmodule.c | 53 ++--- Modules/_interpreters_common.h | 10 +- Modules/_interpretersmodule.c | 24 +- Modules/_testinternalcapi.c | 22 +- PCbuild/pythoncore.vcxproj | 1 + PCbuild/pythoncore.vcxproj.filters | 3 + Python/crossinterp.c | 131 +++++------ Python/crossinterp_data_lookup.h | 206 +++++++++--------- Python/pystate.c | 2 +- Tools/c-analyzer/cpython/_parser.py | 1 + 15 files changed, 343 insertions(+), 337 deletions(-) create mode 100644 Include/internal/pycore_crossinterp_data_registry.h diff --git a/Include/internal/pycore_crossinterp.h b/Include/internal/pycore_crossinterp.h index 2dd165eae74850f..e91e911feb38cca 100644 --- a/Include/internal/pycore_crossinterp.h +++ b/Include/internal/pycore_crossinterp.h @@ -38,28 +38,28 @@ extern int _Py_CallInInterpreterAndRawFree( /* cross-interpreter data */ /**************************/ -typedef struct _xid _PyCrossInterpreterData; -typedef PyObject *(*xid_newobjectfunc)(_PyCrossInterpreterData *); +typedef struct _xid _PyXIData_t; +typedef PyObject *(*xid_newobjectfunc)(_PyXIData_t *); typedef void (*xid_freefunc)(void *); -// _PyCrossInterpreterData is similar to Py_buffer as an effectively +// _PyXIData_t is similar to Py_buffer as an effectively // opaque struct that holds data outside the object machinery. This // is necessary to pass safely between interpreters in the same process. struct _xid { // data is the cross-interpreter-safe derivation of a Python object - // (see _PyObject_GetCrossInterpreterData). It will be NULL if the + // (see _PyObject_GetXIData). It will be NULL if the // new_object func (below) encodes the data. 
void *data; // obj is the Python object from which the data was derived. This // is non-NULL only if the data remains bound to the object in some // way, such that the object must be "released" (via a decref) when // the data is released. In that case the code that sets the field, - // likely a registered "crossinterpdatafunc", is responsible for + // likely a registered "xidatafunc", is responsible for // ensuring it owns the reference (i.e. incref). PyObject *obj; // interp is the ID of the owning interpreter of the original // object. It corresponds to the active interpreter when - // _PyObject_GetCrossInterpreterData() was called. This should only + // _PyObject_GetXIData() was called. This should only // be set by the cross-interpreter machinery. // // We use the ID rather than the PyInterpreterState to avoid issues @@ -77,96 +77,77 @@ struct _xid { // okay (e.g. bytes) and for those types this field should be set // to NULL. However, for most the data was allocated just for // cross-interpreter use, so it must be freed when - // _PyCrossInterpreterData_Release is called or the memory will + // _PyXIData_Release is called or the memory will // leak. In that case, at the very least this field should be set // to PyMem_RawFree (the default if not explicitly set to NULL). // The call will happen with the original interpreter activated. 
xid_freefunc free; }; -PyAPI_FUNC(_PyCrossInterpreterData *) _PyCrossInterpreterData_New(void); -PyAPI_FUNC(void) _PyCrossInterpreterData_Free(_PyCrossInterpreterData *data); +PyAPI_FUNC(_PyXIData_t *) _PyXIData_New(void); +PyAPI_FUNC(void) _PyXIData_Free(_PyXIData_t *data); -#define _PyCrossInterpreterData_DATA(DATA) ((DATA)->data) -#define _PyCrossInterpreterData_OBJ(DATA) ((DATA)->obj) -#define _PyCrossInterpreterData_INTERPID(DATA) ((DATA)->interpid) +#define _PyXIData_DATA(DATA) ((DATA)->data) +#define _PyXIData_OBJ(DATA) ((DATA)->obj) +#define _PyXIData_INTERPID(DATA) ((DATA)->interpid) // Users should not need getters for "new_object" or "free". +/* getting cross-interpreter data */ + +typedef int (*xidatafunc)(PyThreadState *tstate, PyObject *, _PyXIData_t *); + +typedef struct _xid_lookup_state _PyXIData_lookup_t; + +PyAPI_FUNC(xidatafunc) _PyXIData_Lookup(PyObject *); +PyAPI_FUNC(int) _PyObject_CheckXIData(PyObject *); +PyAPI_FUNC(int) _PyObject_GetXIData(PyObject *, _PyXIData_t *); + + +/* using cross-interpreter data */ + +PyAPI_FUNC(PyObject *) _PyXIData_NewObject(_PyXIData_t *); +PyAPI_FUNC(int) _PyXIData_Release(_PyXIData_t *); +PyAPI_FUNC(int) _PyXIData_ReleaseAndRawFree(_PyXIData_t *); + + /* defining cross-interpreter data */ -PyAPI_FUNC(void) _PyCrossInterpreterData_Init( - _PyCrossInterpreterData *data, +PyAPI_FUNC(void) _PyXIData_Init( + _PyXIData_t *data, PyInterpreterState *interp, void *shared, PyObject *obj, xid_newobjectfunc new_object); -PyAPI_FUNC(int) _PyCrossInterpreterData_InitWithSize( - _PyCrossInterpreterData *, +PyAPI_FUNC(int) _PyXIData_InitWithSize( + _PyXIData_t *, PyInterpreterState *interp, const size_t, PyObject *, xid_newobjectfunc); -PyAPI_FUNC(void) _PyCrossInterpreterData_Clear( - PyInterpreterState *, _PyCrossInterpreterData *); +PyAPI_FUNC(void) _PyXIData_Clear( PyInterpreterState *, _PyXIData_t *); // Normally the Init* functions are sufficient. 
The only time // additional initialization might be needed is to set the "free" func, // though that should be infrequent. -#define _PyCrossInterpreterData_SET_FREE(DATA, FUNC) \ +#define _PyXIData_SET_FREE(DATA, FUNC) \ do { \ (DATA)->free = (FUNC); \ } while (0) // Additionally, some shareable types are essentially light wrappers -// around other shareable types. The crossinterpdatafunc of the wrapper +// around other shareable types. The xidatafunc of the wrapper // can often be implemented by calling the wrapped object's -// crossinterpdatafunc and then changing the "new_object" function. -// We have _PyCrossInterpreterData_SET_NEW_OBJECT() here for that, +// xidatafunc and then changing the "new_object" function. +// We have _PyXIData_SET_NEW_OBJECT() here for that, // but might be better to have a function like -// _PyCrossInterpreterData_AdaptToWrapper() instead. -#define _PyCrossInterpreterData_SET_NEW_OBJECT(DATA, FUNC) \ +// _PyXIData_AdaptToWrapper() instead. +#define _PyXIData_SET_NEW_OBJECT(DATA, FUNC) \ do { \ (DATA)->new_object = (FUNC); \ } while (0) -/* using cross-interpreter data */ - -PyAPI_FUNC(int) _PyObject_CheckCrossInterpreterData(PyObject *); -PyAPI_FUNC(int) _PyObject_GetCrossInterpreterData(PyObject *, _PyCrossInterpreterData *); -PyAPI_FUNC(PyObject *) _PyCrossInterpreterData_NewObject(_PyCrossInterpreterData *); -PyAPI_FUNC(int) _PyCrossInterpreterData_Release(_PyCrossInterpreterData *); -PyAPI_FUNC(int) _PyCrossInterpreterData_ReleaseAndRawFree(_PyCrossInterpreterData *); - - /* cross-interpreter data registry */ -// For now we use a global registry of shareable classes. An -// alternative would be to add a tp_* slot for a class's -// crossinterpdatafunc. It would be simpler and more efficient. 
- -typedef int (*crossinterpdatafunc)(PyThreadState *tstate, PyObject *, - _PyCrossInterpreterData *); - -struct _xidregitem; - -struct _xidregitem { - struct _xidregitem *prev; - struct _xidregitem *next; - /* This can be a dangling pointer, but only if weakref is set. */ - PyTypeObject *cls; - /* This is NULL for builtin types. */ - PyObject *weakref; - size_t refcount; - crossinterpdatafunc getdata; -}; - -struct _xidregistry { - int global; /* builtin types or heap types */ - int initialized; - PyMutex mutex; - struct _xidregitem *head; -}; - -PyAPI_FUNC(int) _PyCrossInterpreterData_RegisterClass(PyTypeObject *, crossinterpdatafunc); -PyAPI_FUNC(int) _PyCrossInterpreterData_UnregisterClass(PyTypeObject *); -PyAPI_FUNC(crossinterpdatafunc) _PyCrossInterpreterData_Lookup(PyObject *); +#define Py_CORE_CROSSINTERP_DATA_REGISTRY_H +#include "pycore_crossinterp_data_registry.h" +#undef Py_CORE_CROSSINTERP_DATA_REGISTRY_H /*****************************/ @@ -175,14 +156,12 @@ PyAPI_FUNC(crossinterpdatafunc) _PyCrossInterpreterData_Lookup(PyObject *); struct _xi_runtime_state { // builtin types - // XXX Remove this field once we have a tp_* slot. - struct _xidregistry registry; + _PyXIData_lookup_t data_lookup; }; struct _xi_state { // heap types - // XXX Remove this field once we have a tp_* slot. 
- struct _xidregistry registry; + _PyXIData_lookup_t data_lookup; // heap types PyObject *PyExc_NotShareableError; @@ -190,7 +169,6 @@ struct _xi_state { extern PyStatus _PyXI_Init(PyInterpreterState *interp); extern void _PyXI_Fini(PyInterpreterState *interp); - extern PyStatus _PyXI_InitTypes(PyInterpreterState *interp); extern void _PyXI_FiniTypes(PyInterpreterState *interp); diff --git a/Include/internal/pycore_crossinterp_data_registry.h b/Include/internal/pycore_crossinterp_data_registry.h new file mode 100644 index 000000000000000..2990c6af62e952a --- /dev/null +++ b/Include/internal/pycore_crossinterp_data_registry.h @@ -0,0 +1,36 @@ +#ifndef Py_CORE_CROSSINTERP_DATA_REGISTRY_H +# error "this header must not be included directly" +#endif + + +// For now we use a global registry of shareable classes. An +// alternative would be to add a tp_* slot for a class's +// xidatafunc. It would be simpler and more efficient. + +struct _xidregitem; + +struct _xidregitem { + struct _xidregitem *prev; + struct _xidregitem *next; + /* This can be a dangling pointer, but only if weakref is set. */ + PyTypeObject *cls; + /* This is NULL for builtin types. */ + PyObject *weakref; + size_t refcount; + xidatafunc getdata; +}; + +struct _xidregistry { + int global; /* builtin types or heap types */ + int initialized; + PyMutex mutex; + struct _xidregitem *head; +}; + +PyAPI_FUNC(int) _PyXIData_RegisterClass(PyTypeObject *, xidatafunc); +PyAPI_FUNC(int) _PyXIData_UnregisterClass(PyTypeObject *); + +struct _xid_lookup_state { + // XXX Remove this field once we have a tp_* slot. 
+ struct _xidregistry registry; +}; diff --git a/Include/internal/pycore_runtime_init.h b/Include/internal/pycore_runtime_init.h index e99febab2f3d57a..bd3d704cb77730d 100644 --- a/Include/internal/pycore_runtime_init.h +++ b/Include/internal/pycore_runtime_init.h @@ -50,8 +50,10 @@ extern PyTypeObject _PyExc_MemoryError; .next_id = -1, \ }, \ .xi = { \ - .registry = { \ - .global = 1, \ + .data_lookup = { \ + .registry = { \ + .global = 1, \ + }, \ }, \ }, \ /* A TSS key must be initialized with Py_tss_NEEDS_INIT \ diff --git a/Makefile.pre.in b/Makefile.pre.in index c650ecaf7be1373..a337223d4d8608c 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -1203,6 +1203,7 @@ PYTHON_HEADERS= \ $(srcdir)/Include/internal/pycore_context.h \ $(srcdir)/Include/internal/pycore_critical_section.h \ $(srcdir)/Include/internal/pycore_crossinterp.h \ + $(srcdir)/Include/internal/pycore_crossinterp_data_registry.h \ $(srcdir)/Include/internal/pycore_debug_offsets.h \ $(srcdir)/Include/internal/pycore_descrobject.h \ $(srcdir)/Include/internal/pycore_dict.h \ diff --git a/Modules/_interpchannelsmodule.c b/Modules/_interpchannelsmodule.c index 5dc032b46cac9af..b8d7dfb87cce0e1 100644 --- a/Modules/_interpchannelsmodule.c +++ b/Modules/_interpchannelsmodule.c @@ -6,7 +6,7 @@ #endif #include "Python.h" -#include "pycore_crossinterp.h" // struct _xid +#include "pycore_crossinterp.h" // _PyXIData_t #include "pycore_interp.h" // _PyInterpreterState_LookUpID() #include "pycore_pystate.h" // _PyInterpreterState_GetIDObject() @@ -59,7 +59,7 @@ _globals (static struct globals): first (struct _channelitem *): next (struct _channelitem *): ... 
- data (_PyCrossInterpreterData *): + data (_PyXIData_t *): data (void *) obj (PyObject *) interpid (int64_t) @@ -80,10 +80,10 @@ The above state includes the following allocations by the module: * 1 struct _channelqueue * for each item in each channel: * 1 struct _channelitem - * 1 _PyCrossInterpreterData + * 1 _PyXIData_t The only objects in that global state are the references held by each -channel's queue, which are safely managed via the _PyCrossInterpreterData_*() +channel's queue, which are safely managed via the _PyXIData_*() API.. The module does not create any objects that are shared globally. */ @@ -102,7 +102,7 @@ API.. The module does not create any objects that are shared globally. #define XID_FREE 2 static int -_release_xid_data(_PyCrossInterpreterData *data, int flags) +_release_xid_data(_PyXIData_t *data, int flags) { int ignoreexc = flags & XID_IGNORE_EXC; PyObject *exc; @@ -111,10 +111,10 @@ _release_xid_data(_PyCrossInterpreterData *data, int flags) } int res; if (flags & XID_FREE) { - res = _PyCrossInterpreterData_ReleaseAndRawFree(data); + res = _PyXIData_ReleaseAndRawFree(data); } else { - res = _PyCrossInterpreterData_Release(data); + res = _PyXIData_Release(data); } if (res < 0) { /* The owning interpreter is already destroyed. */ @@ -519,7 +519,7 @@ typedef struct _channelitem { This is necessary because item->data might be NULL, meaning the interpreter has been destroyed. 
*/ int64_t interpid; - _PyCrossInterpreterData *data; + _PyXIData_t *data; _waiting_t *waiting; int unboundop; struct _channelitem *next; @@ -533,7 +533,7 @@ _channelitem_ID(_channelitem *item) static void _channelitem_init(_channelitem *item, - int64_t interpid, _PyCrossInterpreterData *data, + int64_t interpid, _PyXIData_t *data, _waiting_t *waiting, int unboundop) { if (interpid < 0) { @@ -541,8 +541,8 @@ _channelitem_init(_channelitem *item, } else { assert(data == NULL - || _PyCrossInterpreterData_INTERPID(data) < 0 - || interpid == _PyCrossInterpreterData_INTERPID(data)); + || _PyXIData_INTERPID(data) < 0 + || interpid == _PyXIData_INTERPID(data)); } *item = (_channelitem){ .interpid = interpid, @@ -580,7 +580,7 @@ _channelitem_clear(_channelitem *item) } static _channelitem * -_channelitem_new(int64_t interpid, _PyCrossInterpreterData *data, +_channelitem_new(int64_t interpid, _PyXIData_t *data, _waiting_t *waiting, int unboundop) { _channelitem *item = GLOBAL_MALLOC(_channelitem); @@ -611,7 +611,7 @@ _channelitem_free_all(_channelitem *item) static void _channelitem_popped(_channelitem *item, - _PyCrossInterpreterData **p_data, _waiting_t **p_waiting, + _PyXIData_t **p_data, _waiting_t **p_waiting, int *p_unboundop) { assert(item->waiting == NULL || item->waiting->status == WAITING_ACQUIRED); @@ -634,7 +634,7 @@ _channelitem_clear_interpreter(_channelitem *item) assert(item->unboundop != UNBOUND_REMOVE); return 0; } - assert(_PyCrossInterpreterData_INTERPID(item->data) == item->interpid); + assert(_PyXIData_INTERPID(item->data) == item->interpid); switch (item->unboundop) { case UNBOUND_REMOVE: @@ -691,7 +691,7 @@ _channelqueue_free(_channelqueue *queue) static int _channelqueue_put(_channelqueue *queue, - int64_t interpid, _PyCrossInterpreterData *data, + int64_t interpid, _PyXIData_t *data, _waiting_t *waiting, int unboundop) { _channelitem *item = _channelitem_new(interpid, data, waiting, unboundop); @@ -717,7 +717,7 @@ _channelqueue_put(_channelqueue 
*queue, static int _channelqueue_get(_channelqueue *queue, - _PyCrossInterpreterData **p_data, _waiting_t **p_waiting, + _PyXIData_t **p_data, _waiting_t **p_waiting, int *p_unboundop) { _channelitem *item = queue->first; @@ -769,7 +769,7 @@ _channelqueue_find(_channelqueue *queue, _channelitem_id_t itemid, static void _channelqueue_remove(_channelqueue *queue, _channelitem_id_t itemid, - _PyCrossInterpreterData **p_data, _waiting_t **p_waiting) + _PyXIData_t **p_data, _waiting_t **p_waiting) { _channelitem *prev = NULL; _channelitem *item = NULL; @@ -1128,8 +1128,7 @@ _channel_free(_channel_state *chan) static int _channel_add(_channel_state *chan, int64_t interpid, - _PyCrossInterpreterData *data, _waiting_t *waiting, - int unboundop) + _PyXIData_t *data, _waiting_t *waiting, int unboundop) { int res = -1; PyThread_acquire_lock(chan->mutex, WAIT_LOCK); @@ -1156,8 +1155,7 @@ _channel_add(_channel_state *chan, int64_t interpid, static int _channel_next(_channel_state *chan, int64_t interpid, - _PyCrossInterpreterData **p_data, _waiting_t **p_waiting, - int *p_unboundop) + _PyXIData_t **p_data, _waiting_t **p_waiting, int *p_unboundop) { int err = 0; PyThread_acquire_lock(chan->mutex, WAIT_LOCK); @@ -1193,7 +1191,7 @@ _channel_next(_channel_state *chan, int64_t interpid, static void _channel_remove(_channel_state *chan, _channelitem_id_t itemid) { - _PyCrossInterpreterData *data = NULL; + _PyXIData_t *data = NULL; _waiting_t *waiting = NULL; PyThread_acquire_lock(chan->mutex, WAIT_LOCK); @@ -1776,12 +1774,12 @@ channel_send(_channels *channels, int64_t cid, PyObject *obj, } // Convert the object to cross-interpreter data. 
- _PyCrossInterpreterData *data = GLOBAL_MALLOC(_PyCrossInterpreterData); + _PyXIData_t *data = GLOBAL_MALLOC(_PyXIData_t); if (data == NULL) { PyThread_release_lock(mutex); return -1; } - if (_PyObject_GetCrossInterpreterData(obj, data) != 0) { + if (_PyObject_GetXIData(obj, data) != 0) { PyThread_release_lock(mutex); GLOBAL_FREE(data); return -1; @@ -1904,7 +1902,7 @@ channel_recv(_channels *channels, int64_t cid, PyObject **res, int *p_unboundop) // Past this point we are responsible for releasing the mutex. // Pop off the next item from the channel. - _PyCrossInterpreterData *data = NULL; + _PyXIData_t *data = NULL; _waiting_t *waiting = NULL; err = _channel_next(chan, interpid, &data, &waiting, p_unboundop); PyThread_release_lock(mutex); @@ -1919,7 +1917,7 @@ channel_recv(_channels *channels, int64_t cid, PyObject **res, int *p_unboundop) } // Convert the data back to an object. - PyObject *obj = _PyCrossInterpreterData_NewObject(data); + PyObject *obj = _PyXIData_NewObject(data); if (obj == NULL) { assert(PyErr_Occurred()); // It was allocated in channel_send(), so we free it. @@ -2545,10 +2543,9 @@ struct _channelid_xid { }; static PyObject * -_channelid_from_xid(_PyCrossInterpreterData *data) +_channelid_from_xid(_PyXIData_t *data) { - struct _channelid_xid *xid = \ - (struct _channelid_xid *)_PyCrossInterpreterData_DATA(data); + struct _channelid_xid *xid = (struct _channelid_xid *)_PyXIData_DATA(data); // It might not be imported yet, so we can't use _get_current_module(). 
PyObject *mod = PyImport_ImportModule(MODULE_NAME_STR); @@ -2594,18 +2591,16 @@ _channelid_from_xid(_PyCrossInterpreterData *data) } static int -_channelid_shared(PyThreadState *tstate, PyObject *obj, - _PyCrossInterpreterData *data) +_channelid_shared(PyThreadState *tstate, PyObject *obj, _PyXIData_t *data) { - if (_PyCrossInterpreterData_InitWithSize( + if (_PyXIData_InitWithSize( data, tstate->interp, sizeof(struct _channelid_xid), obj, _channelid_from_xid ) < 0) { return -1; } - struct _channelid_xid *xid = \ - (struct _channelid_xid *)_PyCrossInterpreterData_DATA(data); + struct _channelid_xid *xid = (struct _channelid_xid *)_PyXIData_DATA(data); xid->cid = ((channelid *)obj)->cid; xid->end = ((channelid *)obj)->end; xid->resolve = ((channelid *)obj)->resolve; @@ -2745,7 +2740,7 @@ _get_current_channelend_type(int end) } static PyObject * -_channelend_from_xid(_PyCrossInterpreterData *data) +_channelend_from_xid(_PyXIData_t *data) { channelid *cidobj = (channelid *)_channelid_from_xid(data); if (cidobj == NULL) { @@ -2762,8 +2757,7 @@ _channelend_from_xid(_PyCrossInterpreterData *data) } static int -_channelend_shared(PyThreadState *tstate, PyObject *obj, - _PyCrossInterpreterData *data) +_channelend_shared(PyThreadState *tstate, PyObject *obj, _PyXIData_t *data) { PyObject *cidobj = PyObject_GetAttrString(obj, "_id"); if (cidobj == NULL) { @@ -2774,7 +2768,7 @@ _channelend_shared(PyThreadState *tstate, PyObject *obj, if (res < 0) { return -1; } - _PyCrossInterpreterData_SET_NEW_OBJECT(data, _channelend_from_xid); + _PyXIData_SET_NEW_OBJECT(data, _channelend_from_xid); return 0; } diff --git a/Modules/_interpqueuesmodule.c b/Modules/_interpqueuesmodule.c index 297a1763a98ce6e..8d0e223db7ff194 100644 --- a/Modules/_interpqueuesmodule.c +++ b/Modules/_interpqueuesmodule.c @@ -6,7 +6,7 @@ #endif #include "Python.h" -#include "pycore_crossinterp.h" // struct _xid +#include "pycore_crossinterp.h" // _PyXIData_t #define REGISTERS_HEAP_TYPES #define HAS_UNBOUND_ITEMS 
@@ -30,7 +30,7 @@ #define XID_FREE 2 static int -_release_xid_data(_PyCrossInterpreterData *data, int flags) +_release_xid_data(_PyXIData_t *data, int flags) { int ignoreexc = flags & XID_IGNORE_EXC; PyObject *exc; @@ -39,10 +39,10 @@ _release_xid_data(_PyCrossInterpreterData *data, int flags) } int res; if (flags & XID_FREE) { - res = _PyCrossInterpreterData_ReleaseAndRawFree(data); + res = _PyXIData_ReleaseAndRawFree(data); } else { - res = _PyCrossInterpreterData_Release(data); + res = _PyXIData_Release(data); } if (res < 0) { /* The owning interpreter is already destroyed. */ @@ -400,7 +400,7 @@ typedef struct _queueitem { This is necessary because item->data might be NULL, meaning the interpreter has been destroyed. */ int64_t interpid; - _PyCrossInterpreterData *data; + _PyXIData_t *data; int fmt; int unboundop; struct _queueitem *next; @@ -408,16 +408,15 @@ typedef struct _queueitem { static void _queueitem_init(_queueitem *item, - int64_t interpid, _PyCrossInterpreterData *data, - int fmt, int unboundop) + int64_t interpid, _PyXIData_t *data, int fmt, int unboundop) { if (interpid < 0) { interpid = _get_interpid(data); } else { assert(data == NULL - || _PyCrossInterpreterData_INTERPID(data) < 0 - || interpid == _PyCrossInterpreterData_INTERPID(data)); + || _PyXIData_INTERPID(data) < 0 + || interpid == _PyXIData_INTERPID(data)); } assert(check_unbound(unboundop)); *item = (_queueitem){ @@ -447,8 +446,7 @@ _queueitem_clear(_queueitem *item) } static _queueitem * -_queueitem_new(int64_t interpid, _PyCrossInterpreterData *data, - int fmt, int unboundop) +_queueitem_new(int64_t interpid, _PyXIData_t *data, int fmt, int unboundop) { _queueitem *item = GLOBAL_MALLOC(_queueitem); if (item == NULL) { @@ -478,7 +476,7 @@ _queueitem_free_all(_queueitem *item) static void _queueitem_popped(_queueitem *item, - _PyCrossInterpreterData **p_data, int *p_fmt, int *p_unboundop) + _PyXIData_t **p_data, int *p_fmt, int *p_unboundop) { *p_data = item->data; *p_fmt = item->fmt; 
@@ -498,7 +496,7 @@ _queueitem_clear_interpreter(_queueitem *item) assert(item->unboundop != UNBOUND_REMOVE); return 0; } - assert(_PyCrossInterpreterData_INTERPID(item->data) == item->interpid); + assert(_PyXIData_INTERPID(item->data) == item->interpid); switch (item->unboundop) { case UNBOUND_REMOVE: @@ -633,7 +631,7 @@ _queue_unlock(_queue *queue) } static int -_queue_add(_queue *queue, int64_t interpid, _PyCrossInterpreterData *data, +_queue_add(_queue *queue, int64_t interpid, _PyXIData_t *data, int fmt, int unboundop) { int err = _queue_lock(queue); @@ -671,7 +669,7 @@ _queue_add(_queue *queue, int64_t interpid, _PyCrossInterpreterData *data, static int _queue_next(_queue *queue, - _PyCrossInterpreterData **p_data, int *p_fmt, int *p_unboundop) + _PyXIData_t **p_data, int *p_fmt, int *p_unboundop) { int err = _queue_lock(queue); if (err < 0) { @@ -1138,17 +1136,17 @@ queue_put(_queues *queues, int64_t qid, PyObject *obj, int fmt, int unboundop) assert(queue != NULL); // Convert the object to cross-interpreter data. - _PyCrossInterpreterData *data = GLOBAL_MALLOC(_PyCrossInterpreterData); + _PyXIData_t *data = GLOBAL_MALLOC(_PyXIData_t); if (data == NULL) { _queue_unmark_waiter(queue, queues->mutex); return -1; } - if (_PyObject_GetCrossInterpreterData(obj, data) != 0) { + if (_PyObject_GetXIData(obj, data) != 0) { _queue_unmark_waiter(queue, queues->mutex); GLOBAL_FREE(data); return -1; } - assert(_PyCrossInterpreterData_INTERPID(data) == \ + assert(_PyXIData_INTERPID(data) == \ PyInterpreterState_GetID(PyInterpreterState_Get())); // Add the data to the queue. @@ -1184,7 +1182,7 @@ queue_get(_queues *queues, int64_t qid, assert(queue != NULL); // Pop off the next item from the queue. 
- _PyCrossInterpreterData *data = NULL; + _PyXIData_t *data = NULL; err = _queue_next(queue, &data, p_fmt, p_unboundop); _queue_unmark_waiter(queue, queues->mutex); if (err != 0) { @@ -1196,7 +1194,7 @@ queue_get(_queues *queues, int64_t qid, } // Convert the data back to an object. - PyObject *obj = _PyCrossInterpreterData_NewObject(data); + PyObject *obj = _PyXIData_NewObject(data); if (obj == NULL) { assert(PyErr_Occurred()); // It was allocated in queue_put(), so we free it. @@ -1258,8 +1256,7 @@ queue_get_count(_queues *queues, int64_t qid, Py_ssize_t *p_count) /* external Queue objects ***************************************************/ -static int _queueobj_shared(PyThreadState *, - PyObject *, _PyCrossInterpreterData *); +static int _queueobj_shared(PyThreadState *, PyObject *, _PyXIData_t *); static int set_external_queue_type(module_state *state, PyTypeObject *queue_type) @@ -1339,9 +1336,9 @@ _queueid_xid_free(void *data) } static PyObject * -_queueobj_from_xid(_PyCrossInterpreterData *data) +_queueobj_from_xid(_PyXIData_t *data) { - int64_t qid = *(int64_t *)_PyCrossInterpreterData_DATA(data); + int64_t qid = *(int64_t *)_PyXIData_DATA(data); PyObject *qidobj = PyLong_FromLongLong(qid); if (qidobj == NULL) { return NULL; @@ -1367,8 +1364,7 @@ _queueobj_from_xid(_PyCrossInterpreterData *data) } static int -_queueobj_shared(PyThreadState *tstate, PyObject *queueobj, - _PyCrossInterpreterData *data) +_queueobj_shared(PyThreadState *tstate, PyObject *queueobj, _PyXIData_t *data) { PyObject *qidobj = PyObject_GetAttrString(queueobj, "_id"); if (qidobj == NULL) { @@ -1388,9 +1384,8 @@ _queueobj_shared(PyThreadState *tstate, PyObject *queueobj, if (raw == NULL) { return -1; } - _PyCrossInterpreterData_Init(data, tstate->interp, raw, NULL, - _queueobj_from_xid); - _PyCrossInterpreterData_SET_FREE(data, _queueid_xid_free); + _PyXIData_Init(data, tstate->interp, raw, NULL, _queueobj_from_xid); + _PyXIData_SET_FREE(data, _queueid_xid_free); return 0; } diff --git 
a/Modules/_interpreters_common.h b/Modules/_interpreters_common.h index 0d2e0c9efd3837f..b0e31a33734dabc 100644 --- a/Modules/_interpreters_common.h +++ b/Modules/_interpreters_common.h @@ -6,27 +6,27 @@ static int -ensure_xid_class(PyTypeObject *cls, crossinterpdatafunc getdata) +ensure_xid_class(PyTypeObject *cls, xidatafunc getdata) { //assert(cls->tp_flags & Py_TPFLAGS_HEAPTYPE); - return _PyCrossInterpreterData_RegisterClass(cls, getdata); + return _PyXIData_RegisterClass(cls, getdata); } #ifdef REGISTERS_HEAP_TYPES static int clear_xid_class(PyTypeObject *cls) { - return _PyCrossInterpreterData_UnregisterClass(cls); + return _PyXIData_UnregisterClass(cls); } #endif static inline int64_t -_get_interpid(_PyCrossInterpreterData *data) +_get_interpid(_PyXIData_t *data) { int64_t interpid; if (data != NULL) { - interpid = _PyCrossInterpreterData_INTERPID(data); + interpid = _PyXIData_INTERPID(data); assert(!PyErr_Occurred()); } else { diff --git a/Modules/_interpretersmodule.c b/Modules/_interpretersmodule.c index 63f2bb387685111..95acdd69e53260f 100644 --- a/Modules/_interpretersmodule.c +++ b/Modules/_interpretersmodule.c @@ -7,7 +7,7 @@ #include "Python.h" #include "pycore_abstract.h" // _PyIndex_Check() -#include "pycore_crossinterp.h" // struct _xid +#include "pycore_crossinterp.h" // _PyXIData_t #include "pycore_interp.h" // _PyInterpreterState_IDIncref() #include "pycore_initconfig.h" // _PyErr_SetFromPyStatus() #include "pycore_modsupport.h" // _PyArg_BadArgument() @@ -84,18 +84,18 @@ typedef struct { } XIBufferViewObject; static PyObject * -xibufferview_from_xid(PyTypeObject *cls, _PyCrossInterpreterData *data) +xibufferview_from_xid(PyTypeObject *cls, _PyXIData_t *data) { - assert(_PyCrossInterpreterData_DATA(data) != NULL); - assert(_PyCrossInterpreterData_OBJ(data) == NULL); - assert(_PyCrossInterpreterData_INTERPID(data) >= 0); + assert(_PyXIData_DATA(data) != NULL); + assert(_PyXIData_OBJ(data) == NULL); + assert(_PyXIData_INTERPID(data) >= 0); 
XIBufferViewObject *self = PyObject_Malloc(sizeof(XIBufferViewObject)); if (self == NULL) { return NULL; } PyObject_Init((PyObject *)self, cls); - self->view = (Py_buffer *)_PyCrossInterpreterData_DATA(data); - self->interpid = _PyCrossInterpreterData_INTERPID(data); + self->view = (Py_buffer *)_PyXIData_DATA(data); + self->interpid = _PyXIData_INTERPID(data); return (PyObject *)self; } @@ -154,7 +154,7 @@ static PyType_Spec XIBufferViewType_spec = { static PyTypeObject * _get_current_xibufferview_type(void); static PyObject * -_memoryview_from_xid(_PyCrossInterpreterData *data) +_memoryview_from_xid(_PyXIData_t *data) { PyTypeObject *cls = _get_current_xibufferview_type(); if (cls == NULL) { @@ -168,8 +168,7 @@ _memoryview_from_xid(_PyCrossInterpreterData *data) } static int -_memoryview_shared(PyThreadState *tstate, PyObject *obj, - _PyCrossInterpreterData *data) +_memoryview_shared(PyThreadState *tstate, PyObject *obj, _PyXIData_t *data) { Py_buffer *view = PyMem_RawMalloc(sizeof(Py_buffer)); if (view == NULL) { @@ -179,8 +178,7 @@ _memoryview_shared(PyThreadState *tstate, PyObject *obj, PyMem_RawFree(view); return -1; } - _PyCrossInterpreterData_Init(data, tstate->interp, view, NULL, - _memoryview_from_xid); + _PyXIData_Init(data, tstate->interp, view, NULL, _memoryview_from_xid); return 0; } @@ -1183,7 +1181,7 @@ object_is_shareable(PyObject *self, PyObject *args, PyObject *kwds) return NULL; } - if (_PyObject_CheckCrossInterpreterData(obj) == 0) { + if (_PyObject_CheckXIData(obj) == 0) { Py_RETURN_TRUE; } PyErr_Clear(); diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c index 883f32599fbc99e..327a077671047c6 100644 --- a/Modules/_testinternalcapi.c +++ b/Modules/_testinternalcapi.c @@ -1787,11 +1787,10 @@ interpreter_refcount_linked(PyObject *self, PyObject *idobj) static void _xid_capsule_destructor(PyObject *capsule) { - _PyCrossInterpreterData *data = \ - (_PyCrossInterpreterData *)PyCapsule_GetPointer(capsule, NULL); + _PyXIData_t 
*data = (_PyXIData_t *)PyCapsule_GetPointer(capsule, NULL); if (data != NULL) { - assert(_PyCrossInterpreterData_Release(data) == 0); - _PyCrossInterpreterData_Free(data); + assert(_PyXIData_Release(data) == 0); + _PyXIData_Free(data); } } @@ -1803,18 +1802,18 @@ get_crossinterp_data(PyObject *self, PyObject *args) return NULL; } - _PyCrossInterpreterData *data = _PyCrossInterpreterData_New(); + _PyXIData_t *data = _PyXIData_New(); if (data == NULL) { return NULL; } - if (_PyObject_GetCrossInterpreterData(obj, data) != 0) { - _PyCrossInterpreterData_Free(data); + if (_PyObject_GetXIData(obj, data) != 0) { + _PyXIData_Free(data); return NULL; } PyObject *capsule = PyCapsule_New(data, NULL, _xid_capsule_destructor); if (capsule == NULL) { - assert(_PyCrossInterpreterData_Release(data) == 0); - _PyCrossInterpreterData_Free(data); + assert(_PyXIData_Release(data) == 0); + _PyXIData_Free(data); } return capsule; } @@ -1827,12 +1826,11 @@ restore_crossinterp_data(PyObject *self, PyObject *args) return NULL; } - _PyCrossInterpreterData *data = \ - (_PyCrossInterpreterData *)PyCapsule_GetPointer(capsule, NULL); + _PyXIData_t *data = (_PyXIData_t *)PyCapsule_GetPointer(capsule, NULL); if (data == NULL) { return NULL; } - return _PyCrossInterpreterData_NewObject(data); + return _PyXIData_NewObject(data); } diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj index f840e7fd61f9853..95552cade52b758 100644 --- a/PCbuild/pythoncore.vcxproj +++ b/PCbuild/pythoncore.vcxproj @@ -229,6 +229,7 @@ + diff --git a/PCbuild/pythoncore.vcxproj.filters b/PCbuild/pythoncore.vcxproj.filters index a930cd0b0b10c60..1708cf6e0b3a52e 100644 --- a/PCbuild/pythoncore.vcxproj.filters +++ b/PCbuild/pythoncore.vcxproj.filters @@ -609,6 +609,9 @@ Include\internal + + Include\internal + Include\internal diff --git a/Python/crossinterp.c b/Python/crossinterp.c index 0aca322d987dbad..2daba99988c12a1 100644 --- a/Python/crossinterp.c +++ b/Python/crossinterp.c @@ -3,11 +3,14 @@ #include 
"Python.h" #include "pycore_ceval.h" // _Py_simple_func -#include "pycore_crossinterp.h" // struct _xid +#include "pycore_crossinterp.h" // _PyXIData_t #include "pycore_initconfig.h" // _PyStatus_OK() #include "pycore_namespace.h" //_PyNamespace_New() #include "pycore_pyerrors.h" // _PyErr_Clear() -#include "pycore_weakref.h" // _PyWeakref_GET_REF() + + +#define _PyXI_GET_GLOBAL_STATE(interp) (&(interp)->runtime->xi) +#define _PyXI_GET_STATE(interp) (&(interp)->xi) /**************/ @@ -57,25 +60,24 @@ _Py_CallInInterpreterAndRawFree(PyInterpreterState *interp, /* cross-interpreter data */ /**************************/ -/* registry of {type -> crossinterpdatafunc} */ +/* registry of {type -> xidatafunc} */ /* For now we use a global registry of shareable classes. An alternative would be to add a tp_* slot for a class's - crossinterpdatafunc. It would be simpler and more efficient. */ + xidatafunc. It would be simpler and more efficient. */ -static void xid_lookup_init(PyInterpreterState *); -static void xid_lookup_fini(PyInterpreterState *); -static crossinterpdatafunc lookup_getdata(PyInterpreterState *, PyObject *); +static void xid_lookup_init(_PyXIData_lookup_t *); +static void xid_lookup_fini(_PyXIData_lookup_t *); +static xidatafunc lookup_getdata(PyInterpreterState *, PyObject *); #include "crossinterp_data_lookup.h" /* lifecycle */ -_PyCrossInterpreterData * -_PyCrossInterpreterData_New(void) +_PyXIData_t * +_PyXIData_New(void) { - _PyCrossInterpreterData *xid = PyMem_RawMalloc( - sizeof(_PyCrossInterpreterData)); + _PyXIData_t *xid = PyMem_RawMalloc(sizeof(_PyXIData_t)); if (xid == NULL) { PyErr_NoMemory(); } @@ -83,10 +85,10 @@ _PyCrossInterpreterData_New(void) } void -_PyCrossInterpreterData_Free(_PyCrossInterpreterData *xid) +_PyXIData_Free(_PyXIData_t *xid) { PyInterpreterState *interp = PyInterpreterState_Get(); - _PyCrossInterpreterData_Clear(interp, xid); + _PyXIData_Clear(interp, xid); PyMem_RawFree(xid); } @@ -94,20 +96,20 @@ 
_PyCrossInterpreterData_Free(_PyCrossInterpreterData *xid) /* defining cross-interpreter data */ static inline void -_xidata_init(_PyCrossInterpreterData *data) +_xidata_init(_PyXIData_t *data) { // If the value is being reused // then _xidata_clear() should have been called already. assert(data->data == NULL); assert(data->obj == NULL); - *data = (_PyCrossInterpreterData){0}; - _PyCrossInterpreterData_INTERPID(data) = -1; + *data = (_PyXIData_t){0}; + _PyXIData_INTERPID(data) = -1; } static inline void -_xidata_clear(_PyCrossInterpreterData *data) +_xidata_clear(_PyXIData_t *data) { - // _PyCrossInterpreterData only has two members that need to be + // _PyXIData_t only has two members that need to be // cleaned up, if set: "data" must be freed and "obj" must be decref'ed. // In both cases the original (owning) interpreter must be used, // which is the caller's responsibility to ensure. @@ -121,10 +123,10 @@ _xidata_clear(_PyCrossInterpreterData *data) } void -_PyCrossInterpreterData_Init(_PyCrossInterpreterData *data, - PyInterpreterState *interp, - void *shared, PyObject *obj, - xid_newobjectfunc new_object) +_PyXIData_Init(_PyXIData_t *data, + PyInterpreterState *interp, + void *shared, PyObject *obj, + xid_newobjectfunc new_object) { assert(data != NULL); assert(new_object != NULL); @@ -132,29 +134,29 @@ _PyCrossInterpreterData_Init(_PyCrossInterpreterData *data, data->data = shared; if (obj != NULL) { assert(interp != NULL); - // released in _PyCrossInterpreterData_Clear() + // released in _PyXIData_Clear() data->obj = Py_NewRef(obj); } // Ideally every object would know its owning interpreter. // Until then, we have to rely on the caller to identify it // (but we don't need it in all cases). - _PyCrossInterpreterData_INTERPID(data) = (interp != NULL) + _PyXIData_INTERPID(data) = (interp != NULL) ? 
PyInterpreterState_GetID(interp) : -1; data->new_object = new_object; } int -_PyCrossInterpreterData_InitWithSize(_PyCrossInterpreterData *data, - PyInterpreterState *interp, - const size_t size, PyObject *obj, - xid_newobjectfunc new_object) +_PyXIData_InitWithSize(_PyXIData_t *data, + PyInterpreterState *interp, + const size_t size, PyObject *obj, + xid_newobjectfunc new_object) { assert(size > 0); // For now we always free the shared data in the same interpreter // where it was allocated, so the interpreter is required. assert(interp != NULL); - _PyCrossInterpreterData_Init(data, interp, NULL, obj, new_object); + _PyXIData_Init(data, interp, NULL, obj, new_object); data->data = PyMem_RawMalloc(size); if (data->data == NULL) { return -1; @@ -164,14 +166,13 @@ _PyCrossInterpreterData_InitWithSize(_PyCrossInterpreterData *data, } void -_PyCrossInterpreterData_Clear(PyInterpreterState *interp, - _PyCrossInterpreterData *data) +_PyXIData_Clear(PyInterpreterState *interp, _PyXIData_t *data) { assert(data != NULL); // This must be called in the owning interpreter. assert(interp == NULL - || _PyCrossInterpreterData_INTERPID(data) == -1 - || _PyCrossInterpreterData_INTERPID(data) == PyInterpreterState_GetID(interp)); + || _PyXIData_INTERPID(data) == -1 + || _PyXIData_INTERPID(data) == PyInterpreterState_GetID(interp)); _xidata_clear(data); } @@ -179,13 +180,13 @@ _PyCrossInterpreterData_Clear(PyInterpreterState *interp, /* using cross-interpreter data */ static int -_check_xidata(PyThreadState *tstate, _PyCrossInterpreterData *data) +_check_xidata(PyThreadState *tstate, _PyXIData_t *data) { // data->data can be anything, including NULL, so we don't check it. // data->obj may be NULL, so we don't check it. 
- if (_PyCrossInterpreterData_INTERPID(data) < 0) { + if (_PyXIData_INTERPID(data) < 0) { PyErr_SetString(PyExc_SystemError, "missing interp"); return -1; } @@ -221,10 +222,10 @@ _set_xid_lookup_failure(PyInterpreterState *interp, } int -_PyObject_CheckCrossInterpreterData(PyObject *obj) +_PyObject_CheckXIData(PyObject *obj) { PyInterpreterState *interp = PyInterpreterState_Get(); - crossinterpdatafunc getdata = lookup_getdata(interp, obj); + xidatafunc getdata = lookup_getdata(interp, obj); if (getdata == NULL) { if (!PyErr_Occurred()) { _set_xid_lookup_failure(interp, obj, NULL); @@ -235,18 +236,18 @@ _PyObject_CheckCrossInterpreterData(PyObject *obj) } int -_PyObject_GetCrossInterpreterData(PyObject *obj, _PyCrossInterpreterData *data) +_PyObject_GetXIData(PyObject *obj, _PyXIData_t *data) { PyThreadState *tstate = PyThreadState_Get(); PyInterpreterState *interp = tstate->interp; // Reset data before re-populating. - *data = (_PyCrossInterpreterData){0}; - _PyCrossInterpreterData_INTERPID(data) = -1; + *data = (_PyXIData_t){0}; + _PyXIData_INTERPID(data) = -1; // Call the "getdata" func for the object. Py_INCREF(obj); - crossinterpdatafunc getdata = lookup_getdata(interp, obj); + xidatafunc getdata = lookup_getdata(interp, obj); if (getdata == NULL) { Py_DECREF(obj); if (!PyErr_Occurred()) { @@ -261,9 +262,9 @@ _PyObject_GetCrossInterpreterData(PyObject *obj, _PyCrossInterpreterData *data) } // Fill in the blanks and validate the result. 
- _PyCrossInterpreterData_INTERPID(data) = PyInterpreterState_GetID(interp); + _PyXIData_INTERPID(data) = PyInterpreterState_GetID(interp); if (_check_xidata(tstate, data) != 0) { - (void)_PyCrossInterpreterData_Release(data); + (void)_PyXIData_Release(data); return -1; } @@ -271,7 +272,7 @@ _PyObject_GetCrossInterpreterData(PyObject *obj, _PyCrossInterpreterData *data) } PyObject * -_PyCrossInterpreterData_NewObject(_PyCrossInterpreterData *data) +_PyXIData_NewObject(_PyXIData_t *data) { return data->new_object(data); } @@ -279,12 +280,12 @@ _PyCrossInterpreterData_NewObject(_PyCrossInterpreterData *data) static int _call_clear_xidata(void *data) { - _xidata_clear((_PyCrossInterpreterData *)data); + _xidata_clear((_PyXIData_t *)data); return 0; } static int -_xidata_release(_PyCrossInterpreterData *data, int rawfree) +_xidata_release(_PyXIData_t *data, int rawfree) { if ((data->data == NULL || data->free == NULL) && data->obj == NULL) { // Nothing to release! @@ -299,7 +300,7 @@ _xidata_release(_PyCrossInterpreterData *data, int rawfree) // Switch to the original interpreter. PyInterpreterState *interp = _PyInterpreterState_LookUpID( - _PyCrossInterpreterData_INTERPID(data)); + _PyXIData_INTERPID(data)); if (interp == NULL) { // The interpreter was already destroyed. // This function shouldn't have been called. @@ -321,13 +322,13 @@ _xidata_release(_PyCrossInterpreterData *data, int rawfree) } int -_PyCrossInterpreterData_Release(_PyCrossInterpreterData *data) +_PyXIData_Release(_PyXIData_t *data) { return _xidata_release(data, 0); } int -_PyCrossInterpreterData_ReleaseAndRawFree(_PyCrossInterpreterData *data) +_PyXIData_ReleaseAndRawFree(_PyXIData_t *data) { return _xidata_release(data, 1); } @@ -446,15 +447,15 @@ _format_TracebackException(PyObject *tbexc) static int -_release_xid_data(_PyCrossInterpreterData *data, int rawfree) +_release_xid_data(_PyXIData_t *data, int rawfree) { PyObject *exc = PyErr_GetRaisedException(); int res = rawfree - ? 
_PyCrossInterpreterData_Release(data) - : _PyCrossInterpreterData_ReleaseAndRawFree(data); + ? _PyXIData_Release(data) + : _PyXIData_ReleaseAndRawFree(data); if (res < 0) { /* The owning interpreter is already destroyed. */ - _PyCrossInterpreterData_Clear(NULL, data); + _PyXIData_Clear(NULL, data); // XXX Emit a warning? PyErr_Clear(); } @@ -1094,8 +1095,8 @@ _PyXI_ApplyError(_PyXI_error *error) typedef struct _sharednsitem { const char *name; - _PyCrossInterpreterData *data; - // We could have a "PyCrossInterpreterData _data" field, so it would + _PyXIData_t *data; + // We could have a "PyXIData _data" field, so it would // be allocated as part of the item and avoid an extra allocation. // However, doing so adds a bunch of complexity because we must // ensure the item isn't freed before a pending call might happen @@ -1131,7 +1132,7 @@ _sharednsitem_has_value(_PyXI_namespace_item *item, int64_t *p_interpid) return 0; } if (p_interpid != NULL) { - *p_interpid = _PyCrossInterpreterData_INTERPID(item->data); + *p_interpid = _PyXIData_INTERPID(item->data); } return 1; } @@ -1141,12 +1142,12 @@ _sharednsitem_set_value(_PyXI_namespace_item *item, PyObject *value) { assert(_sharednsitem_is_initialized(item)); assert(item->data == NULL); - item->data = PyMem_RawMalloc(sizeof(_PyCrossInterpreterData)); + item->data = PyMem_RawMalloc(sizeof(_PyXIData_t)); if (item->data == NULL) { PyErr_NoMemory(); return -1; } - if (_PyObject_GetCrossInterpreterData(value, item->data) != 0) { + if (_PyObject_GetXIData(value, item->data) != 0) { PyMem_RawFree(item->data); item->data = NULL; // The caller may want to propagate PyExc_NotShareableError @@ -1159,7 +1160,7 @@ _sharednsitem_set_value(_PyXI_namespace_item *item, PyObject *value) static void _sharednsitem_clear_value(_PyXI_namespace_item *item) { - _PyCrossInterpreterData *data = item->data; + _PyXIData_t *data = item->data; if (data != NULL) { item->data = NULL; int rawfree = 1; @@ -1205,7 +1206,7 @@ 
_sharednsitem_apply(_PyXI_namespace_item *item, PyObject *ns, PyObject *dflt) } PyObject *value; if (item->data != NULL) { - value = _PyCrossInterpreterData_NewObject(item->data); + value = _PyXIData_NewObject(item->data); if (value == NULL) { Py_DECREF(name); return -1; @@ -1776,7 +1777,10 @@ PyStatus _PyXI_Init(PyInterpreterState *interp) { // Initialize the XID lookup state (e.g. registry). - xid_lookup_init(interp); + if (_Py_IsMainInterpreter(interp)) { + xid_lookup_init(&_PyXI_GET_GLOBAL_STATE(interp)->data_lookup); + } + xid_lookup_init(&_PyXI_GET_STATE(interp)->data_lookup); // Initialize exceptions (heap types). if (_init_not_shareable_error_type(interp) < 0) { @@ -1796,7 +1800,10 @@ _PyXI_Fini(PyInterpreterState *interp) _fini_not_shareable_error_type(interp); // Finalize the XID lookup state (e.g. registry). - xid_lookup_fini(interp); + xid_lookup_fini(&_PyXI_GET_STATE(interp)->data_lookup); + if (_Py_IsMainInterpreter(interp)) { + xid_lookup_fini(&_PyXI_GET_GLOBAL_STATE(interp)->data_lookup); + } } PyStatus diff --git a/Python/crossinterp_data_lookup.h b/Python/crossinterp_data_lookup.h index 863919ad42fb97a..88c662a3df00d64 100644 --- a/Python/crossinterp_data_lookup.h +++ b/Python/crossinterp_data_lookup.h @@ -1,8 +1,31 @@ +#include "pycore_weakref.h" // _PyWeakref_GET_REF() -static crossinterpdatafunc _lookup_getdata_from_registry( - PyInterpreterState *, PyObject *); -static crossinterpdatafunc +typedef struct _xidregistry dlregistry_t; +typedef struct _xidregitem dlregitem_t; + + +// forward +static void _xidregistry_init(dlregistry_t *); +static void _xidregistry_fini(dlregistry_t *); +static xidatafunc _lookup_getdata_from_registry(PyInterpreterState *, PyObject *); + + +/* used in crossinterp.c */ + +static void +xid_lookup_init(_PyXIData_lookup_t *state) +{ + _xidregistry_init(&state->registry); +} + +static void +xid_lookup_fini(_PyXIData_lookup_t *state) +{ + _xidregistry_fini(&state->registry); +} + +static xidatafunc 
lookup_getdata(PyInterpreterState *interp, PyObject *obj) { /* Cross-interpreter objects are looked up by exact match on the class. @@ -11,8 +34,11 @@ lookup_getdata(PyInterpreterState *interp, PyObject *obj) return _lookup_getdata_from_registry(interp, obj); } -crossinterpdatafunc -_PyCrossInterpreterData_Lookup(PyObject *obj) + +/* exported API */ + +xidatafunc +_PyXIData_Lookup(PyObject *obj) { PyInterpreterState *interp = PyInterpreterState_Get(); return lookup_getdata(interp, obj); @@ -20,20 +46,20 @@ _PyCrossInterpreterData_Lookup(PyObject *obj) /***********************************************/ -/* a registry of {type -> crossinterpdatafunc} */ +/* a registry of {type -> xidatafunc} */ /***********************************************/ /* For now we use a global registry of shareable classes. An alternative would be to add a tp_* slot for a class's - crossinterpdatafunc. It would be simpler and more efficient. */ + xidatafunc. It would be simpler and more efficient. */ /* registry lifecycle */ -static void _register_builtins_for_crossinterpreter_data(struct _xidregistry *); +static void _register_builtins_for_crossinterpreter_data(dlregistry_t *); static void -_xidregistry_init(struct _xidregistry *registry) +_xidregistry_init(dlregistry_t *registry) { if (registry->initialized) { return; @@ -47,10 +73,10 @@ _xidregistry_init(struct _xidregistry *registry) } } -static void _xidregistry_clear(struct _xidregistry *); +static void _xidregistry_clear(dlregistry_t *); static void -_xidregistry_fini(struct _xidregistry *registry) +_xidregistry_fini(dlregistry_t *registry) { if (!registry->initialized) { return; @@ -60,32 +86,11 @@ _xidregistry_fini(struct _xidregistry *registry) _xidregistry_clear(registry); } -static inline struct _xidregistry * _get_global_xidregistry(_PyRuntimeState *); -static inline struct _xidregistry * _get_xidregistry(PyInterpreterState *); - -static void -xid_lookup_init(PyInterpreterState *interp) -{ - if (_Py_IsMainInterpreter(interp)) { 
- _xidregistry_init(_get_global_xidregistry(interp->runtime)); - } - _xidregistry_init(_get_xidregistry(interp)); -} - -static void -xid_lookup_fini(PyInterpreterState *interp) -{ - _xidregistry_fini(_get_xidregistry(interp)); - if (_Py_IsMainInterpreter(interp)) { - _xidregistry_fini(_get_global_xidregistry(interp->runtime)); - } -} - /* registry thread safety */ static void -_xidregistry_lock(struct _xidregistry *registry) +_xidregistry_lock(dlregistry_t *registry) { if (registry->global) { PyMutex_Lock(&registry->mutex); @@ -94,7 +99,7 @@ _xidregistry_lock(struct _xidregistry *registry) } static void -_xidregistry_unlock(struct _xidregistry *registry) +_xidregistry_unlock(dlregistry_t *registry) { if (registry->global) { PyMutex_Unlock(&registry->mutex); @@ -104,35 +109,34 @@ _xidregistry_unlock(struct _xidregistry *registry) /* accessing the registry */ -static inline struct _xidregistry * +static inline dlregistry_t * _get_global_xidregistry(_PyRuntimeState *runtime) { - return &runtime->xi.registry; + return &runtime->xi.data_lookup.registry; } -static inline struct _xidregistry * +static inline dlregistry_t * _get_xidregistry(PyInterpreterState *interp) { - return &interp->xi.registry; + return &interp->xi.data_lookup.registry; } -static inline struct _xidregistry * +static inline dlregistry_t * _get_xidregistry_for_type(PyInterpreterState *interp, PyTypeObject *cls) { - struct _xidregistry *registry = _get_global_xidregistry(interp->runtime); + dlregistry_t *registry = _get_global_xidregistry(interp->runtime); if (cls->tp_flags & Py_TPFLAGS_HEAPTYPE) { registry = _get_xidregistry(interp); } return registry; } -static struct _xidregitem * _xidregistry_remove_entry( - struct _xidregistry *, struct _xidregitem *); +static dlregitem_t* _xidregistry_remove_entry(dlregistry_t *, dlregitem_t *); -static struct _xidregitem * -_xidregistry_find_type(struct _xidregistry *xidregistry, PyTypeObject *cls) +static dlregitem_t * +_xidregistry_find_type(dlregistry_t
*xidregistry, PyTypeObject *cls) { - struct _xidregitem *cur = xidregistry->head; + dlregitem_t *cur = xidregistry->head; while (cur != NULL) { if (cur->weakref != NULL) { // cur is/was a heap type. @@ -155,16 +159,16 @@ _xidregistry_find_type(struct _xidregistry *xidregistry, PyTypeObject *cls) return NULL; } -static crossinterpdatafunc +static xidatafunc _lookup_getdata_from_registry(PyInterpreterState *interp, PyObject *obj) { PyTypeObject *cls = Py_TYPE(obj); - struct _xidregistry *xidregistry = _get_xidregistry_for_type(interp, cls); + dlregistry_t *xidregistry = _get_xidregistry_for_type(interp, cls); _xidregistry_lock(xidregistry); - struct _xidregitem *matched = _xidregistry_find_type(xidregistry, cls); - crossinterpdatafunc func = matched != NULL ? matched->getdata : NULL; + dlregitem_t *matched = _xidregistry_find_type(xidregistry, cls); + xidatafunc func = matched != NULL ? matched->getdata : NULL; _xidregistry_unlock(xidregistry); return func; @@ -174,14 +178,14 @@ _lookup_getdata_from_registry(PyInterpreterState *interp, PyObject *obj) /* updating the registry */ static int -_xidregistry_add_type(struct _xidregistry *xidregistry, - PyTypeObject *cls, crossinterpdatafunc getdata) +_xidregistry_add_type(dlregistry_t *xidregistry, + PyTypeObject *cls, xidatafunc getdata) { - struct _xidregitem *newhead = PyMem_RawMalloc(sizeof(struct _xidregitem)); + dlregitem_t *newhead = PyMem_RawMalloc(sizeof(dlregitem_t)); if (newhead == NULL) { return -1; } - *newhead = (struct _xidregitem){ + *newhead = (dlregitem_t){ // We do not keep a reference, to avoid keeping the class alive. 
.cls = cls, .refcount = 1, @@ -203,11 +207,10 @@ _xidregistry_add_type(struct _xidregistry *xidregistry, return 0; } -static struct _xidregitem * -_xidregistry_remove_entry(struct _xidregistry *xidregistry, - struct _xidregitem *entry) +static dlregitem_t * +_xidregistry_remove_entry(dlregistry_t *xidregistry, dlregitem_t *entry) { - struct _xidregitem *next = entry->next; + dlregitem_t *next = entry->next; if (entry->prev != NULL) { assert(entry->prev->next == entry); entry->prev->next = next; @@ -225,12 +228,12 @@ _xidregistry_remove_entry(struct _xidregistry *xidregistry, } static void -_xidregistry_clear(struct _xidregistry *xidregistry) +_xidregistry_clear(dlregistry_t *xidregistry) { - struct _xidregitem *cur = xidregistry->head; + dlregitem_t *cur = xidregistry->head; xidregistry->head = NULL; while (cur != NULL) { - struct _xidregitem *next = cur->next; + dlregitem_t *next = cur->next; Py_XDECREF(cur->weakref); PyMem_RawFree(cur); cur = next; @@ -238,8 +241,7 @@ _xidregistry_clear(struct _xidregistry *xidregistry) } int -_PyCrossInterpreterData_RegisterClass(PyTypeObject *cls, - crossinterpdatafunc getdata) +_PyXIData_RegisterClass(PyTypeObject *cls, xidatafunc getdata) { if (!PyType_Check(cls)) { PyErr_Format(PyExc_ValueError, "only classes may be registered"); @@ -252,10 +254,10 @@ _PyCrossInterpreterData_RegisterClass(PyTypeObject *cls, int res = 0; PyInterpreterState *interp = _PyInterpreterState_GET(); - struct _xidregistry *xidregistry = _get_xidregistry_for_type(interp, cls); + dlregistry_t *xidregistry = _get_xidregistry_for_type(interp, cls); _xidregistry_lock(xidregistry); - struct _xidregitem *matched = _xidregistry_find_type(xidregistry, cls); + dlregitem_t *matched = _xidregistry_find_type(xidregistry, cls); if (matched != NULL) { assert(matched->getdata == getdata); matched->refcount += 1; @@ -270,14 +272,14 @@ _PyCrossInterpreterData_RegisterClass(PyTypeObject *cls, } int -_PyCrossInterpreterData_UnregisterClass(PyTypeObject *cls) 
+_PyXIData_UnregisterClass(PyTypeObject *cls) { int res = 0; PyInterpreterState *interp = _PyInterpreterState_GET(); - struct _xidregistry *xidregistry = _get_xidregistry_for_type(interp, cls); + dlregistry_t *xidregistry = _get_xidregistry_for_type(interp, cls); _xidregistry_lock(xidregistry); - struct _xidregitem *matched = _xidregistry_find_type(xidregistry, cls); + dlregitem_t *matched = _xidregistry_find_type(xidregistry, cls); if (matched != NULL) { assert(matched->refcount > 0); matched->refcount -= 1; @@ -304,17 +306,16 @@ struct _shared_bytes_data { }; static PyObject * -_new_bytes_object(_PyCrossInterpreterData *data) +_new_bytes_object(_PyXIData_t *data) { struct _shared_bytes_data *shared = (struct _shared_bytes_data *)(data->data); return PyBytes_FromStringAndSize(shared->bytes, shared->len); } static int -_bytes_shared(PyThreadState *tstate, PyObject *obj, - _PyCrossInterpreterData *data) +_bytes_shared(PyThreadState *tstate, PyObject *obj, _PyXIData_t *data) { - if (_PyCrossInterpreterData_InitWithSize( + if (_PyXIData_InitWithSize( data, tstate->interp, sizeof(struct _shared_bytes_data), obj, _new_bytes_object ) < 0) @@ -323,7 +324,7 @@ _bytes_shared(PyThreadState *tstate, PyObject *obj, } struct _shared_bytes_data *shared = (struct _shared_bytes_data *)data->data; if (PyBytes_AsStringAndSize(obj, &shared->bytes, &shared->len) < 0) { - _PyCrossInterpreterData_Clear(tstate->interp, data); + _PyXIData_Clear(tstate->interp, data); return -1; } return 0; @@ -338,17 +339,16 @@ struct _shared_str_data { }; static PyObject * -_new_str_object(_PyCrossInterpreterData *data) +_new_str_object(_PyXIData_t *data) { struct _shared_str_data *shared = (struct _shared_str_data *)(data->data); return PyUnicode_FromKindAndData(shared->kind, shared->buffer, shared->len); } static int -_str_shared(PyThreadState *tstate, PyObject *obj, - _PyCrossInterpreterData *data) +_str_shared(PyThreadState *tstate, PyObject *obj, _PyXIData_t *data) { - if 
(_PyCrossInterpreterData_InitWithSize( + if (_PyXIData_InitWithSize( data, tstate->interp, sizeof(struct _shared_str_data), obj, _new_str_object ) < 0) @@ -365,14 +365,13 @@ _str_shared(PyThreadState *tstate, PyObject *obj, // int static PyObject * -_new_long_object(_PyCrossInterpreterData *data) +_new_long_object(_PyXIData_t *data) { return PyLong_FromSsize_t((Py_ssize_t)(data->data)); } static int -_long_shared(PyThreadState *tstate, PyObject *obj, - _PyCrossInterpreterData *data) +_long_shared(PyThreadState *tstate, PyObject *obj, _PyXIData_t *data) { /* Note that this means the size of shareable ints is bounded by * sys.maxsize. Hence on 32-bit architectures that is half the @@ -385,8 +384,7 @@ _long_shared(PyThreadState *tstate, PyObject *obj, } return -1; } - _PyCrossInterpreterData_Init(data, tstate->interp, (void *)value, NULL, - _new_long_object); + _PyXIData_Init(data, tstate->interp, (void *)value, NULL, _new_long_object); // data->obj and data->free remain NULL return 0; } @@ -394,17 +392,16 @@ _long_shared(PyThreadState *tstate, PyObject *obj, // float static PyObject * -_new_float_object(_PyCrossInterpreterData *data) +_new_float_object(_PyXIData_t *data) { double * value_ptr = data->data; return PyFloat_FromDouble(*value_ptr); } static int -_float_shared(PyThreadState *tstate, PyObject *obj, - _PyCrossInterpreterData *data) +_float_shared(PyThreadState *tstate, PyObject *obj, _PyXIData_t *data) { - if (_PyCrossInterpreterData_InitWithSize( + if (_PyXIData_InitWithSize( data, tstate->interp, sizeof(double), NULL, _new_float_object ) < 0) @@ -419,18 +416,16 @@ _float_shared(PyThreadState *tstate, PyObject *obj, // None static PyObject * -_new_none_object(_PyCrossInterpreterData *data) +_new_none_object(_PyXIData_t *data) { // XXX Singleton refcounts are problematic across interpreters... 
return Py_NewRef(Py_None); } static int -_none_shared(PyThreadState *tstate, PyObject *obj, - _PyCrossInterpreterData *data) +_none_shared(PyThreadState *tstate, PyObject *obj, _PyXIData_t *data) { - _PyCrossInterpreterData_Init(data, tstate->interp, NULL, NULL, - _new_none_object); + _PyXIData_Init(data, tstate->interp, NULL, NULL, _new_none_object); // data->data, data->obj and data->free remain NULL return 0; } @@ -438,7 +433,7 @@ _none_shared(PyThreadState *tstate, PyObject *obj, // bool static PyObject * -_new_bool_object(_PyCrossInterpreterData *data) +_new_bool_object(_PyXIData_t *data) { if (data->data){ Py_RETURN_TRUE; @@ -447,10 +442,9 @@ _new_bool_object(_PyCrossInterpreterData *data) } static int -_bool_shared(PyThreadState *tstate, PyObject *obj, - _PyCrossInterpreterData *data) +_bool_shared(PyThreadState *tstate, PyObject *obj, _PyXIData_t *data) { - _PyCrossInterpreterData_Init(data, tstate->interp, + _PyXIData_Init(data, tstate->interp, (void *) (Py_IsTrue(obj) ? (uintptr_t) 1 : (uintptr_t) 0), NULL, _new_bool_object); // data->obj and data->free remain NULL @@ -461,11 +455,11 @@ _bool_shared(PyThreadState *tstate, PyObject *obj, struct _shared_tuple_data { Py_ssize_t len; - _PyCrossInterpreterData **data; + _PyXIData_t **data; }; static PyObject * -_new_tuple_object(_PyCrossInterpreterData *data) +_new_tuple_object(_PyXIData_t *data) { struct _shared_tuple_data *shared = (struct _shared_tuple_data *)(data->data); PyObject *tuple = PyTuple_New(shared->len); @@ -474,7 +468,7 @@ _new_tuple_object(_PyCrossInterpreterData *data) } for (Py_ssize_t i = 0; i < shared->len; i++) { - PyObject *item = _PyCrossInterpreterData_NewObject(shared->data[i]); + PyObject *item = _PyXIData_NewObject(shared->data[i]); if (item == NULL){ Py_DECREF(tuple); return NULL; @@ -493,8 +487,8 @@ _tuple_shared_free(void* data) #endif for (Py_ssize_t i = 0; i < shared->len; i++) { if (shared->data[i] != NULL) { - assert(_PyCrossInterpreterData_INTERPID(shared->data[i]) == 
interpid); - _PyCrossInterpreterData_Release(shared->data[i]); + assert(_PyXIData_INTERPID(shared->data[i]) == interpid); + _PyXIData_Release(shared->data[i]); PyMem_RawFree(shared->data[i]); shared->data[i] = NULL; } @@ -504,8 +498,7 @@ _tuple_shared_free(void* data) } static int -_tuple_shared(PyThreadState *tstate, PyObject *obj, - _PyCrossInterpreterData *data) +_tuple_shared(PyThreadState *tstate, PyObject *obj, _PyXIData_t *data) { Py_ssize_t len = PyTuple_GET_SIZE(obj); if (len < 0) { @@ -518,14 +511,14 @@ _tuple_shared(PyThreadState *tstate, PyObject *obj, } shared->len = len; - shared->data = (_PyCrossInterpreterData **) PyMem_Calloc(shared->len, sizeof(_PyCrossInterpreterData *)); + shared->data = (_PyXIData_t **) PyMem_Calloc(shared->len, sizeof(_PyXIData_t *)); if (shared->data == NULL) { PyErr_NoMemory(); return -1; } for (Py_ssize_t i = 0; i < shared->len; i++) { - _PyCrossInterpreterData *data = _PyCrossInterpreterData_New(); + _PyXIData_t *data = _PyXIData_New(); if (data == NULL) { goto error; // PyErr_NoMemory already set } @@ -533,7 +526,7 @@ _tuple_shared(PyThreadState *tstate, PyObject *obj, int res = -1; if (!_Py_EnterRecursiveCallTstate(tstate, " while sharing a tuple")) { - res = _PyObject_GetCrossInterpreterData(item, data); + res = _PyObject_GetXIData(item, data); _Py_LeaveRecursiveCallTstate(tstate); } if (res < 0) { @@ -542,8 +535,7 @@ _tuple_shared(PyThreadState *tstate, PyObject *obj, } shared->data[i] = data; } - _PyCrossInterpreterData_Init( - data, tstate->interp, shared, obj, _new_tuple_object); + _PyXIData_Init(data, tstate->interp, shared, obj, _new_tuple_object); data->free = _tuple_shared_free; return 0; @@ -555,7 +547,7 @@ _tuple_shared(PyThreadState *tstate, PyObject *obj, // registration static void -_register_builtins_for_crossinterpreter_data(struct _xidregistry *xidregistry) +_register_builtins_for_crossinterpreter_data(dlregistry_t *xidregistry) { // None if (_xidregistry_add_type(xidregistry, (PyTypeObject 
*)PyObject_Type(Py_None), _none_shared) != 0) { diff --git a/Python/pystate.c b/Python/pystate.c index ded5fde9c4bb511..24ee73c145cbccd 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -396,7 +396,7 @@ _Py_COMP_DIAG_POP #define LOCKS_INIT(runtime) \ { \ &(runtime)->interpreters.mutex, \ - &(runtime)->xi.registry.mutex, \ + &(runtime)->xi.data_lookup.registry.mutex, \ &(runtime)->unicode_state.ids.mutex, \ &(runtime)->imports.extensions.mutex, \ &(runtime)->ceval.pending_mainthread.mutex, \ diff --git a/Tools/c-analyzer/cpython/_parser.py b/Tools/c-analyzer/cpython/_parser.py index 3a73f65f8ff7b3b..21be53e78841d54 100644 --- a/Tools/c-analyzer/cpython/_parser.py +++ b/Tools/c-analyzer/cpython/_parser.py @@ -290,6 +290,7 @@ def clean_lines(text): Modules/_sre/sre_lib.h LOCAL(type) static inline type Modules/_sre/sre_lib.h SRE(F) sre_ucs2_##F Objects/stringlib/codecs.h STRINGLIB_IS_UNICODE 1 +Include/internal/pycore_crossinterp_data_registry.h Py_CORE_CROSSINTERP_DATA_REGISTRY_H 1 # @end=tsv@ ''')[1:] From a38e82bd8c249c126ab033c078170b6dea27a619 Mon Sep 17 00:00:00 2001 From: Michael Droettboom Date: Thu, 7 Nov 2024 11:39:23 -0500 Subject: [PATCH 53/54] gh-126298: Don't deduplicate slice constants based on equality (#126398) * gh-126298: Don't deduplicated slice constants based on equality * NULL check for PySlice_New * Fix refcounting * Fix refcounting some more * Fix refcounting * Make tests more complete * Fix tests --- Lib/test/test_compile.py | 78 ++++++++++++++++++++++++++++++---------- Objects/codeobject.c | 35 +++++++++++++++++- 2 files changed, 93 insertions(+), 20 deletions(-) diff --git a/Lib/test/test_compile.py b/Lib/test/test_compile.py index 85ae71c1f77b28e..519a1207afb1fc1 100644 --- a/Lib/test/test_compile.py +++ b/Lib/test/test_compile.py @@ -2,6 +2,7 @@ import dis import io import itertools +import marshal import math import opcode import os @@ -1385,52 +1386,91 @@ def check_op_count(func, op, expected): self.assertEqual(actual, expected) 
def check_consts(func, typ, expected): - slice_consts = 0 + expected = set([repr(x) for x in expected]) + all_consts = set() consts = func.__code__.co_consts for instr in dis.Bytecode(func): if instr.opname == "LOAD_CONST" and isinstance(consts[instr.oparg], typ): - slice_consts += 1 - self.assertEqual(slice_consts, expected) + all_consts.add(repr(consts[instr.oparg])) + self.assertEqual(all_consts, expected) def load(): return x[a:b] + x [a:] + x[:b] + x[:] + check_op_count(load, "BINARY_SLICE", 3) + check_op_count(load, "BUILD_SLICE", 0) + check_consts(load, slice, [slice(None, None, None)]) + check_op_count(load, "BINARY_SUBSCR", 1) + def store(): x[a:b] = y x [a:] = y x[:b] = y x[:] = y + check_op_count(store, "STORE_SLICE", 3) + check_op_count(store, "BUILD_SLICE", 0) + check_op_count(store, "STORE_SUBSCR", 1) + check_consts(store, slice, [slice(None, None, None)]) + def long_slice(): return x[a:b:c] + check_op_count(long_slice, "BUILD_SLICE", 1) + check_op_count(long_slice, "BINARY_SLICE", 0) + check_consts(long_slice, slice, []) + check_op_count(long_slice, "BINARY_SUBSCR", 1) + def aug(): x[a:b] += y + check_op_count(aug, "BINARY_SLICE", 1) + check_op_count(aug, "STORE_SLICE", 1) + check_op_count(aug, "BUILD_SLICE", 0) + check_op_count(aug, "BINARY_SUBSCR", 0) + check_op_count(aug, "STORE_SUBSCR", 0) + check_consts(aug, slice, []) + def aug_const(): x[1:2] += y + check_op_count(aug_const, "BINARY_SLICE", 0) + check_op_count(aug_const, "STORE_SLICE", 0) + check_op_count(aug_const, "BINARY_SUBSCR", 1) + check_op_count(aug_const, "STORE_SUBSCR", 1) + check_consts(aug_const, slice, [slice(1, 2)]) + def compound_const_slice(): x[1:2:3, 4:5:6] = y - check_op_count(load, "BINARY_SLICE", 3) - check_op_count(load, "BUILD_SLICE", 0) - check_consts(load, slice, 1) - check_op_count(store, "STORE_SLICE", 3) - check_op_count(store, "BUILD_SLICE", 0) - check_consts(store, slice, 1) - check_op_count(long_slice, "BUILD_SLICE", 1) - check_op_count(long_slice, "BINARY_SLICE", 
0) - check_op_count(aug, "BINARY_SLICE", 1) - check_op_count(aug, "STORE_SLICE", 1) - check_op_count(aug, "BUILD_SLICE", 0) - check_op_count(aug_const, "BINARY_SLICE", 0) - check_op_count(aug_const, "STORE_SLICE", 0) - check_consts(aug_const, slice, 1) check_op_count(compound_const_slice, "BINARY_SLICE", 0) check_op_count(compound_const_slice, "BUILD_SLICE", 0) - check_consts(compound_const_slice, slice, 0) - check_consts(compound_const_slice, tuple, 1) + check_op_count(compound_const_slice, "STORE_SLICE", 0) + check_op_count(compound_const_slice, "STORE_SUBSCR", 1) + check_consts(compound_const_slice, slice, []) + check_consts(compound_const_slice, tuple, [(slice(1, 2, 3), slice(4, 5, 6))]) + + def mutable_slice(): + x[[]:] = y + + check_consts(mutable_slice, slice, {}) + + def different_but_equal(): + x[:0] = y + x[:0.0] = y + x[:False] = y + x[:None] = y + + check_consts( + different_but_equal, + slice, + [ + slice(None, 0, None), + slice(None, 0.0, None), + slice(None, False, None), + slice(None, None, None) + ] + ) def test_compare_positions(self): for opname_prefix, op in [ diff --git a/Objects/codeobject.c b/Objects/codeobject.c index 1cf9740af9a2095..dba43d5911da951 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -2388,7 +2388,6 @@ _PyCode_ConstantKey(PyObject *op) if (op == Py_None || op == Py_Ellipsis || PyLong_CheckExact(op) || PyUnicode_CheckExact(op) - || PySlice_Check(op) /* code_richcompare() uses _PyCode_ConstantKey() internally */ || PyCode_Check(op)) { @@ -2496,6 +2495,40 @@ _PyCode_ConstantKey(PyObject *op) Py_DECREF(set); return key; } + else if (PySlice_Check(op)) { + PySliceObject *slice = (PySliceObject *)op; + PyObject *start_key = NULL; + PyObject *stop_key = NULL; + PyObject *step_key = NULL; + key = NULL; + + start_key = _PyCode_ConstantKey(slice->start); + if (start_key == NULL) { + goto slice_exit; + } + + stop_key = _PyCode_ConstantKey(slice->stop); + if (stop_key == NULL) { + goto slice_exit; + } + + step_key = 
_PyCode_ConstantKey(slice->step); + if (step_key == NULL) { + goto slice_exit; + } + + PyObject *slice_key = PySlice_New(start_key, stop_key, step_key); + if (slice_key == NULL) { + goto slice_exit; + } + + key = PyTuple_Pack(2, slice_key, op); + Py_DECREF(slice_key); + slice_exit: + Py_XDECREF(start_key); + Py_XDECREF(stop_key); + Py_XDECREF(step_key); + } else { /* for other types, use the object identifier as a unique identifier * to ensure that they are seen as unequal. */ From 09d6f5dc7824c74672add512619e978844ff8051 Mon Sep 17 00:00:00 2001 From: alm Date: Thu, 7 Nov 2024 20:55:31 +0200 Subject: [PATCH 54/54] GH-126464 Fix JIT CI on aarch64-apple-darwin (GH-126494) --- .github/workflows/jit.yml | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/.github/workflows/jit.yml b/.github/workflows/jit.yml index 897c692118e9a4d..35d5d59b7626600 100644 --- a/.github/workflows/jit.yml +++ b/.github/workflows/jit.yml @@ -52,7 +52,7 @@ jobs: - x86_64-pc-windows-msvc/msvc - aarch64-pc-windows-msvc/msvc - x86_64-apple-darwin/clang - # - aarch64-apple-darwin/clang + - aarch64-apple-darwin/clang - x86_64-unknown-linux-gnu/gcc - x86_64-unknown-linux-gnu/clang - aarch64-unknown-linux-gnu/gcc @@ -79,11 +79,10 @@ jobs: architecture: x86_64 runner: macos-13 compiler: clang - # GH-126464: A recent change to either GHA or LLVM broke this job: - # - target: aarch64-apple-darwin/clang - # architecture: aarch64 - # runner: macos-14 - # compiler: clang + - target: aarch64-apple-darwin/clang + architecture: aarch64 + runner: macos-14 + compiler: clang - target: x86_64-unknown-linux-gnu/gcc architecture: x86_64 runner: ubuntu-22.04 @@ -132,8 +131,8 @@ jobs: brew update find /usr/local/bin -lname '*/Library/Frameworks/Python.framework/*' -delete brew install llvm@${{ matrix.llvm }} - SDKROOT="$(xcrun --show-sdk-path)" \ - ./configure --enable-experimental-jit ${{ matrix.debug && '--with-pydebug' || '--enable-optimizations --with-lto' }} + export 
SDKROOT="$(xcrun --show-sdk-path)" + ./configure --enable-experimental-jit ${{ matrix.debug && '--with-pydebug' || '--enable-optimizations --with-lto' }} make all --jobs 4 ./python.exe -m test --multiprocess 0 --timeout 4500 --verbose2 --verbose3