From 4efe64aa56e7a9a96b94c0ae0201db8d402a5f53 Mon Sep 17 00:00:00 2001 From: Mikhail Efimov Date: Tue, 22 Oct 2024 11:41:30 +0300 Subject: [PATCH 001/127] gh-125811: Remove DeprecationWarnings in test_peg_generator (#125812) --- Lib/test/test_peg_generator/test_pegen.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_peg_generator/test_pegen.py b/Lib/test/test_peg_generator/test_pegen.py index 86db767b99a228..54c9dce2d0c90d 100644 --- a/Lib/test/test_peg_generator/test_pegen.py +++ b/Lib/test/test_peg_generator/test_pegen.py @@ -484,7 +484,7 @@ def test_left_recursive(self) -> None: def test_python_expr(self) -> None: grammar = """ - start: expr NEWLINE? $ { ast.Expression(expr, lineno=1, col_offset=0) } + start: expr NEWLINE? $ { ast.Expression(expr) } expr: ( expr '+' term { ast.BinOp(expr, ast.Add(), term, lineno=expr.lineno, col_offset=expr.col_offset, end_lineno=term.end_lineno, end_col_offset=term.end_col_offset) } | expr '-' term { ast.BinOp(expr, ast.Sub(), term, lineno=expr.lineno, col_offset=expr.col_offset, end_lineno=term.end_lineno, end_col_offset=term.end_col_offset) } | term { term } @@ -893,7 +893,7 @@ def test_unreachable_implicit3(self) -> None: def test_locations_in_alt_action_and_group(self) -> None: grammar = """ - start: t=term NEWLINE? $ { ast.Expression(t, LOCATIONS) } + start: t=term NEWLINE? $ { ast.Expression(t) } term: | l=term '*' r=factor { ast.BinOp(l, ast.Mult(), r, LOCATIONS) } | l=term '/' r=factor { ast.BinOp(l, ast.Div(), r, LOCATIONS) } From c1bdbe84c8ab29b68bb109328e02af9464f104b3 Mon Sep 17 00:00:00 2001 From: Mikhail Efimov Date: Tue, 22 Oct 2024 11:42:56 +0300 Subject: [PATCH 002/127] gh-124889: Rework Python generator cache (#125816) --- Tools/peg_generator/pegen/python_generator.py | 75 ++++++++++++------- 1 file changed, 48 insertions(+), 27 deletions(-) diff --git a/Tools/peg_generator/pegen/python_generator.py b/Tools/peg_generator/pegen/python_generator.py index 588d3d3f6ef8f8..7057135a9061f6 100644 --- a/Tools/peg_generator/pegen/python_generator.py +++ b/Tools/peg_generator/pegen/python_generator.py @@ -1,6 +1,6 @@ import os.path import token -from typing import IO, Any, Dict, Optional, Sequence, Set, Text, Tuple +from typing import IO, Any, Callable, Dict, Optional, Sequence, Set, Text, Tuple from pegen import grammar from pegen.grammar import ( @@ -93,7 +93,7 @@ def visit_Forced(self, node: Forced) -> bool: class PythonCallMakerVisitor(GrammarVisitor): def __init__(self, parser_generator: ParserGenerator): self.gen = parser_generator - self.cache: Dict[Any, Any] = {} + self.cache: Dict[str, Tuple[str, str]] = {} def visit_NameLeaf(self, node: NameLeaf) -> Tuple[Optional[str], str]: name = node.value @@ -110,16 +110,6 @@ def visit_NameLeaf(self, node: NameLeaf) -> Tuple[Optional[str], str]: def visit_StringLeaf(self, node: StringLeaf) -> Tuple[str, str]: return "literal", f"self.expect({node.value})" - def visit_Rhs(self, node: Rhs) -> Tuple[Optional[str], str]: - if node in self.cache: - return self.cache[node] - if len(node.alts) == 1 and len(node.alts[0].items) == 1: - self.cache[node] = self.visit(node.alts[0].items[0]) - else: - name = self.gen.artificial_rule_from_rhs(node) - self.cache[node] = name, f"self.{name}()" - return self.cache[node] - def visit_NamedItem(self, node: NamedItem) -> Tuple[Optional[str], str]: name, call = self.visit(node.item) if node.name: @@ -151,26 +141,57 @@ def visit_Opt(self, node: Opt) -> Tuple[str, str]: else: return "opt", f"{call}," + def _generate_artificial_rule_call( + self, + node: Any, + prefix: str, + call_by_name_func: Callable[[str], str], + rule_generation_func: Callable[[], str], + ) -> Tuple[str, str]: + node_str = f"{node}" + key = f"{prefix}_{node_str}" + if key in self.cache: + return self.cache[key] + + name = rule_generation_func() + call = call_by_name_func(name) + self.cache[key] = name, call + return self.cache[key] + + def visit_Rhs(self, node: Rhs) -> Tuple[str, str]: + if len(node.alts) == 1 and len(node.alts[0].items) == 1: + return self.visit(node.alts[0].items[0]) + + return self._generate_artificial_rule_call( + node, + "rhs", + lambda name: f"self.{name}()", + lambda: self.gen.artificial_rule_from_rhs(node), + ) + def visit_Repeat0(self, node: Repeat0) -> Tuple[str, str]: - if node in self.cache: - return self.cache[node] - name = self.gen.artificial_rule_from_repeat(node.node, False) - self.cache[node] = name, f"self.{name}()," # Also a trailing comma! - return self.cache[node] + return self._generate_artificial_rule_call( + node, + "repeat0", + lambda name: f"self.{name}(),", # Also a trailing comma! + lambda: self.gen.artificial_rule_from_repeat(node.node, is_repeat1=False), + ) def visit_Repeat1(self, node: Repeat1) -> Tuple[str, str]: - if node in self.cache: - return self.cache[node] - name = self.gen.artificial_rule_from_repeat(node.node, True) - self.cache[node] = name, f"self.{name}()" # But no trailing comma here! - return self.cache[node] + return self._generate_artificial_rule_call( + node, + "repeat1", + lambda name: f"self.{name}()", # But no trailing comma here! + lambda: self.gen.artificial_rule_from_repeat(node.node, is_repeat1=True), + ) def visit_Gather(self, node: Gather) -> Tuple[str, str]: - if node in self.cache: - return self.cache[node] - name = self.gen.artificial_rule_from_gather(node) - self.cache[node] = name, f"self.{name}()" # No trailing comma here either! - return self.cache[node] + return self._generate_artificial_rule_call( + node, + "gather", + lambda name: f"self.{name}()", # No trailing comma here either! + lambda: self.gen.artificial_rule_from_gather(node), + ) def visit_Group(self, node: Group) -> Tuple[Optional[str], str]: return self.visit(node.rhs) From 57e3c59bb64fc2f8b2845a7e03ab0abb029ccd02 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Tue, 22 Oct 2024 10:11:29 +0100 Subject: [PATCH 003/127] GH-125521: Remove `if (true)` from generated output to reduce C compiler warnings (GH-125700) --- Lib/test/test_generated_cases.py | 27 +++++++++ Python/generated_cases.c.h | 68 +++++++++++----------- Tools/cases_generator/analyzer.py | 2 +- Tools/cases_generator/generators_common.py | 18 ++++-- 4 files changed, 75 insertions(+), 40 deletions(-) diff --git a/Lib/test/test_generated_cases.py b/Lib/test/test_generated_cases.py index cd3718b80612bd..95813e1e32c7af 100644 --- a/Lib/test/test_generated_cases.py +++ b/Lib/test/test_generated_cases.py @@ -1270,6 +1270,33 @@ def test_push_then_error(self): """ self.run_cases_test(input, output) + def test_error_if_true(self): + + input = """ + inst(OP1, ( --)) { + ERROR_IF(true, here); + } + inst(OP2, ( --)) { + ERROR_IF(1, there); + } + """ + output = """ + TARGET(OP1) { + frame->instr_ptr = next_instr; + next_instr += 1; + INSTRUCTION_STATS(OP1); + goto here; + } + + TARGET(OP2) { + frame->instr_ptr = next_instr; + next_instr += 1; + INSTRUCTION_STATS(OP2); + goto there; + } + """ + self.run_cases_test(input, output) + def test_scalar_array_inconsistency(self): input = """ diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 388031af87a79f..efbf2fba8c3106 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -689,7 +689,7 @@ for (int _i = oparg*2; --_i >= 0;) { PyStackRef_CLOSE(values[_i]); } - if (true) { + { stack_pointer += -oparg*2; assert(WITHIN_STACK_BOUNDS()); goto error; @@ -731,7 +731,7 @@ for (int _i = oparg; --_i >= 0;) { PyStackRef_CLOSE(values[_i]); } - if (true) { + { stack_pointer += -oparg; assert(WITHIN_STACK_BOUNDS()); goto error; @@ -748,7 +748,7 @@ } if (err != 0) { Py_DECREF(set_o); - if (true) { + { stack_pointer += -oparg; assert(WITHIN_STACK_BOUNDS()); goto error; @@ -803,7 +803,7 @@ for (int _i = oparg; --_i >= 0;) { PyStackRef_CLOSE(pieces[_i]); } - if (true) { + { stack_pointer += -oparg; assert(WITHIN_STACK_BOUNDS()); goto error; @@ -945,7 +945,7 @@ for (int i = 0; i < total_args; i++) { PyStackRef_CLOSE(args[i]); } - if (true) { + { stack_pointer += -2 - oparg; assert(WITHIN_STACK_BOUNDS()); goto error; @@ -1343,7 +1343,7 @@ for (int _i = oparg; --_i >= 0;) { PyStackRef_CLOSE(args[_i]); } - if (true) { + { stack_pointer += -2 - oparg; assert(WITHIN_STACK_BOUNDS()); goto error; @@ -1422,7 +1422,7 @@ for (int _i = oparg; --_i >= 0;) { PyStackRef_CLOSE(args[_i]); } - if (true) { + { stack_pointer += -2 - oparg; assert(WITHIN_STACK_BOUNDS()); goto error; @@ -1509,7 +1509,7 @@ for (int _i = oparg; --_i >= 0;) { PyStackRef_CLOSE(args[_i]); } - if (true) { + { stack_pointer += -2 - oparg; assert(WITHIN_STACK_BOUNDS()); goto error; @@ -1971,7 +1971,7 @@ PyStackRef_CLOSE(args[_i]); } PyStackRef_CLOSE(kwnames); - if (true) { + { stack_pointer += -3 - oparg; assert(WITHIN_STACK_BOUNDS()); goto error; @@ -2174,7 +2174,7 @@ PyStackRef_CLOSE(args[_i]); } PyStackRef_CLOSE(kwnames); - if (true) { + { stack_pointer += -3 - oparg; assert(WITHIN_STACK_BOUNDS()); goto error; @@ -2431,7 +2431,7 @@ for (int _i = oparg; --_i >= 0;) { PyStackRef_CLOSE(args[_i]); } - if (true) { + { stack_pointer += -2 - oparg; assert(WITHIN_STACK_BOUNDS()); goto error; @@ -2516,7 +2516,7 @@ for (int _i = oparg; --_i >= 0;) { PyStackRef_CLOSE(args[_i]); } - if (true) { + { stack_pointer += -2 - oparg; assert(WITHIN_STACK_BOUNDS()); goto error; @@ -2749,7 +2749,7 @@ for (int _i = oparg; --_i >= 0;) { PyStackRef_CLOSE(args[_i]); } - if (true) { + { stack_pointer += -2 - oparg; assert(WITHIN_STACK_BOUNDS()); goto error; @@ -3103,7 +3103,7 @@ if (err < 0) { PyStackRef_CLOSE(exc_value_st); PyStackRef_CLOSE(match_type_st); - if (true) goto pop_2_error; + goto pop_2_error; } PyObject *match_o = NULL; PyObject *rest_o = NULL; @@ -3149,7 +3149,7 @@ stack_pointer = _PyFrame_GetStackPointer(frame); if (err < 0) { PyStackRef_CLOSE(right); - if (true) goto pop_1_error; + goto pop_1_error; } _PyFrame_SetStackPointer(frame, stack_pointer); int res = PyErr_GivenExceptionMatches(left_o, right_o); @@ -3583,7 +3583,7 @@ PyTuple_GetItem(_PyFrame_GetCode(frame)->co_localsplusnames, oparg) ); stack_pointer = _PyFrame_GetStackPointer(frame); - if (1) goto error; + goto error; } SETLOCAL(oparg, PyStackRef_NULL); DISPATCH(); @@ -3682,7 +3682,7 @@ _PyEval_FormatKwargsError(tstate, callable_o, update_o); stack_pointer = _PyFrame_GetStackPointer(frame); PyStackRef_CLOSE(update); - if (true) goto pop_1_error; + goto pop_1_error; } PyStackRef_CLOSE(update); stack_pointer += -1; @@ -3715,7 +3715,7 @@ stack_pointer = _PyFrame_GetStackPointer(frame); } PyStackRef_CLOSE(update); - if (true) goto pop_1_error; + goto pop_1_error; } PyStackRef_CLOSE(update); stack_pointer += -1; @@ -4173,7 +4173,7 @@ type->tp_name); stack_pointer = _PyFrame_GetStackPointer(frame); PyStackRef_CLOSE(obj); - if (true) goto pop_1_error; + goto pop_1_error; } _PyFrame_SetStackPointer(frame, stack_pointer); iter_o = (*getter)(obj_o); @@ -4191,7 +4191,7 @@ Py_TYPE(iter_o)->tp_name); stack_pointer = _PyFrame_GetStackPointer(frame); Py_DECREF(iter_o); - if (true) goto error; + goto error; } iter = PyStackRef_FromPyObjectSteal(iter_o); stack_pointer[-1] = iter; @@ -4458,7 +4458,7 @@ for (int i = 0; i < total_args; i++) { PyStackRef_CLOSE(args[i]); } - if (true) { + { stack_pointer += -2 - oparg; assert(WITHIN_STACK_BOUNDS()); goto error; @@ -5210,7 +5210,7 @@ stack_pointer = _PyFrame_GetStackPointer(frame); } PyStackRef_CLOSE(iterable_st); - if (true) goto pop_1_error; + goto pop_1_error; } assert(Py_IsNone(none_val)); PyStackRef_CLOSE(iterable_st); @@ -5866,7 +5866,7 @@ _PyErr_SetString(tstate, PyExc_NameError, "__build_class__ not found"); stack_pointer = _PyFrame_GetStackPointer(frame); - if (true) goto error; + goto error; } bc = PyStackRef_FromPyObjectSteal(bc_o); stack_pointer[0] = bc; @@ -5920,7 +5920,7 @@ _PyFrame_SetStackPointer(frame, stack_pointer); _PyEval_FormatExcUnbound(tstate, _PyFrame_GetCode(frame), oparg); stack_pointer = _PyFrame_GetStackPointer(frame); - if (true) goto error; + goto error; } value = PyStackRef_FromPyObjectSteal(value_o); stack_pointer[0] = value; @@ -5969,7 +5969,7 @@ PyTuple_GetItem(_PyFrame_GetCode(frame)->co_localsplusnames, oparg) ); stack_pointer = _PyFrame_GetStackPointer(frame); - if (1) goto error; + goto error; } value = PyStackRef_DUP(value_s); stack_pointer[0] = value; @@ -6088,7 +6088,7 @@ tstate, PyExc_NameError, NAME_ERROR_MSG, name); stack_pointer = _PyFrame_GetStackPointer(frame); - if (true) goto error; + goto error; } } } @@ -6237,7 +6237,7 @@ _PyErr_SetString(tstate, PyExc_SystemError, "no locals found"); stack_pointer = _PyFrame_GetStackPointer(frame); - if (true) goto error; + goto error; } locals = PyStackRef_FromPyObjectNew(l); stack_pointer[0] = locals; @@ -6288,7 +6288,7 @@ Py_TYPE(owner_o)->tp_name); stack_pointer = _PyFrame_GetStackPointer(frame); } - if (true) goto error; + goto error; } attr = PyStackRef_FromPyObjectSteal(attr_o); self_or_null = self_or_null_o == NULL ? @@ -6348,7 +6348,7 @@ PyStackRef_CLOSE(global_super_st); PyStackRef_CLOSE(class_st); PyStackRef_CLOSE(self_st); - if (true) goto pop_3_error; + goto pop_3_error; } } // we make no attempt to optimize here; specializations should @@ -6466,7 +6466,7 @@ PyStackRef_CLOSE(class_st); if (attr_o == NULL) { PyStackRef_CLOSE(self_st); - if (true) goto pop_3_error; + goto pop_3_error; } if (method_found) { self_or_null = self_st; // transfer ownership @@ -6838,7 +6838,7 @@ stack_pointer = _PyFrame_GetStackPointer(frame); goto exception_unwind; } - if (true) goto error; + goto error; } TARGET(RERAISE) { @@ -7130,7 +7130,7 @@ } else { PyStackRef_CLOSE(v); - if (true) goto pop_1_error; + goto pop_1_error; } } PyStackRef_CLOSE(v); @@ -7202,7 +7202,7 @@ _PyErr_Format(tstate, PyExc_SystemError, "no locals found when setting up annotations"); stack_pointer = _PyFrame_GetStackPointer(frame); - if (true) goto error; + goto error; } /* check if __annotations__ in locals()... */ _PyFrame_SetStackPointer(frame, stack_pointer); @@ -7559,7 +7559,7 @@ "no locals found when storing %R", name); stack_pointer = _PyFrame_GetStackPointer(frame); PyStackRef_CLOSE(v); - if (true) goto pop_1_error; + goto pop_1_error; } if (PyDict_CheckExact(ns)) { _PyFrame_SetStackPointer(frame, stack_pointer); diff --git a/Tools/cases_generator/analyzer.py b/Tools/cases_generator/analyzer.py index 381ad3a4e2082c..f41a8d161099df 100644 --- a/Tools/cases_generator/analyzer.py +++ b/Tools/cases_generator/analyzer.py @@ -744,7 +744,7 @@ def always_exits(op: parser.InstDef) -> bool: if tkn.text == "DEOPT_IF" or tkn.text == "ERROR_IF": next(tkn_iter) # '(' t = next(tkn_iter) - if t.text == "true": + if t.text in ("true", "1"): return True return False diff --git a/Tools/cases_generator/generators_common.py b/Tools/cases_generator/generators_common.py index 7e032c21d2485c..3b158f5ac4eb48 100644 --- a/Tools/cases_generator/generators_common.py +++ b/Tools/cases_generator/generators_common.py @@ -165,16 +165,24 @@ def error_if( storage: Storage, inst: Instruction | None, ) -> bool: - self.out.emit_at("if ", tkn) lparen = next(tkn_iter) - self.emit(lparen) assert lparen.kind == "LPAREN" first_tkn = tkn_iter.peek() - emit_to(self.out, tkn_iter, "COMMA") + unconditional = always_true(first_tkn) + if unconditional: + next(tkn_iter) + comma = next(tkn_iter) + if comma.kind != "COMMA": + raise analysis_error(f"Expected comma, got '{comma.text}'", comma) + self.out.start_line() + else: + self.out.emit_at("if ", tkn) + self.emit(lparen) + emit_to(self.out, tkn_iter, "COMMA") + self.out.emit(") ") label = next(tkn_iter).text next(tkn_iter) # RPAREN next(tkn_iter) # Semi colon - self.out.emit(") ") storage.clear_inputs("at ERROR_IF") c_offset = storage.stack.peek_offset() try: @@ -196,7 +204,7 @@ def error_if( self.out.emit(label) self.out.emit(";\n") self.out.emit("}\n") - return not always_true(first_tkn) + return not unconditional def error_no_pop( self, From 759a54d28ffe7eac8c23917f5d3dfad8309856be Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Tue, 22 Oct 2024 13:57:25 +0300 Subject: [PATCH 004/127] gh-125355: Rewrite parse_intermixed_args() in argparse (GH-125356) * The parser no longer changes temporarily during parsing. * Default values are not processed twice. * Required mutually exclusive groups containing positional arguments are now supported. * The missing arguments report now includes the names of all required optional and positional arguments. * Unknown options can be intermixed with positional arguments in parse_known_intermixed_args(). --- Lib/argparse.py | 99 +++++++------------ Lib/test/test_argparse.py | 56 +++++++---- ...-10-22-13-28-00.gh-issue-125355.zssHm_.rst | 7 ++ 3 files changed, 80 insertions(+), 82 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-10-22-13-28-00.gh-issue-125355.zssHm_.rst diff --git a/Lib/argparse.py b/Lib/argparse.py index 49271a146c7282..024622bec17c3b 100644 --- a/Lib/argparse.py +++ b/Lib/argparse.py @@ -1924,6 +1924,9 @@ def parse_args(self, args=None, namespace=None): return args def parse_known_args(self, args=None, namespace=None): + return self._parse_known_args2(args, namespace, intermixed=False) + + def _parse_known_args2(self, args, namespace, intermixed): if args is None: # args default to the system args args = _sys.argv[1:] @@ -1950,18 +1953,18 @@ def parse_known_args(self, args=None, namespace=None): # parse the arguments and exit if there are any errors if self.exit_on_error: try: - namespace, args = self._parse_known_args(args, namespace) + namespace, args = self._parse_known_args(args, namespace, intermixed) except ArgumentError as err: self.error(str(err)) else: - namespace, args = self._parse_known_args(args, namespace) + namespace, args = self._parse_known_args(args, namespace, intermixed) if hasattr(namespace, _UNRECOGNIZED_ARGS_ATTR): args.extend(getattr(namespace, _UNRECOGNIZED_ARGS_ATTR)) delattr(namespace, _UNRECOGNIZED_ARGS_ATTR) return namespace, args - def _parse_known_args(self, arg_strings, namespace): + def _parse_known_args(self, arg_strings, namespace, intermixed): # replace arg strings that are file references if self.fromfile_prefix_chars is not None: arg_strings = self._read_args_from_files(arg_strings) @@ -2052,6 +2055,7 @@ def consume_optional(start_index): # if we found no optional action, skip it if action is None: extras.append(arg_strings[start_index]) + extras_pattern.append('O') return start_index + 1 # if there is an explicit argument, try to match the @@ -2087,6 +2091,7 @@ def consume_optional(start_index): sep = '' else: extras.append(char + explicit_arg) + extras_pattern.append('O') stop = start_index + 1 break # if the action expect exactly one argument, we've @@ -2165,6 +2170,7 @@ def consume_positionals(start_index): # consume Positionals and Optionals alternately, until we have # passed the last option string extras = [] + extras_pattern = [] start_index = 0 if option_string_indices: max_option_string_index = max(option_string_indices) @@ -2178,7 +2184,7 @@ def consume_positionals(start_index): if next_option_string_index in option_string_indices: break next_option_string_index += 1 - if start_index != next_option_string_index: + if not intermixed and start_index != next_option_string_index: positionals_end_index = consume_positionals(start_index) # only try to parse the next optional if we didn't consume @@ -2194,16 +2200,35 @@ def consume_positionals(start_index): if start_index not in option_string_indices: strings = arg_strings[start_index:next_option_string_index] extras.extend(strings) + extras_pattern.extend(arg_strings_pattern[start_index:next_option_string_index]) start_index = next_option_string_index # consume the next optional and any arguments for it start_index = consume_optional(start_index) - # consume any positionals following the last Optional - stop_index = consume_positionals(start_index) + if not intermixed: + # consume any positionals following the last Optional + stop_index = consume_positionals(start_index) - # if we didn't consume all the argument strings, there were extras - extras.extend(arg_strings[stop_index:]) + # if we didn't consume all the argument strings, there were extras + extras.extend(arg_strings[stop_index:]) + else: + extras.extend(arg_strings[start_index:]) + extras_pattern.extend(arg_strings_pattern[start_index:]) + extras_pattern = ''.join(extras_pattern) + assert len(extras_pattern) == len(extras) + # consume all positionals + arg_strings = [s for s, c in zip(extras, extras_pattern) if c != 'O'] + arg_strings_pattern = extras_pattern.replace('O', '') + stop_index = consume_positionals(0) + # leave unknown optionals and non-consumed positionals in extras + for i, c in enumerate(extras_pattern): + if not stop_index: + break + if c != 'O': + stop_index -= 1 + extras[i] = None + extras = [s for s in extras if s is not None] # make sure all required actions were present and also convert # action defaults which were not given as arguments @@ -2469,10 +2494,6 @@ def parse_known_intermixed_args(self, args=None, namespace=None): # are then parsed. If the parser definition is incompatible with the # intermixed assumptions (e.g. use of REMAINDER, subparsers) a # TypeError is raised. - # - # positionals are 'deactivated' by setting nargs and default to - # SUPPRESS. This blocks the addition of that positional to the - # namespace positionals = self._get_positional_actions() a = [action for action in positionals @@ -2481,59 +2502,7 @@ def parse_known_intermixed_args(self, args=None, namespace=None): raise TypeError('parse_intermixed_args: positional arg' ' with nargs=%s'%a[0].nargs) - if [action.dest for group in self._mutually_exclusive_groups - for action in group._group_actions if action in positionals]: - raise TypeError('parse_intermixed_args: positional in' - ' mutuallyExclusiveGroup') - - try: - save_usage = self.usage - try: - if self.usage is None: - # capture the full usage for use in error messages - self.usage = self.format_usage()[7:] - for action in positionals: - # deactivate positionals - action.save_nargs = action.nargs - # action.nargs = 0 - action.nargs = SUPPRESS - action.save_default = action.default - action.default = SUPPRESS - namespace, remaining_args = self.parse_known_args(args, - namespace) - for action in positionals: - # remove the empty positional values from namespace - if (hasattr(namespace, action.dest) - and getattr(namespace, action.dest)==[]): - from warnings import warn - warn('Do not expect %s in %s' % (action.dest, namespace)) - delattr(namespace, action.dest) - finally: - # restore nargs and usage before exiting - for action in positionals: - action.nargs = action.save_nargs - action.default = action.save_default - optionals = self._get_optional_actions() - try: - # parse positionals. optionals aren't normally required, but - # they could be, so make sure they aren't. - for action in optionals: - action.save_required = action.required - action.required = False - for group in self._mutually_exclusive_groups: - group.save_required = group.required - group.required = False - namespace, extras = self.parse_known_args(remaining_args, - namespace) - finally: - # restore parser values before exiting - for action in optionals: - action.required = action.save_required - for group in self._mutually_exclusive_groups: - group.required = group.save_required - finally: - self.usage = save_usage - return namespace, extras + return self._parse_known_args2(args, namespace, intermixed=True) # ======================== # Value conversion methods diff --git a/Lib/test/test_argparse.py b/Lib/test/test_argparse.py index 4fa669718abc50..4bd7a935b9b757 100644 --- a/Lib/test/test_argparse.py +++ b/Lib/test/test_argparse.py @@ -6412,12 +6412,23 @@ def test_basic(self): # cannot parse the '1,2,3' self.assertEqual(NS(bar='y', cmd='cmd', foo='x', rest=[1]), args) self.assertEqual(["2", "3"], extras) + args, extras = parser.parse_known_intermixed_args(argv) + self.assertEqual(NS(bar='y', cmd='cmd', foo='x', rest=[1, 2, 3]), args) + self.assertEqual([], extras) + # unknown optionals go into extras + argv = 'cmd --foo x --error 1 2 --bar y 3'.split() + args, extras = parser.parse_known_intermixed_args(argv) + self.assertEqual(NS(bar='y', cmd='cmd', foo='x', rest=[1, 2, 3]), args) + self.assertEqual(['--error'], extras) argv = 'cmd --foo x 1 --error 2 --bar y 3'.split() args, extras = parser.parse_known_intermixed_args(argv) - # unknown optionals go into extras - self.assertEqual(NS(bar='y', cmd='cmd', foo='x', rest=[1]), args) - self.assertEqual(['--error', '2', '3'], extras) + self.assertEqual(NS(bar='y', cmd='cmd', foo='x', rest=[1, 2, 3]), args) + self.assertEqual(['--error'], extras) + argv = 'cmd --foo x 1 2 --error --bar y 3'.split() + args, extras = parser.parse_known_intermixed_args(argv) + self.assertEqual(NS(bar='y', cmd='cmd', foo='x', rest=[1, 2, 3]), args) + self.assertEqual(['--error'], extras) # restores attributes that were temporarily changed self.assertIsNone(parser.usage) @@ -6436,37 +6447,48 @@ def test_remainder(self): parser.parse_intermixed_args(argv) self.assertRegex(str(cm.exception), r'\.\.\.') - def test_exclusive(self): - # mutually exclusive group; intermixed works fine - parser = ErrorRaisingArgumentParser(prog='PROG') + def test_required_exclusive(self): + # required mutually exclusive group; intermixed works fine + parser = argparse.ArgumentParser(prog='PROG', exit_on_error=False) group = parser.add_mutually_exclusive_group(required=True) group.add_argument('--foo', action='store_true', help='FOO') group.add_argument('--spam', help='SPAM') parser.add_argument('badger', nargs='*', default='X', help='BADGER') + args = parser.parse_intermixed_args('--foo 1 2'.split()) + self.assertEqual(NS(badger=['1', '2'], foo=True, spam=None), args) args = parser.parse_intermixed_args('1 --foo 2'.split()) self.assertEqual(NS(badger=['1', '2'], foo=True, spam=None), args) - self.assertRaises(ArgumentParserError, parser.parse_intermixed_args, '1 2'.split()) + self.assertRaisesRegex(argparse.ArgumentError, + 'one of the arguments --foo --spam is required', + parser.parse_intermixed_args, '1 2'.split()) self.assertEqual(group.required, True) - def test_exclusive_incompatible(self): - # mutually exclusive group including positional - fail - parser = ErrorRaisingArgumentParser(prog='PROG') + def test_required_exclusive_with_positional(self): + # required mutually exclusive group with positional argument + parser = argparse.ArgumentParser(prog='PROG', exit_on_error=False) group = parser.add_mutually_exclusive_group(required=True) group.add_argument('--foo', action='store_true', help='FOO') group.add_argument('--spam', help='SPAM') group.add_argument('badger', nargs='*', default='X', help='BADGER') - self.assertRaises(TypeError, parser.parse_intermixed_args, []) + args = parser.parse_intermixed_args(['--foo']) + self.assertEqual(NS(foo=True, spam=None, badger='X'), args) + args = parser.parse_intermixed_args(['a', 'b']) + self.assertEqual(NS(foo=False, spam=None, badger=['a', 'b']), args) + self.assertRaisesRegex(argparse.ArgumentError, + 'one of the arguments --foo --spam badger is required', + parser.parse_intermixed_args, []) + self.assertRaisesRegex(argparse.ArgumentError, + 'argument badger: not allowed with argument --foo', + parser.parse_intermixed_args, ['--foo', 'a', 'b']) + self.assertRaisesRegex(argparse.ArgumentError, + 'argument badger: not allowed with argument --foo', + parser.parse_intermixed_args, ['a', '--foo', 'b']) self.assertEqual(group.required, True) def test_invalid_args(self): parser = ErrorRaisingArgumentParser(prog='PROG') self.assertRaises(ArgumentParserError, parser.parse_intermixed_args, ['a']) - parser = ErrorRaisingArgumentParser(prog='PROG') - parser.add_argument('--foo', nargs="*") - parser.add_argument('foo') - with self.assertWarns(UserWarning): - parser.parse_intermixed_args(['hello', '--foo']) class TestIntermixedMessageContentError(TestCase): # case where Intermixed gives different error message @@ -6485,7 +6507,7 @@ def test_missing_argument_name_in_message(self): with self.assertRaises(ArgumentParserError) as cm: parser.parse_intermixed_args([]) msg = str(cm.exception) - self.assertNotRegex(msg, 'req_pos') + self.assertRegex(msg, 'req_pos') self.assertRegex(msg, 'req_opt') # ========================== diff --git a/Misc/NEWS.d/next/Library/2024-10-22-13-28-00.gh-issue-125355.zssHm_.rst b/Misc/NEWS.d/next/Library/2024-10-22-13-28-00.gh-issue-125355.zssHm_.rst new file mode 100644 index 00000000000000..fd67f697641d92 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-22-13-28-00.gh-issue-125355.zssHm_.rst @@ -0,0 +1,7 @@ +Fix several bugs in :meth:`argparse.ArgumentParser.parse_intermixed_args`. + +* The parser no longer changes temporarily during parsing. +* Default values are not processed twice. +* Required mutually exclusive groups containing positional arguments are now supported. +* The missing arguments report now includes the names of all required optional and positional arguments. +* Unknown options can be intermixed with positional arguments in parse_known_intermixed_args(). From 91ddde4af0c3031c84a967bcf59f6fb4f8a48c0d Mon Sep 17 00:00:00 2001 From: Adam Turner <9087854+AA-Turner@users.noreply.github.com> Date: Tue, 22 Oct 2024 14:07:09 +0100 Subject: [PATCH 005/127] Doc: Show object descriptions in the table of contents (#125757) --- Doc/conf.py | 3 ++- Doc/tools/extensions/pyspecific.py | 1 + Doc/tools/static/sidebar-wrap.css | 6 ++++++ 3 files changed, 9 insertions(+), 1 deletion(-) create mode 100644 Doc/tools/static/sidebar-wrap.css diff --git a/Doc/conf.py b/Doc/conf.py index db8fb9a9a68c6b..7ee3c91581345d 100644 --- a/Doc/conf.py +++ b/Doc/conf.py @@ -94,7 +94,8 @@ # Create table of contents entries for domain objects (e.g. functions, classes, # attributes, etc.). Default is True. -toc_object_entries = False +toc_object_entries = True +toc_object_entries_show_parents = 'hide' # Ignore any .rst files in the includes/ directory; # they're embedded in pages but not rendered individually. diff --git a/Doc/tools/extensions/pyspecific.py b/Doc/tools/extensions/pyspecific.py index bcb8a421e32d09..f4df7ec0839339 100644 --- a/Doc/tools/extensions/pyspecific.py +++ b/Doc/tools/extensions/pyspecific.py @@ -434,5 +434,6 @@ def setup(app): app.add_directive_to_domain('py', 'awaitablemethod', PyAwaitableMethod) app.add_directive_to_domain('py', 'abstractmethod', PyAbstractMethod) app.add_directive('miscnews', MiscNews) + app.add_css_file('sidebar-wrap.css') app.connect('env-check-consistency', patch_pairindextypes) return {'version': '1.0', 'parallel_read_safe': True} diff --git a/Doc/tools/static/sidebar-wrap.css b/Doc/tools/static/sidebar-wrap.css new file mode 100644 index 00000000000000..0a80f516f28349 --- /dev/null +++ b/Doc/tools/static/sidebar-wrap.css @@ -0,0 +1,6 @@ +div.sphinxsidebarwrapper { + overflow-x: scroll; +} +div.sphinxsidebarwrapper li code { + overflow-wrap: normal; +} From 079875e39589eb0628b5883f7ffa387e7476ec06 Mon Sep 17 00:00:00 2001 From: Mikhail Efimov Date: Tue, 22 Oct 2024 19:00:25 +0300 Subject: [PATCH 006/127] gh-125038: Fix crash after genexpr.gi_frame.f_locals manipulations (#125178) --- Lib/test/test_dis.py | 1 + Lib/test/test_generators.py | 73 +++++++++++++++++++ ...-10-09-13-53-50.gh-issue-125038.ffSLCz.rst | 2 + Python/codegen.c | 1 + 4 files changed, 77 insertions(+) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2024-10-09-13-53-50.gh-issue-125038.ffSLCz.rst diff --git a/Lib/test/test_dis.py b/Lib/test/test_dis.py index 1ee0fbe98914be..1f9c04cdbc926c 100644 --- a/Lib/test/test_dis.py +++ b/Lib/test/test_dis.py @@ -810,6 +810,7 @@ def foo(x): POP_TOP L1: RESUME 0 LOAD_FAST 0 (.0) + GET_ITER L2: FOR_ITER 10 (to L3) STORE_FAST 1 (z) LOAD_DEREF 2 (x) diff --git a/Lib/test/test_generators.py b/Lib/test/test_generators.py index 03a31ec6a05726..bf2cb1160723b0 100644 --- a/Lib/test/test_generators.py +++ b/Lib/test/test_generators.py @@ -268,6 +268,79 @@ def loop(): #This should not raise loop() + +class ModifyUnderlyingIterableTest(unittest.TestCase): + iterables = [ + range(0), + range(20), + [1, 2, 3], + (2,), + {13, 48, 211}, + frozenset((15, 8, 6)), + {1: 2, 3: 4}, + ] + + non_iterables = [ + None, + 42, + 3.0, + 2j, + ] + + def genexpr(self): + return (x for x in range(10)) + + def genfunc(self): + def gen(it): + for x in it: + yield x + return gen(range(10)) + + def process_tests(self, get_generator): + for obj in self.iterables: + g_obj = get_generator(obj) + with self.subTest(g_obj=g_obj, obj=obj): + self.assertListEqual(list(g_obj), list(obj)) + + g_iter = get_generator(iter(obj)) + with self.subTest(g_iter=g_iter, obj=obj): + self.assertListEqual(list(g_iter), list(obj)) + + err_regex = "'.*' object is not iterable" + for obj in self.non_iterables: + g_obj = get_generator(obj) + with self.subTest(g_obj=g_obj): + self.assertRaisesRegex(TypeError, err_regex, list, g_obj) + + def test_modify_f_locals(self): + def modify_f_locals(g, local, obj): + g.gi_frame.f_locals[local] = obj + return g + + def get_generator_genexpr(obj): + return modify_f_locals(self.genexpr(), '.0', obj) + + def get_generator_genfunc(obj): + return modify_f_locals(self.genfunc(), 'it', obj) + + self.process_tests(get_generator_genexpr) + self.process_tests(get_generator_genfunc) + + def test_new_gen_from_gi_code(self): + def new_gen_from_gi_code(g, obj): + generator_func = types.FunctionType(g.gi_code, {}) + return generator_func(obj) + + def get_generator_genexpr(obj): + return new_gen_from_gi_code(self.genexpr(), obj) + + def get_generator_genfunc(obj): + return new_gen_from_gi_code(self.genfunc(), obj) + + self.process_tests(get_generator_genexpr) + self.process_tests(get_generator_genfunc) + + class ExceptionTest(unittest.TestCase): # Tests for the issue #23353: check that the currently handled exception # is correctly saved/restored in PyEval_EvalFrameEx(). diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-10-09-13-53-50.gh-issue-125038.ffSLCz.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-10-09-13-53-50.gh-issue-125038.ffSLCz.rst new file mode 100644 index 00000000000000..15de48ec0e4450 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-10-09-13-53-50.gh-issue-125038.ffSLCz.rst @@ -0,0 +1,2 @@ +Fix crash when iterating over a generator expression after direct changes on ``gi_frame.f_locals``. +Patch by Mikhail Efimov. diff --git a/Python/codegen.c b/Python/codegen.c index 689d2b5124e9d3..bfacc6f0c55593 100644 --- a/Python/codegen.c +++ b/Python/codegen.c @@ -4164,6 +4164,7 @@ codegen_sync_comprehension_generator(compiler *c, location loc, if (IS_JUMP_TARGET_LABEL(start)) { depth++; + ADDOP(c, LOC(gen->iter), GET_ITER); USE_LABEL(c, start); ADDOP_JUMP(c, LOC(gen->iter), FOR_ITER, anchor); } From aaed91cabcedc16c089c4b1c9abb1114659a83d3 Mon Sep 17 00:00:00 2001 From: Ethan Furman Date: Tue, 22 Oct 2024 11:04:00 -0700 Subject: [PATCH 007/127] gh-125710: [Enum] fix hashable<->nonhashable comparisons for member values (GH-125735) --- Lib/enum.py | 26 ++++++++++++++----- Lib/test/test_enum.py | 7 +++++ ...-10-19-13-37-37.gh-issue-125710.FyFAAr.rst | 1 + 3 files changed, 28 insertions(+), 6 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-10-19-13-37-37.gh-issue-125710.FyFAAr.rst diff --git a/Lib/enum.py b/Lib/enum.py index 17d72738792982..4f9912229603a6 100644 --- a/Lib/enum.py +++ b/Lib/enum.py @@ -327,6 +327,8 @@ def __set_name__(self, enum_class, member_name): # to the map, and by-value lookups for this value will be # linear. enum_class._value2member_map_.setdefault(value, enum_member) + if value not in enum_class._hashable_values_: + enum_class._hashable_values_.append(value) except TypeError: # keep track of the value in a list so containment checks are quick enum_class._unhashable_values_.append(value) @@ -538,7 +540,8 @@ def __new__(metacls, cls, bases, classdict, *, boundary=None, _simple=False, **k classdict['_member_names_'] = [] classdict['_member_map_'] = {} classdict['_value2member_map_'] = {} - classdict['_unhashable_values_'] = [] + classdict['_hashable_values_'] = [] # for comparing with non-hashable types + classdict['_unhashable_values_'] = [] # e.g. frozenset() with set() classdict['_unhashable_values_map_'] = {} classdict['_member_type_'] = member_type # now set the __repr__ for the value @@ -748,7 +751,10 @@ def __contains__(cls, value): try: return value in cls._value2member_map_ except TypeError: - return value in cls._unhashable_values_ + return ( + value in cls._unhashable_values_ # both structures are lists + or value in cls._hashable_values_ + ) def __delattr__(cls, attr): # nicer error message when someone tries to delete an attribute @@ -1166,8 +1172,11 @@ def __new__(cls, value): pass except TypeError: # not there, now do long search -- O(n) behavior - for name, values in cls._unhashable_values_map_.items(): - if value in values: + for name, unhashable_values in cls._unhashable_values_map_.items(): + if value in unhashable_values: + return cls[name] + for name, member in cls._member_map_.items(): + if value == member._value_: return cls[name] # still not found -- verify that members exist, in-case somebody got here mistakenly # (such as via super when trying to override __new__) @@ -1233,6 +1242,7 @@ def _add_value_alias_(self, value): # to the map, and by-value lookups for this value will be # linear. cls._value2member_map_.setdefault(value, self) + cls._hashable_values_.append(value) except TypeError: # keep track of the value in a list so containment checks are quick cls._unhashable_values_.append(value) @@ -1763,6 +1773,7 @@ def convert_class(cls): body['_member_names_'] = member_names = [] body['_member_map_'] = member_map = {} body['_value2member_map_'] = value2member_map = {} + body['_hashable_values_'] = hashable_values = [] body['_unhashable_values_'] = unhashable_values = [] body['_unhashable_values_map_'] = {} body['_member_type_'] = member_type = etype._member_type_ @@ -1826,7 +1837,7 @@ def convert_class(cls): contained = value2member_map.get(member._value_) except TypeError: contained = None - if member._value_ in unhashable_values: + if member._value_ in unhashable_values or member.value in hashable_values: for m in enum_class: if m._value_ == member._value_: contained = m @@ -1846,6 +1857,7 @@ def convert_class(cls): else: enum_class._add_member_(name, member) value2member_map[value] = member + hashable_values.append(value) if _is_single_bit(value): # not a multi-bit alias, record in _member_names_ and _flag_mask_ member_names.append(name) @@ -1882,7 +1894,7 @@ def convert_class(cls): contained = value2member_map.get(member._value_) except TypeError: contained = None - if member._value_ in unhashable_values: + if member._value_ in unhashable_values or member._value_ in hashable_values: for m in enum_class: if m._value_ == member._value_: contained = m @@ -1908,6 +1920,8 @@ def convert_class(cls): # to the map, and by-value lookups for this value will be # linear. enum_class._value2member_map_.setdefault(value, member) + if value not in hashable_values: + hashable_values.append(value) except TypeError: # keep track of the value in a list so containment checks are quick enum_class._unhashable_values_.append(value) diff --git a/Lib/test/test_enum.py b/Lib/test/test_enum.py index 5b4a8070526fcf..7184769bfd6fc3 100644 --- a/Lib/test/test_enum.py +++ b/Lib/test/test_enum.py @@ -3460,6 +3460,13 @@ def test_empty_names(self): self.assertRaisesRegex(TypeError, '.int. object is not iterable', Enum, 'bad_enum', names=0) self.assertRaisesRegex(TypeError, '.int. object is not iterable', Enum, 'bad_enum', 0, type=int) + def test_nonhashable_matches_hashable(self): # issue 125710 + class Directions(Enum): + DOWN_ONLY = frozenset({"sc"}) + UP_ONLY = frozenset({"cs"}) + UNRESTRICTED = frozenset({"sc", "cs"}) + self.assertIs(Directions({"sc"}), Directions.DOWN_ONLY) + class TestOrder(unittest.TestCase): "test usage of the `_order_` attribute" diff --git a/Misc/NEWS.d/next/Library/2024-10-19-13-37-37.gh-issue-125710.FyFAAr.rst b/Misc/NEWS.d/next/Library/2024-10-19-13-37-37.gh-issue-125710.FyFAAr.rst new file mode 100644 index 00000000000000..8d5220e9889c3a --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-19-13-37-37.gh-issue-125710.FyFAAr.rst @@ -0,0 +1 @@ +[Enum] fix hashable<->nonhashable comparisons for member values From 34653bba644aa5481613f398153757d7357e39ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mario=20=C5=A0a=C5=A1ko?= Date: Tue, 22 Oct 2024 22:42:22 +0200 Subject: [PATCH 008/127] gh-125259: Fix error notes removal in enum initialization (GH-125647) --- Lib/enum.py | 16 +++++----------- Lib/test/test_enum.py | 19 +++++++++++++++++++ ...-10-17-16-10-29.gh-issue-125259.oMew0c.rst | 1 + 3 files changed, 25 insertions(+), 11 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-10-17-16-10-29.gh-issue-125259.oMew0c.rst diff --git a/Lib/enum.py b/Lib/enum.py index 4f9912229603a6..27be3fb83b2afb 100644 --- a/Lib/enum.py +++ b/Lib/enum.py @@ -557,22 +557,16 @@ def __new__(metacls, cls, bases, classdict, *, boundary=None, _simple=False, **k classdict['_all_bits_'] = 0 classdict['_inverted_'] = None try: - exc = None classdict['_%s__in_progress' % cls] = True enum_class = super().__new__(metacls, cls, bases, classdict, **kwds) classdict['_%s__in_progress' % cls] = False delattr(enum_class, '_%s__in_progress' % cls) except Exception as e: - # since 3.12 the line "Error calling __set_name__ on '_proto_member' instance ..." - # is tacked on to the error instead of raising a RuntimeError - # recreate the exception to discard - exc = type(e)(str(e)) - exc.__cause__ = e.__cause__ - exc.__context__ = e.__context__ - tb = e.__traceback__ - if exc is not None: - raise exc.with_traceback(tb) - # + # since 3.12 the note "Error calling __set_name__ on '_proto_member' instance ..." + # is tacked on to the error instead of raising a RuntimeError, so discard it + if hasattr(e, '__notes__'): + del e.__notes__ + raise # update classdict with any changes made by __init_subclass__ classdict.update(enum_class.__dict__) # diff --git a/Lib/test/test_enum.py b/Lib/test/test_enum.py index 7184769bfd6fc3..b9e13fb8c3585e 100644 --- a/Lib/test/test_enum.py +++ b/Lib/test/test_enum.py @@ -1888,6 +1888,25 @@ def test_wrong_inheritance_order(self): class Wrong(Enum, str): NotHere = 'error before this point' + def test_raise_custom_error_on_creation(self): + class InvalidRgbColorError(ValueError): + def __init__(self, r, g, b): + self.r = r + self.g = g + self.b = b + super().__init__(f'({r}, {g}, {b}) is not a valid RGB color') + + with self.assertRaises(InvalidRgbColorError): + class RgbColor(Enum): + RED = (255, 0, 0) + GREEN = (0, 255, 0) + BLUE = (0, 0, 255) + INVALID = (256, 0, 0) + + def __init__(self, r, g, b): + if not all(0 <= val <= 255 for val in (r, g, b)): + raise InvalidRgbColorError(r, g, b) + def test_intenum_transitivity(self): class number(IntEnum): one = 1 diff --git a/Misc/NEWS.d/next/Library/2024-10-17-16-10-29.gh-issue-125259.oMew0c.rst b/Misc/NEWS.d/next/Library/2024-10-17-16-10-29.gh-issue-125259.oMew0c.rst new file mode 100644 index 00000000000000..4fa6330abea512 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-17-16-10-29.gh-issue-125259.oMew0c.rst @@ -0,0 +1 @@ +Fix the notes removal logic for errors thrown in enum initialization. From c75ff2ef8eb71d91b1f92db9c2bc7ff18c582ab1 Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Wed, 23 Oct 2024 00:41:33 -0400 Subject: [PATCH 009/127] gh-80958: unittest: discovery support for namespace packages as start directory (#123820) --- Doc/library/unittest.rst | 35 +++++------ Doc/whatsnew/3.14.rst | 9 +++ .../namespace_test_pkg/bar/__init__.py | 0 .../namespace_test_pkg/bar/test_bar.py | 5 ++ .../namespace_test_pkg/noop/no2/__init__.py | 0 .../namespace_test_pkg/noop/no2/test_no2.py | 5 ++ .../namespace_test_pkg/noop/test_noop.py | 5 ++ .../namespace_test_pkg/test_foo.py | 5 ++ Lib/test/test_unittest/test_discovery.py | 54 ++++++++++++++++- Lib/unittest/loader.py | 59 ++++++++++++++----- Makefile.pre.in | 4 ++ ...4-09-07-13-57-49.gh-issue-80958.fVYnqV.rst | 1 + 12 files changed, 145 insertions(+), 37 deletions(-) create mode 100644 Lib/test/test_unittest/namespace_test_pkg/bar/__init__.py create mode 100644 Lib/test/test_unittest/namespace_test_pkg/bar/test_bar.py create mode 100644 Lib/test/test_unittest/namespace_test_pkg/noop/no2/__init__.py create mode 100644 Lib/test/test_unittest/namespace_test_pkg/noop/no2/test_no2.py create mode 100644 Lib/test/test_unittest/namespace_test_pkg/noop/test_noop.py create mode 100644 Lib/test/test_unittest/namespace_test_pkg/test_foo.py create mode 100644 Misc/NEWS.d/next/Library/2024-09-07-13-57-49.gh-issue-80958.fVYnqV.rst diff --git a/Doc/library/unittest.rst b/Doc/library/unittest.rst index c49aba69b12126..38bad9405597dd 100644 --- a/Doc/library/unittest.rst +++ b/Doc/library/unittest.rst @@ -340,28 +340,21 @@ Test modules and packages can customize test loading and discovery by through the `load_tests protocol`_. .. versionchanged:: 3.4 - Test discovery supports :term:`namespace packages ` - for the start directory. Note that you need to specify the top level - directory too (e.g. - ``python -m unittest discover -s root/namespace -t root``). + Test discovery supports :term:`namespace packages `. .. versionchanged:: 3.11 - :mod:`unittest` dropped the :term:`namespace packages ` - support in Python 3.11. It has been broken since Python 3.7. Start directory and - subdirectories containing tests must be regular package that have - ``__init__.py`` file. + Test discovery dropped the :term:`namespace packages ` + support. It has been broken since Python 3.7. + Start directory and its subdirectories containing tests must be regular + package that have ``__init__.py`` file. - Directories containing start directory still can be a namespace package. - In this case, you need to specify start directory as dotted package name, - and target directory explicitly. For example:: + If the start directory is the dotted name of the package, the ancestor packages + can be namespace packages. - # proj/ <-- current directory - # namespace/ - # mypkg/ - # __init__.py - # test_mypkg.py - - python -m unittest discover -s namespace.mypkg -t . +.. versionchanged:: 3.14 + Test discovery supports :term:`namespace package` as start directory again. + To avoid scanning directories unrelated to Python, + tests are not searched in subdirectories that do not contain ``__init__.py``. .. _organizing-tests: @@ -1915,10 +1908,8 @@ Loading and running tests Modules that raise :exc:`SkipTest` on import are recorded as skips, not errors. - .. versionchanged:: 3.4 *start_dir* can be a :term:`namespace packages `. - .. versionchanged:: 3.4 Paths are sorted before being imported so that execution order is the same even if the underlying file system's ordering is not dependent on file name. @@ -1930,11 +1921,13 @@ Loading and running tests .. versionchanged:: 3.11 *start_dir* can not be a :term:`namespace packages `. - It has been broken since Python 3.7 and Python 3.11 officially remove it. + It has been broken since Python 3.7, and Python 3.11 officially removes it. .. versionchanged:: 3.13 *top_level_dir* is only stored for the duration of *discover* call. + .. versionchanged:: 3.14 + *start_dir* can once again be a :term:`namespace package`. The following attributes of a :class:`TestLoader` can be configured either by subclassing or assignment on an instance: diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index d52faa614db94e..1dd6c19018934b 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -421,6 +421,15 @@ unicodedata * The Unicode database has been updated to Unicode 16.0.0. + +unittest +-------- + +* unittest discovery supports :term:`namespace package` as start + directory again. It was removed in Python 3.11. + (Contributed by Jacob Walls in :gh:`80958`.) + + .. Add improved modules above alphabetically, not here at the end. Optimizations diff --git a/Lib/test/test_unittest/namespace_test_pkg/bar/__init__.py b/Lib/test/test_unittest/namespace_test_pkg/bar/__init__.py new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/Lib/test/test_unittest/namespace_test_pkg/bar/test_bar.py b/Lib/test/test_unittest/namespace_test_pkg/bar/test_bar.py new file mode 100644 index 00000000000000..05b184d9eba685 --- /dev/null +++ b/Lib/test/test_unittest/namespace_test_pkg/bar/test_bar.py @@ -0,0 +1,5 @@ +import unittest + +class PassingTest(unittest.TestCase): + def test_true(self): + self.assertTrue(True) diff --git a/Lib/test/test_unittest/namespace_test_pkg/noop/no2/__init__.py b/Lib/test/test_unittest/namespace_test_pkg/noop/no2/__init__.py new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/Lib/test/test_unittest/namespace_test_pkg/noop/no2/test_no2.py b/Lib/test/test_unittest/namespace_test_pkg/noop/no2/test_no2.py new file mode 100644 index 00000000000000..05b184d9eba685 --- /dev/null +++ b/Lib/test/test_unittest/namespace_test_pkg/noop/no2/test_no2.py @@ -0,0 +1,5 @@ +import unittest + +class PassingTest(unittest.TestCase): + def test_true(self): + self.assertTrue(True) diff --git a/Lib/test/test_unittest/namespace_test_pkg/noop/test_noop.py b/Lib/test/test_unittest/namespace_test_pkg/noop/test_noop.py new file mode 100644 index 00000000000000..05b184d9eba685 --- /dev/null +++ b/Lib/test/test_unittest/namespace_test_pkg/noop/test_noop.py @@ -0,0 +1,5 @@ +import unittest + +class PassingTest(unittest.TestCase): + def test_true(self): + self.assertTrue(True) diff --git a/Lib/test/test_unittest/namespace_test_pkg/test_foo.py b/Lib/test/test_unittest/namespace_test_pkg/test_foo.py new file mode 100644 index 00000000000000..05b184d9eba685 --- /dev/null +++ b/Lib/test/test_unittest/namespace_test_pkg/test_foo.py @@ -0,0 +1,5 @@ +import unittest + +class PassingTest(unittest.TestCase): + def test_true(self): + self.assertTrue(True) diff --git a/Lib/test/test_unittest/test_discovery.py b/Lib/test/test_unittest/test_discovery.py index a44b18406c08be..38c9779daaf87d 100644 --- a/Lib/test/test_unittest/test_discovery.py +++ b/Lib/test/test_unittest/test_discovery.py @@ -4,12 +4,14 @@ import sys import types import pickle +from importlib._bootstrap_external import NamespaceLoader from test import support from test.support import import_helper import unittest import unittest.mock import test.test_unittest +from test.test_importlib import util as test_util class TestableTestProgram(unittest.TestProgram): @@ -395,7 +397,7 @@ def restore_isdir(): self.addCleanup(restore_isdir) _find_tests_args = [] - def _find_tests(start_dir, pattern): + def _find_tests(start_dir, pattern, namespace=None): _find_tests_args.append((start_dir, pattern)) return ['tests'] loader._find_tests = _find_tests @@ -815,7 +817,7 @@ def test_discovery_from_dotted_path(self): expectedPath = os.path.abspath(os.path.dirname(test.test_unittest.__file__)) self.wasRun = False - def _find_tests(start_dir, pattern): + def _find_tests(start_dir, pattern, namespace=None): self.wasRun = True self.assertEqual(start_dir, expectedPath) return tests @@ -848,6 +850,54 @@ def restore(): 'Can not use builtin modules ' 'as dotted module names') + def test_discovery_from_dotted_namespace_packages(self): + loader = unittest.TestLoader() + + package = types.ModuleType('package') + package.__name__ = "tests" + package.__path__ = ['/a', '/b'] + package.__file__ = None + package.__spec__ = types.SimpleNamespace( + name=package.__name__, + loader=NamespaceLoader(package.__name__, package.__path__, None), + submodule_search_locations=['/a', '/b'] + ) + + def _import(packagename, *args, **kwargs): + sys.modules[packagename] = package + return package + + _find_tests_args = [] + def _find_tests(start_dir, pattern, namespace=None): + _find_tests_args.append((start_dir, pattern)) + return ['%s/tests' % start_dir] + + loader._find_tests = _find_tests + loader.suiteClass = list + + with unittest.mock.patch('builtins.__import__', _import): + # Since loader.discover() can modify sys.path, restore it when done. + with import_helper.DirsOnSysPath(): + # Make sure to remove 'package' from sys.modules when done. + with test_util.uncache('package'): + suite = loader.discover('package') + + self.assertEqual(suite, ['/a/tests', '/b/tests']) + + def test_discovery_start_dir_is_namespace(self): + """Subdirectory discovery not affected if start_dir is a namespace pkg.""" + loader = unittest.TestLoader() + with ( + import_helper.DirsOnSysPath(os.path.join(os.path.dirname(__file__))), + test_util.uncache('namespace_test_pkg') + ): + suite = loader.discover('namespace_test_pkg') + self.assertEqual( + {list(suite)[0]._tests[0].__module__ for suite in suite._tests if list(suite)}, + # files under namespace_test_pkg.noop not discovered. + {'namespace_test_pkg.test_foo', 'namespace_test_pkg.bar.test_bar'}, + ) + def test_discovery_failed_discovery(self): from test.test_importlib import util diff --git a/Lib/unittest/loader.py b/Lib/unittest/loader.py index 22797b83a68bc8..a52950dad224ee 100644 --- a/Lib/unittest/loader.py +++ b/Lib/unittest/loader.py @@ -274,6 +274,8 @@ def discover(self, start_dir, pattern='test*.py', top_level_dir=None): self._top_level_dir = top_level_dir is_not_importable = False + is_namespace = False + tests = [] if os.path.isdir(os.path.abspath(start_dir)): start_dir = os.path.abspath(start_dir) if start_dir != top_level_dir: @@ -286,12 +288,25 @@ def discover(self, start_dir, pattern='test*.py', top_level_dir=None): is_not_importable = True else: the_module = sys.modules[start_dir] - top_part = start_dir.split('.')[0] - try: - start_dir = os.path.abspath( - os.path.dirname((the_module.__file__))) - except AttributeError: - if the_module.__name__ in sys.builtin_module_names: + if not hasattr(the_module, "__file__") or the_module.__file__ is None: + # look for namespace packages + try: + spec = the_module.__spec__ + except AttributeError: + spec = None + + if spec and spec.submodule_search_locations is not None: + is_namespace = True + + for path in the_module.__path__: + if (not set_implicit_top and + not path.startswith(top_level_dir)): + continue + self._top_level_dir = \ + (path.split(the_module.__name__ + .replace(".", os.path.sep))[0]) + tests.extend(self._find_tests(path, pattern, namespace=True)) + elif the_module.__name__ in sys.builtin_module_names: # builtin module raise TypeError('Can not use builtin modules ' 'as dotted module names') from None @@ -300,14 +315,27 @@ def discover(self, start_dir, pattern='test*.py', top_level_dir=None): f"don't know how to discover from {the_module!r}" ) from None + else: + top_part = start_dir.split('.')[0] + start_dir = os.path.abspath(os.path.dirname((the_module.__file__))) + if set_implicit_top: - self._top_level_dir = self._get_directory_containing_module(top_part) + if not is_namespace: + if sys.modules[top_part].__file__ is None: + self._top_level_dir = os.path.dirname(the_module.__file__) + if self._top_level_dir not in sys.path: + sys.path.insert(0, self._top_level_dir) + else: + self._top_level_dir = \ + self._get_directory_containing_module(top_part) sys.path.remove(top_level_dir) if is_not_importable: raise ImportError('Start directory is not importable: %r' % start_dir) - tests = list(self._find_tests(start_dir, pattern)) + if not is_namespace: + tests = list(self._find_tests(start_dir, pattern)) + self._top_level_dir = original_top_level_dir return self.suiteClass(tests) @@ -343,7 +371,7 @@ def _match_path(self, path, full_path, pattern): # override this method to use alternative matching strategy return fnmatch(path, pattern) - def _find_tests(self, start_dir, pattern): + def _find_tests(self, start_dir, pattern, namespace=False): """Used by discovery. Yields test suites it loads.""" # Handle the __init__ in this package name = self._get_name_from_path(start_dir) @@ -352,7 +380,8 @@ def _find_tests(self, start_dir, pattern): if name != '.' and name not in self._loading_packages: # name is in self._loading_packages while we have called into # loadTestsFromModule with name. - tests, should_recurse = self._find_test_path(start_dir, pattern) + tests, should_recurse = self._find_test_path( + start_dir, pattern, namespace) if tests is not None: yield tests if not should_recurse: @@ -363,7 +392,8 @@ def _find_tests(self, start_dir, pattern): paths = sorted(os.listdir(start_dir)) for path in paths: full_path = os.path.join(start_dir, path) - tests, should_recurse = self._find_test_path(full_path, pattern) + tests, should_recurse = self._find_test_path( + full_path, pattern, False) if tests is not None: yield tests if should_recurse: @@ -371,11 +401,11 @@ def _find_tests(self, start_dir, pattern): name = self._get_name_from_path(full_path) self._loading_packages.add(name) try: - yield from self._find_tests(full_path, pattern) + yield from self._find_tests(full_path, pattern, False) finally: self._loading_packages.discard(name) - def _find_test_path(self, full_path, pattern): + def _find_test_path(self, full_path, pattern, namespace=False): """Used by discovery. Loads tests from a single file, or a directories' __init__.py when @@ -419,7 +449,8 @@ def _find_test_path(self, full_path, pattern): msg % (mod_name, module_dir, expected_dir)) return self.loadTestsFromModule(module, pattern=pattern), False elif os.path.isdir(full_path): - if not os.path.isfile(os.path.join(full_path, '__init__.py')): + if (not namespace and + not os.path.isfile(os.path.join(full_path, '__init__.py'))): return None, False load_tests = None diff --git a/Makefile.pre.in b/Makefile.pre.in index fb6f22d57397db..d6f75a931a3db2 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -2534,6 +2534,10 @@ TESTSUBDIRS= idlelib/idle_test \ test/test_tools \ test/test_ttk \ test/test_unittest \ + test/test_unittest/namespace_test_pkg \ + test/test_unittest/namespace_test_pkg/bar \ + test/test_unittest/namespace_test_pkg/noop \ + test/test_unittest/namespace_test_pkg/noop/no2 \ test/test_unittest/testmock \ test/test_warnings \ test/test_warnings/data \ diff --git a/Misc/NEWS.d/next/Library/2024-09-07-13-57-49.gh-issue-80958.fVYnqV.rst b/Misc/NEWS.d/next/Library/2024-09-07-13-57-49.gh-issue-80958.fVYnqV.rst new file mode 100644 index 00000000000000..f0edd7b1ac6e8b --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-09-07-13-57-49.gh-issue-80958.fVYnqV.rst @@ -0,0 +1 @@ +unittest discovery supports PEP 420 namespace packages as start directory again. From 834ba5aaf21ac7fd123534dae8f9e478ee526aaa Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Wed, 23 Oct 2024 10:50:29 +0300 Subject: [PATCH 010/127] gh-58032: Deprecate the argparse.FileType type converter (GH-124664) --- .../pending-removal-in-future.rst | 21 +++---- Doc/library/argparse.rst | 25 +++++--- Doc/whatsnew/3.14.rst | 6 ++ Lib/argparse.py | 18 ++++-- Lib/test/test_argparse.py | 57 ++++++++++++------- ...4-09-27-13-10-17.gh-issue-58032.0aNAQ0.rst | 1 + 6 files changed, 83 insertions(+), 45 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-09-27-13-10-17.gh-issue-58032.0aNAQ0.rst diff --git a/Doc/deprecations/pending-removal-in-future.rst b/Doc/deprecations/pending-removal-in-future.rst index d77fc86eab0ed6..5a4502ac08a5f0 100644 --- a/Doc/deprecations/pending-removal-in-future.rst +++ b/Doc/deprecations/pending-removal-in-future.rst @@ -4,16 +4,6 @@ Pending removal in future versions The following APIs will be removed in the future, although there is currently no date scheduled for their removal. -* :mod:`argparse`: - - * Nesting argument groups and nesting mutually exclusive - groups are deprecated. - * Passing the undocumented keyword argument *prefix_chars* to - :meth:`~argparse.ArgumentParser.add_argument_group` is now - deprecated. - -* :mod:`array`'s ``'u'`` format code (:gh:`57281`) - * :mod:`builtins`: * ``bool(NotImplemented)``. @@ -43,6 +33,17 @@ although there is currently no date scheduled for their removal. as a single positional argument. (Contributed by Serhiy Storchaka in :gh:`109218`.) +* :mod:`argparse`: + + * Nesting argument groups and nesting mutually exclusive + groups are deprecated. + * Passing the undocumented keyword argument *prefix_chars* to + :meth:`~argparse.ArgumentParser.add_argument_group` is now + deprecated. + * The :class:`argparse.FileType` type converter is deprecated. + +* :mod:`array`'s ``'u'`` format code (:gh:`57281`) + * :mod:`calendar`: ``calendar.January`` and ``calendar.February`` constants are deprecated and replaced by :data:`calendar.JANUARY` and :data:`calendar.FEBRUARY`. diff --git a/Doc/library/argparse.rst b/Doc/library/argparse.rst index ef0db3e9789c98..65663d43f50a9d 100644 --- a/Doc/library/argparse.rst +++ b/Doc/library/argparse.rst @@ -865,16 +865,14 @@ See also :ref:`specifying-ambiguous-arguments`. The supported values are: output files:: >>> parser = argparse.ArgumentParser() - >>> parser.add_argument('infile', nargs='?', type=argparse.FileType('r'), - ... default=sys.stdin) - >>> parser.add_argument('outfile', nargs='?', type=argparse.FileType('w'), - ... default=sys.stdout) + >>> parser.add_argument('infile', nargs='?') + >>> parser.add_argument('outfile', nargs='?') >>> parser.parse_args(['input.txt', 'output.txt']) - Namespace(infile=<_io.TextIOWrapper name='input.txt' encoding='UTF-8'>, - outfile=<_io.TextIOWrapper name='output.txt' encoding='UTF-8'>) + Namespace(infile='input.txt', outfile='output.txt') + >>> parser.parse_args(['input.txt']) + Namespace(infile='input.txt', outfile=None) >>> parser.parse_args([]) - Namespace(infile=<_io.TextIOWrapper name='' encoding='UTF-8'>, - outfile=<_io.TextIOWrapper name='' encoding='UTF-8'>) + Namespace(infile=None, outfile=None) .. index:: single: * (asterisk); in argparse module @@ -1033,7 +1031,6 @@ Common built-in types and functions can be used as type converters: parser.add_argument('distance', type=float) parser.add_argument('street', type=ascii) parser.add_argument('code_point', type=ord) - parser.add_argument('dest_file', type=argparse.FileType('w', encoding='latin-1')) parser.add_argument('datapath', type=pathlib.Path) User defined functions can be used as well: @@ -1827,9 +1824,19 @@ FileType objects >>> parser.parse_args(['-']) Namespace(infile=<_io.TextIOWrapper name='' encoding='UTF-8'>) + .. note:: + + If one argument uses *FileType* and then a subsequent argument fails, + an error is reported but the file is not automatically closed. + This can also clobber the output files. + In this case, it would be better to wait until after the parser has + run and then use the :keyword:`with`-statement to manage the files. + .. versionchanged:: 3.4 Added the *encodings* and *errors* parameters. + .. deprecated:: 3.14 + Argument groups ^^^^^^^^^^^^^^^ diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 1dd6c19018934b..b389e6da4c0ac3 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -464,6 +464,12 @@ Deprecated as a single positional argument. (Contributed by Serhiy Storchaka in :gh:`109218`.) +* :mod:`argparse`: + Deprecated the :class:`argparse.FileType` type converter. + Anything with resource management should be done downstream after the + arguments are parsed. + (Contributed by Serhiy Storchaka in :gh:`58032`.) + * :mod:`multiprocessing` and :mod:`concurrent.futures`: The default start method (see :ref:`multiprocessing-start-methods`) changed away from *fork* to *forkserver* on platforms where it was not already diff --git a/Lib/argparse.py b/Lib/argparse.py index 024622bec17c3b..9746173984c6ca 100644 --- a/Lib/argparse.py +++ b/Lib/argparse.py @@ -18,11 +18,12 @@ 'integers', metavar='int', nargs='+', type=int, help='an integer to be summed') parser.add_argument( - '--log', default=sys.stdout, type=argparse.FileType('w'), + '--log', help='the file where the sum should be written') args = parser.parse_args() - args.log.write('%s' % sum(args.integers)) - args.log.close() + with (open(args.log, 'w') if args.log is not None + else contextlib.nullcontext(sys.stdout)) as log: + log.write('%s' % sum(args.integers)) The module contains the following public classes: @@ -39,7 +40,8 @@ - FileType -- A factory for defining types of files to be created. As the example above shows, instances of FileType are typically passed as - the type= argument of add_argument() calls. + the type= argument of add_argument() calls. Deprecated since + Python 3.14. - Action -- The base class for parser actions. Typically actions are selected by passing strings like 'store_true' or 'append_const' to @@ -1252,7 +1254,7 @@ def __call__(self, parser, namespace, values, option_string=None): # ============== class FileType(object): - """Factory for creating file object types + """Deprecated factory for creating file object types Instances of FileType are typically passed as type= arguments to the ArgumentParser add_argument() method. @@ -1269,6 +1271,12 @@ class FileType(object): """ def __init__(self, mode='r', bufsize=-1, encoding=None, errors=None): + import warnings + warnings.warn( + "FileType is deprecated. Simply open files after parsing arguments.", + category=PendingDeprecationWarning, + stacklevel=2 + ) self._mode = mode self._bufsize = bufsize self._encoding = encoding diff --git a/Lib/test/test_argparse.py b/Lib/test/test_argparse.py index 4bd7a935b9b757..ed1c5c34e526aa 100644 --- a/Lib/test/test_argparse.py +++ b/Lib/test/test_argparse.py @@ -1773,27 +1773,43 @@ def convert_arg_line_to_args(self, arg_line): # Type conversion tests # ===================== +def FileType(*args, **kwargs): + with warnings.catch_warnings(): + warnings.filterwarnings('ignore', 'FileType is deprecated', + PendingDeprecationWarning, __name__) + return argparse.FileType(*args, **kwargs) + + +class TestFileTypeDeprecation(TestCase): + + def test(self): + with self.assertWarns(PendingDeprecationWarning) as cm: + argparse.FileType() + self.assertIn('FileType is deprecated', str(cm.warning)) + self.assertEqual(cm.filename, __file__) + + class TestFileTypeRepr(TestCase): def test_r(self): - type = argparse.FileType('r') + type = FileType('r') self.assertEqual("FileType('r')", repr(type)) def test_wb_1(self): - type = argparse.FileType('wb', 1) + type = FileType('wb', 1) self.assertEqual("FileType('wb', 1)", repr(type)) def test_r_latin(self): - type = argparse.FileType('r', encoding='latin_1') + type = FileType('r', encoding='latin_1') self.assertEqual("FileType('r', encoding='latin_1')", repr(type)) def test_w_big5_ignore(self): - type = argparse.FileType('w', encoding='big5', errors='ignore') + type = FileType('w', encoding='big5', errors='ignore') self.assertEqual("FileType('w', encoding='big5', errors='ignore')", repr(type)) def test_r_1_replace(self): - type = argparse.FileType('r', 1, errors='replace') + type = FileType('r', 1, errors='replace') self.assertEqual("FileType('r', 1, errors='replace')", repr(type)) @@ -1847,7 +1863,6 @@ def __eq__(self, other): text = text.decode('ascii') return self.name == other.name == text - class TestFileTypeR(TempDirMixin, ParserTestCase): """Test the FileType option/argument type for reading files""" @@ -1860,8 +1875,8 @@ def setUp(self): self.create_readonly_file('readonly') argument_signatures = [ - Sig('-x', type=argparse.FileType()), - Sig('spam', type=argparse.FileType('r')), + Sig('-x', type=FileType()), + Sig('spam', type=FileType('r')), ] failures = ['-x', '', 'non-existent-file.txt'] successes = [ @@ -1881,7 +1896,7 @@ def setUp(self): file.close() argument_signatures = [ - Sig('-c', type=argparse.FileType('r'), default='no-file.txt'), + Sig('-c', type=FileType('r'), default='no-file.txt'), ] # should provoke no such file error failures = [''] @@ -1900,8 +1915,8 @@ def setUp(self): file.write(file_name) argument_signatures = [ - Sig('-x', type=argparse.FileType('rb')), - Sig('spam', type=argparse.FileType('rb')), + Sig('-x', type=FileType('rb')), + Sig('spam', type=FileType('rb')), ] failures = ['-x', ''] successes = [ @@ -1939,8 +1954,8 @@ def setUp(self): self.create_writable_file('writable') argument_signatures = [ - Sig('-x', type=argparse.FileType('w')), - Sig('spam', type=argparse.FileType('w')), + Sig('-x', type=FileType('w')), + Sig('spam', type=FileType('w')), ] failures = ['-x', '', 'readonly'] successes = [ @@ -1962,8 +1977,8 @@ def setUp(self): self.create_writable_file('writable') argument_signatures = [ - Sig('-x', type=argparse.FileType('x')), - Sig('spam', type=argparse.FileType('x')), + Sig('-x', type=FileType('x')), + Sig('spam', type=FileType('x')), ] failures = ['-x', '', 'readonly', 'writable'] successes = [ @@ -1977,8 +1992,8 @@ class TestFileTypeWB(TempDirMixin, ParserTestCase): """Test the FileType option/argument type for writing binary files""" argument_signatures = [ - Sig('-x', type=argparse.FileType('wb')), - Sig('spam', type=argparse.FileType('wb')), + Sig('-x', type=FileType('wb')), + Sig('spam', type=FileType('wb')), ] failures = ['-x', ''] successes = [ @@ -1994,8 +2009,8 @@ class TestFileTypeXB(TestFileTypeX): "Test the FileType option/argument type for writing new binary files only" argument_signatures = [ - Sig('-x', type=argparse.FileType('xb')), - Sig('spam', type=argparse.FileType('xb')), + Sig('-x', type=FileType('xb')), + Sig('spam', type=FileType('xb')), ] successes = [ ('-x foo bar', NS(x=WFile('foo'), spam=WFile('bar'))), @@ -2007,7 +2022,7 @@ class TestFileTypeOpenArgs(TestCase): """Test that open (the builtin) is correctly called""" def test_open_args(self): - FT = argparse.FileType + FT = FileType cases = [ (FT('rb'), ('rb', -1, None, None)), (FT('w', 1), ('w', 1, None, None)), @@ -2022,7 +2037,7 @@ def test_open_args(self): def test_invalid_file_type(self): with self.assertRaises(ValueError): - argparse.FileType('b')('-test') + FileType('b')('-test') class TestFileTypeMissingInitialization(TestCase): diff --git a/Misc/NEWS.d/next/Library/2024-09-27-13-10-17.gh-issue-58032.0aNAQ0.rst b/Misc/NEWS.d/next/Library/2024-09-27-13-10-17.gh-issue-58032.0aNAQ0.rst new file mode 100644 index 00000000000000..278512b22a8d3f --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-09-27-13-10-17.gh-issue-58032.0aNAQ0.rst @@ -0,0 +1 @@ +Deprecate the :class:`argparse.FileType` type converter. From de0d5c6e2e12f24ade1ccc457afaf5fb2c650c64 Mon Sep 17 00:00:00 2001 From: Irit Katriel <1055913+iritkatriel@users.noreply.github.com> Date: Wed, 23 Oct 2024 14:48:39 +0100 Subject: [PATCH 011/127] gh-119786: move 'changing grammar' checklist from devguide to InternalDocs (#125874) --- InternalDocs/README.md | 2 + InternalDocs/changing_grammar.md | 63 ++++++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+) create mode 100644 InternalDocs/changing_grammar.md diff --git a/InternalDocs/README.md b/InternalDocs/README.md index 48c893bde2a631..2ef6e653ac19d4 100644 --- a/InternalDocs/README.md +++ b/InternalDocs/README.md @@ -19,6 +19,8 @@ Compiling Python Source Code - [Compiler Design](compiler.md) +- [Changing Python's Grammar](changing_grammar.md) + Runtime Objects --- diff --git a/InternalDocs/changing_grammar.md b/InternalDocs/changing_grammar.md new file mode 100644 index 00000000000000..1a5eebdc1418dc --- /dev/null +++ b/InternalDocs/changing_grammar.md @@ -0,0 +1,63 @@ +# Changing CPython's grammar + +There's more to changing Python's grammar than editing +[`Grammar/python.gram`](../Grammar/python.gram). +Below is a checklist of things that may need to change. + +> [!NOTE] +> +> Many of these changes require re-generating some of the derived +> files. If things mysteriously don't work, it may help to run +> ``make clean``. + +## Checklist + +* [`Grammar/python.gram`](../Grammar/python.gram): The grammar definition, + with actions that build AST nodes. + After changing it, run ``make regen-pegen`` (or ``build.bat --regen`` on Windows), + to regenerate [`Parser/parser.c`](../Parser/parser.c). + (This runs Python's parser generator, [`Tools/peg_generator`](../Tools/peg_generator)). + +* [`Grammar/Tokens`](../Grammar/Tokens) is a place for adding new token types. After + changing it, run ``make regen-token`` to regenerate + [`Include/internal/pycore_token.h`](../Include/internal/pycore_token.h), + [`Parser/token.c`](../Parser/token.c), [`Lib/token.py`](../Lib/token.py) + and [`Doc/library/token-list.inc`](../Doc/library/token-list.inc). + If you change both ``python.gram`` and ``Tokens``, run ``make regen-token`` + before ``make regen-pegen``. + On Windows, ``build.bat --regen`` will regenerate both at the same time. + +* [`Parser/Python.asdl`](../Parser/Python.asdl) may need changes to match the grammar. + Then run ``make regen-ast`` to regenerate + [`Include/internal/pycore_ast.h`](../Include/internal/pycore_ast.h) and + [`Python/Python-ast.c`](../Python/Python-ast.c). + +* [`Parser/lexer/`](../Parser/lexer/) contains the tokenization code. + This is where you would add a new type of comment or string literal, for example. + +* [`Python/ast.c`](../Python/ast.c) will need changes to validate AST objects + involved with the grammar change. + +* [`Python/ast_unparse.c`](../Python/ast_unparse.c) will need changes to unparse + AST involved with the grammar change ("unparsing" is used to turn annotations + into strings per [PEP 563](https://peps.python.org/pep-0563/). + +* The [`compiler`](compiler.md) may need to change when there are changes + to the `AST`. + +* ``_Unparser`` in the [`Lib/ast.py`](../Lib/ast.py) file may need changes + to accommodate any modifications in the AST nodes. + +* [`Doc/library/ast.rst`](../Doc/library/ast.rst) may need to be updated + to reflect changes to AST nodes. + +* Add some usage of your new syntax to ``test_grammar.py``. + +* Certain changes may require tweaks to the library module + [`pyclbr`](https://docs.python.org/3/library/pyclbr.html#module-pyclbr). + +* [`Lib/tokenize.py`](../Lib/tokenize.py) needs changes to match changes + to the tokenizer. + +* Documentation must be written! Specifically, one or more of the pages in + [`Doc/reference/`](../Doc/reference/) will need to be updated. From 6f26d496d3c894970ee18a125e9100791ebc2b36 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Wed, 23 Oct 2024 10:10:06 -0600 Subject: [PATCH 012/127] gh-125286: Share the Main Refchain With Legacy Interpreters (gh-125709) They used to be shared, before 3.12. Returning to sharing them resolves a failure on Py_TRACE_REFS builds. Co-authored-by: Petr Viktorin --- Doc/library/sys.rst | 29 +++++++++++++ Doc/using/configure.rst | 2 +- Doc/whatsnew/3.14.rst | 9 ++++ Objects/object.c | 92 ++++++++++++++++++++--------------------- Objects/unicodeobject.c | 8 ---- Python/pylifecycle.c | 14 +++++++ Python/pystate.c | 6 +-- 7 files changed, 99 insertions(+), 61 deletions(-) diff --git a/Doc/library/sys.rst b/Doc/library/sys.rst index 20a06a1ecd1a4c..37f1719db607de 100644 --- a/Doc/library/sys.rst +++ b/Doc/library/sys.rst @@ -920,6 +920,35 @@ always available. It is not guaranteed to exist in all implementations of Python. +.. function:: getobjects(limit[, type]) + + This function only exists if CPython was built using the + specialized configure option :option:`--with-trace-refs`. + It is intended only for debugging garbage-collection issues. + + Return a list of up to *limit* dynamically allocated Python objects. + If *type* is given, only objects of that exact type (not subtypes) + are included. + + Objects from the list are not safe to use. + Specifically, the result will include objects from all interpreters that + share their object allocator state (that is, ones created with + :c:member:`PyInterpreterConfig.use_main_obmalloc` set to 1 + or using :c:func:`Py_NewInterpreter`, and the + :ref:`main interpreter `). + Mixing objects from different interpreters may lead to crashes + or other unexpected behavior. + + .. impl-detail:: + + This function should be used for specialized purposes only. + It is not guaranteed to exist in all implementations of Python. + + .. versionchanged:: next + + The result may include objects from other interpreters. + + .. function:: getprofile() .. index:: diff --git a/Doc/using/configure.rst b/Doc/using/configure.rst index 10cdf2376229ff..0e7b1be5b4bc2e 100644 --- a/Doc/using/configure.rst +++ b/Doc/using/configure.rst @@ -702,7 +702,7 @@ Debug options Effects: * Define the ``Py_TRACE_REFS`` macro. - * Add :func:`!sys.getobjects` function. + * Add :func:`sys.getobjects` function. * Add :envvar:`PYTHONDUMPREFS` environment variable. The :envvar:`PYTHONDUMPREFS` environment variable can be used to dump diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index b389e6da4c0ac3..64f3d18e7fc6a4 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -416,6 +416,15 @@ symtable (Contributed by Bénédikt Tran in :gh:`120029`.) + +sys +--- + +* The previously undocumented special function :func:`sys.getobjects`, + which only exists in specialized builds of Python, may now return objects + from other interpreters than the one it's called in. + + unicodedata ----------- diff --git a/Objects/object.c b/Objects/object.c index 1a15b70d3dc63f..7cc74a8dc0d8eb 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -171,6 +171,48 @@ _PyDebug_PrintTotalRefs(void) { #define REFCHAIN(interp) interp->object_state.refchain #define REFCHAIN_VALUE ((void*)(uintptr_t)1) +static inline int +has_own_refchain(PyInterpreterState *interp) +{ + if (interp->feature_flags & Py_RTFLAGS_USE_MAIN_OBMALLOC) { + return (_Py_IsMainInterpreter(interp) + || _PyInterpreterState_Main() == NULL); + } + return 1; +} + +static int +refchain_init(PyInterpreterState *interp) +{ + if (!has_own_refchain(interp)) { + // Legacy subinterpreters share a refchain with the main interpreter. + REFCHAIN(interp) = REFCHAIN(_PyInterpreterState_Main()); + return 0; + } + _Py_hashtable_allocator_t alloc = { + // Don't use default PyMem_Malloc() and PyMem_Free() which + // require the caller to hold the GIL. + .malloc = PyMem_RawMalloc, + .free = PyMem_RawFree, + }; + REFCHAIN(interp) = _Py_hashtable_new_full( + _Py_hashtable_hash_ptr, _Py_hashtable_compare_direct, + NULL, NULL, &alloc); + if (REFCHAIN(interp) == NULL) { + return -1; + } + return 0; +} + +static void +refchain_fini(PyInterpreterState *interp) +{ + if (has_own_refchain(interp) && REFCHAIN(interp) != NULL) { + _Py_hashtable_destroy(REFCHAIN(interp)); + } + REFCHAIN(interp) = NULL; +} + bool _PyRefchain_IsTraced(PyInterpreterState *interp, PyObject *obj) { @@ -2191,16 +2233,7 @@ PyStatus _PyObject_InitState(PyInterpreterState *interp) { #ifdef Py_TRACE_REFS - _Py_hashtable_allocator_t alloc = { - // Don't use default PyMem_Malloc() and PyMem_Free() which - // require the caller to hold the GIL. - .malloc = PyMem_RawMalloc, - .free = PyMem_RawFree, - }; - REFCHAIN(interp) = _Py_hashtable_new_full( - _Py_hashtable_hash_ptr, _Py_hashtable_compare_direct, - NULL, NULL, &alloc); - if (REFCHAIN(interp) == NULL) { + if (refchain_init(interp) < 0) { return _PyStatus_NO_MEMORY(); } #endif @@ -2211,8 +2244,7 @@ void _PyObject_FiniState(PyInterpreterState *interp) { #ifdef Py_TRACE_REFS - _Py_hashtable_destroy(REFCHAIN(interp)); - REFCHAIN(interp) = NULL; + refchain_fini(interp); #endif } @@ -2501,42 +2533,6 @@ _Py_ResurrectReference(PyObject *op) #ifdef Py_TRACE_REFS -/* Make sure the ref is associated with the right interpreter. - * This only needs special attention for heap-allocated objects - * that have been immortalized, and only when the object might - * outlive the interpreter where it was created. That means the - * object was necessarily created using a global allocator - * (i.e. from the main interpreter). Thus in that specific case - * we move the object over to the main interpreter's refchain. - * - * This was added for the sake of the immortal interned strings, - * where legacy subinterpreters share the main interpreter's - * interned dict (and allocator), and therefore the strings can - * outlive the subinterpreter. - * - * It may make sense to fold this into _Py_SetImmortalUntracked(), - * but that requires further investigation. In the meantime, it is - * up to the caller to know if this is needed. There should be - * very few cases. - */ -void -_Py_NormalizeImmortalReference(PyObject *op) -{ - assert(_Py_IsImmortal(op)); - PyInterpreterState *interp = _PyInterpreterState_GET(); - if (!_PyRefchain_IsTraced(interp, op)) { - return; - } - PyInterpreterState *main_interp = _PyInterpreterState_Main(); - if (interp != main_interp - && interp->feature_flags & Py_RTFLAGS_USE_MAIN_OBMALLOC) - { - assert(!_PyRefchain_IsTraced(main_interp, op)); - _PyRefchain_Remove(interp, op); - _PyRefchain_Trace(main_interp, op); - } -} - void _Py_ForgetReference(PyObject *op) { diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index b94a74c2c688a9..9cd9781e412524 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -15444,10 +15444,6 @@ _PyUnicode_InternStatic(PyInterpreterState *interp, PyObject **p) assert(*p); } -#ifdef Py_TRACE_REFS -extern void _Py_NormalizeImmortalReference(PyObject *); -#endif - static void immortalize_interned(PyObject *s) { @@ -15463,10 +15459,6 @@ immortalize_interned(PyObject *s) #endif _PyUnicode_STATE(s).interned = SSTATE_INTERNED_IMMORTAL; _Py_SetImmortal(s); -#ifdef Py_TRACE_REFS - /* Make sure the ref is associated with the right interpreter. */ - _Py_NormalizeImmortalReference(s); -#endif } static /* non-null */ PyObject* diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index b8f424854ecb86..8f38fbedae9842 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -674,6 +674,13 @@ pycore_create_interpreter(_PyRuntimeState *runtime, return status; } + // This could be done in init_interpreter() (in pystate.c) if it + // didn't depend on interp->feature_flags being set already. + status = _PyObject_InitState(interp); + if (_PyStatus_EXCEPTION(status)) { + return status; + } + // initialize the interp->obmalloc state. This must be done after // the settings are loaded (so that feature_flags are set) but before // any calls are made to obmalloc functions. @@ -2297,6 +2304,13 @@ new_interpreter(PyThreadState **tstate_p, goto error; } + // This could be done in init_interpreter() (in pystate.c) if it + // didn't depend on interp->feature_flags being set already. + status = _PyObject_InitState(interp); + if (_PyStatus_EXCEPTION(status)) { + return status; + } + // initialize the interp->obmalloc state. This must be done after // the settings are loaded (so that feature_flags are set) but before // any calls are made to obmalloc functions. diff --git a/Python/pystate.c b/Python/pystate.c index 7df872cd6d7d8a..36b31f3b9e4200 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -629,10 +629,8 @@ init_interpreter(PyInterpreterState *interp, assert(next != NULL || (interp == runtime->interpreters.main)); interp->next = next; - PyStatus status = _PyObject_InitState(interp); - if (_PyStatus_EXCEPTION(status)) { - return status; - } + // We would call _PyObject_InitState() at this point + // if interp->feature_flags were alredy set. _PyEval_InitState(interp); _PyGC_InitState(&interp->gc); From 9c01db40aa5edbd75ce50342c08f7ed018ee7864 Mon Sep 17 00:00:00 2001 From: Wulian <1055917385@qq.com> Date: Thu, 24 Oct 2024 04:29:32 +0800 Subject: [PATCH 013/127] gh-125665: Update turtledemo docstrings with correct file names (#125691) Co-authored-by: Wulian Co-authored-by: Terry Jan Reedy --- Doc/library/turtle.rst | 3 --- Lib/turtledemo/bytedesign.py | 4 +--- Lib/turtledemo/chaos.py | 8 +++----- Lib/turtledemo/clock.py | 9 ++------- Lib/turtledemo/colormixer.py | 3 +-- Lib/turtledemo/forest.py | 15 ++++++--------- Lib/turtledemo/fractalcurves.py | 4 +--- Lib/turtledemo/lindenmayer.py | 4 +--- Lib/turtledemo/minimal_hanoi.py | 7 +------ Lib/turtledemo/nim.py | 4 +--- Lib/turtledemo/paint.py | 15 +++++---------- Lib/turtledemo/peace.py | 4 +--- Lib/turtledemo/penrose.py | 6 ++---- Lib/turtledemo/planet_and_moon.py | 4 +--- Lib/turtledemo/rosette.py | 4 +--- Lib/turtledemo/round_dance.py | 7 +------ Lib/turtledemo/sorting_animate.py | 7 +------ Lib/turtledemo/tree.py | 4 +--- Lib/turtledemo/two_canvases.py | 2 +- Lib/turtledemo/yinyang.py | 4 +--- 20 files changed, 32 insertions(+), 86 deletions(-) diff --git a/Doc/library/turtle.rst b/Doc/library/turtle.rst index efa4b6f8f1d3f9..8eb4f8271fcfae 100644 --- a/Doc/library/turtle.rst +++ b/Doc/library/turtle.rst @@ -2778,9 +2778,6 @@ Changes since Python 3.0 :func:`Screen.numinput `. These pop up input dialogs and return strings and numbers respectively. -- Two example scripts :file:`tdemo_nim.py` and :file:`tdemo_round_dance.py` - have been added to the :file:`Lib/turtledemo` directory. - .. doctest:: :skipif: _tkinter is None diff --git a/Lib/turtledemo/bytedesign.py b/Lib/turtledemo/bytedesign.py index 476cdaabfceab1..a5d76a6b6ff295 100644 --- a/Lib/turtledemo/bytedesign.py +++ b/Lib/turtledemo/bytedesign.py @@ -1,6 +1,4 @@ -""" turtle-example-suite: - - tdemo_bytedesign.py +"""turtledemo/bytedesign.py An example adapted from the example-suite of PythonCard's turtle graphics. diff --git a/Lib/turtledemo/chaos.py b/Lib/turtledemo/chaos.py index 6a45d0d807ef0b..b25f0fa42c901d 100644 --- a/Lib/turtledemo/chaos.py +++ b/Lib/turtledemo/chaos.py @@ -1,9 +1,7 @@ -# File: tdemo_chaos.py -# Author: Gregor Lingl -# Date: 2009-06-24 - -# A demonstration of chaos +"""turtledemo/chaos.py +A demonstration of chaos. +""" from turtle import * N = 80 diff --git a/Lib/turtledemo/clock.py b/Lib/turtledemo/clock.py index 8a630e29b8da50..8b639066c4f440 100644 --- a/Lib/turtledemo/clock.py +++ b/Lib/turtledemo/clock.py @@ -1,12 +1,7 @@ -""" turtle-example-suite: - - turtledemo/clock.py +"""turtledemo/clock.py Enhanced clock-program, showing date -and time - ------------------------------------ - Press STOP to exit the program! - ------------------------------------ +and time. """ from turtle import * from datetime import datetime diff --git a/Lib/turtledemo/colormixer.py b/Lib/turtledemo/colormixer.py index 448db83361a649..f66012c8154317 100644 --- a/Lib/turtledemo/colormixer.py +++ b/Lib/turtledemo/colormixer.py @@ -1,5 +1,4 @@ -# colormixer - +"""turtledemo/colormixer.py""" from turtle import Screen, Turtle, mainloop class ColorTurtle(Turtle): diff --git a/Lib/turtledemo/forest.py b/Lib/turtledemo/forest.py index cac553223828db..e1fa85a577ffce 100644 --- a/Lib/turtledemo/forest.py +++ b/Lib/turtledemo/forest.py @@ -1,14 +1,11 @@ -""" turtlegraphics-example-suite: +"""turtledemo/forest.py - tdemo_forest.py +Displays a 'forest' of 3 breadth-first trees, +similar to the one in tree.py. +For further details, see tree.py. -Displays a 'forest' of 3 breadth-first-trees -similar to the one in tree. -For further remarks see tree.py - -This example is a 'breadth-first'-rewrite of -a Logo program written by Erich Neuwirth. See -http://homepage.univie.ac.at/erich.neuwirth/ +This example is a breadth-first rewrite of +a Logo program by Erich Neuwirth. """ from turtle import Turtle, colormode, tracer, mainloop from random import randrange diff --git a/Lib/turtledemo/fractalcurves.py b/Lib/turtledemo/fractalcurves.py index fda193e06fedee..2d0a506a4f5b9f 100644 --- a/Lib/turtledemo/fractalcurves.py +++ b/Lib/turtledemo/fractalcurves.py @@ -1,6 +1,4 @@ -""" turtle-example-suite: - - tdemo_fractalCurves.py +"""turtledemo/fractalcurves.py This program draws two fractal-curve-designs: (1) A hilbert curve (in a box) diff --git a/Lib/turtledemo/lindenmayer.py b/Lib/turtledemo/lindenmayer.py index 7c7a84796c3c28..eb309afb9381b1 100644 --- a/Lib/turtledemo/lindenmayer.py +++ b/Lib/turtledemo/lindenmayer.py @@ -1,6 +1,4 @@ -""" turtle-example-suite: - - xtx_lindenmayer_indian.py +"""turtledemo/lindenmayer.py Each morning women in Tamil Nadu, in southern India, place designs, created by using rice diff --git a/Lib/turtledemo/minimal_hanoi.py b/Lib/turtledemo/minimal_hanoi.py index 08d8b630fec3b4..e44330eaaf7f18 100644 --- a/Lib/turtledemo/minimal_hanoi.py +++ b/Lib/turtledemo/minimal_hanoi.py @@ -1,6 +1,4 @@ -""" turtle-example-suite: - - tdemo_minimal_hanoi.py +"""turtledemo/minimal_hanoi.py A minimal 'Towers of Hanoi' animation: A tower of 6 discs is transferred from the @@ -12,9 +10,6 @@ Discs are turtles with shape "square", but stretched to rectangles by shapesize() - --------------------------------------- - To exit press STOP button - --------------------------------------- """ from turtle import * diff --git a/Lib/turtledemo/nim.py b/Lib/turtledemo/nim.py index 9ae6cc5c01b903..f87c479714d662 100644 --- a/Lib/turtledemo/nim.py +++ b/Lib/turtledemo/nim.py @@ -1,6 +1,4 @@ -""" turtle-example-suite: - - tdemo_nim.py +"""turtledemo/nim.py Play nim against the computer. The player who takes the last stick is the winner. diff --git a/Lib/turtledemo/paint.py b/Lib/turtledemo/paint.py index 6e63d004454589..780300fb2da9d1 100644 --- a/Lib/turtledemo/paint.py +++ b/Lib/turtledemo/paint.py @@ -1,12 +1,9 @@ -""" turtle-example-suite: +"""turtledemo/paint.py - tdemo_paint.py - -A simple event-driven paint program - -- left mouse button moves turtle -- middle mouse button changes color -- right mouse button toggles between pen up +A simple event-driven paint program. +- Left mouse button moves turtle. +- Middle mouse button changes color. +- Right mouse button toggles between pen up (no line drawn when the turtle moves) and pen down (line is drawn). If pen up follows at least two pen-down moves, the polygon that @@ -14,8 +11,6 @@ ------------------------------------------- Play around by clicking into the canvas using all three mouse buttons. - ------------------------------------------- - To exit press STOP button ------------------------------------------- """ from turtle import * diff --git a/Lib/turtledemo/peace.py b/Lib/turtledemo/peace.py index fd6abe390ef198..d86c94a48a2472 100644 --- a/Lib/turtledemo/peace.py +++ b/Lib/turtledemo/peace.py @@ -1,6 +1,4 @@ -""" turtle-example-suite: - - tdemo_peace.py +"""turtledemo/peace.py A simple drawing suitable as a beginner's programming example. Aside from the diff --git a/Lib/turtledemo/penrose.py b/Lib/turtledemo/penrose.py index ac12c899d3844e..ceaefedac24a67 100644 --- a/Lib/turtledemo/penrose.py +++ b/Lib/turtledemo/penrose.py @@ -1,6 +1,4 @@ -""" xturtle-example-suite: - - xtx_kites_and_darts.py +"""turtledemo/penrose.py Constructs two aperiodic penrose-tilings, consisting of kites and darts, by the method @@ -11,7 +9,7 @@ consisting of five darts. For more information see: - http://en.wikipedia.org/wiki/Penrose_tiling + https://en.wikipedia.org/wiki/Penrose_tiling ------------------------------------------- """ from turtle import * diff --git a/Lib/turtledemo/planet_and_moon.py b/Lib/turtledemo/planet_and_moon.py index c0e2c5b79e173e..571afcf922103f 100644 --- a/Lib/turtledemo/planet_and_moon.py +++ b/Lib/turtledemo/planet_and_moon.py @@ -1,6 +1,4 @@ -""" turtle-example-suite: - - tdemo_planets_and_moon.py +"""turtledemo/planets_and_moon.py Gravitational system simulation using the approximation method from Feynman-lectures, diff --git a/Lib/turtledemo/rosette.py b/Lib/turtledemo/rosette.py index 47d0f00e9da9d1..48897a620f9d8b 100644 --- a/Lib/turtledemo/rosette.py +++ b/Lib/turtledemo/rosette.py @@ -1,6 +1,4 @@ -""" turtle-example-suite: - - tdemo_wikipedia3.py +"""turtledemo/rosette.py This example is inspired by the Wikipedia article on turtle diff --git a/Lib/turtledemo/round_dance.py b/Lib/turtledemo/round_dance.py index 10383614c6e974..9da6389b213207 100644 --- a/Lib/turtledemo/round_dance.py +++ b/Lib/turtledemo/round_dance.py @@ -1,9 +1,4 @@ -""" turtle-example-suite: - - tdemo_round_dance.py - -(Needs version 1.1 of the turtle module that -comes with Python 3.1) +"""turtledemo/round_dance.py Dancing turtles have a compound shape consisting of a series of triangles of diff --git a/Lib/turtledemo/sorting_animate.py b/Lib/turtledemo/sorting_animate.py index ef4946db38250e..e0a2877cd5d621 100644 --- a/Lib/turtledemo/sorting_animate.py +++ b/Lib/turtledemo/sorting_animate.py @@ -1,6 +1,4 @@ -""" - - sorting_animation.py +"""turtledemo/sorting_animation.py A minimal sorting algorithm animation: Sorts a shelf of 10 blocks using insertion @@ -10,9 +8,6 @@ Blocks are turtles with shape "square", but stretched to rectangles by shapesize() - --------------------------------------- - To exit press space button - --------------------------------------- """ from turtle import * import random diff --git a/Lib/turtledemo/tree.py b/Lib/turtledemo/tree.py index 12729e23688a48..6ad8fcc854a155 100644 --- a/Lib/turtledemo/tree.py +++ b/Lib/turtledemo/tree.py @@ -1,6 +1,4 @@ -""" turtle-example-suite: - - tdemo_tree.py +"""turtledemo/tree.py Displays a 'breadth-first-tree' - in contrast to the classical Logo tree drawing programs, diff --git a/Lib/turtledemo/two_canvases.py b/Lib/turtledemo/two_canvases.py index f3602585ab0592..2c8020378edf1b 100644 --- a/Lib/turtledemo/two_canvases.py +++ b/Lib/turtledemo/two_canvases.py @@ -1,4 +1,4 @@ -"""turtledemo.two_canvases +"""turtledemo/two_canvases.py Use TurtleScreen and RawTurtle to draw on two distinct canvases in a separate window. The diff --git a/Lib/turtledemo/yinyang.py b/Lib/turtledemo/yinyang.py index 791060d17e6b6a..6e92d4bf739194 100644 --- a/Lib/turtledemo/yinyang.py +++ b/Lib/turtledemo/yinyang.py @@ -1,6 +1,4 @@ -""" turtle-example-suite: - - tdemo_yinyang.py +"""turtledemo/yinyang.py Another drawing suitable as a beginner's programming example. From 13c9fa3d64e0653d696daad716703ef05fd5002b Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Wed, 23 Oct 2024 23:37:06 +0200 Subject: [PATCH 014/127] gh-121938: ctypes: Skip test of _pack_-ed struct with c_int64 on x86 (GH-125877) The current auto-generated tests don't cover this; it's instead tested manually. --- Lib/test/test_ctypes/test_generated_structs.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/Lib/test/test_ctypes/test_generated_structs.py b/Lib/test/test_ctypes/test_generated_structs.py index cbd73c4e911e4e..d61754d6d49e70 100644 --- a/Lib/test/test_ctypes/test_generated_structs.py +++ b/Lib/test/test_ctypes/test_generated_structs.py @@ -135,6 +135,18 @@ class Packed3(Structure): @register() class Packed4(Structure): + def _maybe_skip(): + # `_pack_` enables MSVC-style packing, but keeps platform-specific + # alignments. + # The C code we generate for GCC/clang currently uses + # `__attribute__((ms_struct))`, which activates MSVC layout *and* + # alignments, that is, sizeof(basic type) == alignment(basic type). + # On a Pentium, int64 is 32-bit aligned, so the two won't match. + # The expected behavior is instead tested in + # StructureTestCase.test_packed, over in test_structures.py. + if sizeof(c_int64) != alignment(c_int64): + raise unittest.SkipTest('cannot test on this platform') + _fields_ = [('a', c_int8), ('b', c_int64)] _pack_ = 8 @@ -436,6 +448,8 @@ def test_generated_data(self): """ for name, cls in TESTCASES.items(): with self.subTest(name=name): + if _maybe_skip := getattr(cls, '_maybe_skip', None): + _maybe_skip() expected = iter(_ctypes_test.get_generated_test_data(name)) expected_name = next(expected) if expected_name is None: From 8f2c0f7a03b71485b5635cb47c000e4e8ace8800 Mon Sep 17 00:00:00 2001 From: Tian Gao Date: Wed, 23 Oct 2024 15:04:30 -0700 Subject: [PATCH 015/127] gh-125884: Support breakpoint on functions with annotations (#125892) --- Lib/pdb.py | 7 ++-- Lib/test/test_pdb.py | 36 +++++++++++++++++++ ...-10-23-17-45-40.gh-issue-125884.41E_PD.rst | 1 + 3 files changed, 42 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-10-23-17-45-40.gh-issue-125884.41E_PD.rst diff --git a/Lib/pdb.py b/Lib/pdb.py index 832213abbb98e6..3c0cbb525e28ef 100644 --- a/Lib/pdb.py +++ b/Lib/pdb.py @@ -118,7 +118,7 @@ def find_first_executable_line(code): return code.co_firstlineno def find_function(funcname, filename): - cre = re.compile(r'def\s+%s\s*[(]' % re.escape(funcname)) + cre = re.compile(r'def\s+%s(\s*\[.+\])?\s*[(]' % re.escape(funcname)) try: fp = tokenize.open(filename) except OSError: @@ -138,9 +138,12 @@ def find_function(funcname, filename): if funcdef: try: - funccode = compile(funcdef, filename, 'exec').co_consts[0] + code = compile(funcdef, filename, 'exec') except SyntaxError: continue + # We should always be able to find the code object here + funccode = next(c for c in code.co_consts if + isinstance(c, CodeType) and c.co_name == funcname) lineno_offset = find_first_executable_line(funccode) return funcname, filename, funcstart + lineno_offset - 1 return None diff --git a/Lib/test/test_pdb.py b/Lib/test/test_pdb.py index 1ea93ed037005d..e5f9848319021a 100644 --- a/Lib/test/test_pdb.py +++ b/Lib/test/test_pdb.py @@ -363,6 +363,42 @@ def test_pdb_breakpoint_commands(): 4 """ +def test_pdb_breakpoint_on_annotated_function_def(): + """Test breakpoints on function definitions with annotation. + + >>> def foo[T](): + ... return 0 + + >>> def bar() -> int: + ... return 0 + + >>> def foobar[T]() -> int: + ... return 0 + + >>> reset_Breakpoint() + + >>> def test_function(): + ... import pdb; pdb.Pdb(nosigint=True, readrc=False).set_trace() + ... pass + + >>> with PdbTestInput([ # doctest: +NORMALIZE_WHITESPACE + ... 'break foo', + ... 'break bar', + ... 'break foobar', + ... 'continue', + ... ]): + ... test_function() + > (2)test_function() + -> import pdb; pdb.Pdb(nosigint=True, readrc=False).set_trace() + (Pdb) break foo + Breakpoint 1 at :2 + (Pdb) break bar + Breakpoint 2 at :2 + (Pdb) break foobar + Breakpoint 3 at :2 + (Pdb) continue + """ + def test_pdb_commands(): """Test the commands command of pdb. diff --git a/Misc/NEWS.d/next/Library/2024-10-23-17-45-40.gh-issue-125884.41E_PD.rst b/Misc/NEWS.d/next/Library/2024-10-23-17-45-40.gh-issue-125884.41E_PD.rst new file mode 100644 index 00000000000000..684b1f282b143e --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-23-17-45-40.gh-issue-125884.41E_PD.rst @@ -0,0 +1 @@ +Fixed the bug for :mod:`pdb` where it can't set breakpoints on functions with certain annotations. From d3be6f945a4def7d123b2ef4d11d59abcdd3e446 Mon Sep 17 00:00:00 2001 From: Jelle Zijlstra Date: Wed, 23 Oct 2024 16:27:55 -0700 Subject: [PATCH 016/127] gh-125614: annotationlib: Fix bug where not all Stringifiers are converted (#125635) --- Lib/annotationlib.py | 28 +++++++++-- Lib/test/test_annotationlib.py | 46 +++++++++++++++++++ ...-10-16-22-45-50.gh-issue-125614.3OEo_Q.rst | 3 ++ 3 files changed, 73 insertions(+), 4 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-10-16-22-45-50.gh-issue-125614.3OEo_Q.rst diff --git a/Lib/annotationlib.py b/Lib/annotationlib.py index d5166170c071c4..732fbfa628cf5f 100644 --- a/Lib/annotationlib.py +++ b/Lib/annotationlib.py @@ -45,6 +45,7 @@ class Format(enum.IntEnum): "__globals__", "__owner__", "__cell__", + "__stringifier_dict__", ) @@ -268,7 +269,16 @@ class _Stringifier: # instance of the other in place. __slots__ = _SLOTS - def __init__(self, node, globals=None, owner=None, is_class=False, cell=None): + def __init__( + self, + node, + globals=None, + owner=None, + is_class=False, + cell=None, + *, + stringifier_dict, + ): # Either an AST node or a simple str (for the common case where a ForwardRef # represent a single name). assert isinstance(node, (ast.AST, str)) @@ -283,6 +293,7 @@ def __init__(self, node, globals=None, owner=None, is_class=False, cell=None): self.__globals__ = globals self.__cell__ = cell self.__owner__ = owner + self.__stringifier_dict__ = stringifier_dict def __convert_to_ast(self, other): if isinstance(other, _Stringifier): @@ -317,9 +328,15 @@ def __get_ast(self): return node def __make_new(self, node): - return _Stringifier( - node, self.__globals__, self.__owner__, self.__forward_is_class__ + stringifier = _Stringifier( + node, + self.__globals__, + self.__owner__, + self.__forward_is_class__, + stringifier_dict=self.__stringifier_dict__, ) + self.__stringifier_dict__.stringifiers.append(stringifier) + return stringifier # Must implement this since we set __eq__. We hash by identity so that # stringifiers in dict keys are kept separate. @@ -462,6 +479,7 @@ def __missing__(self, key): globals=self.globals, owner=self.owner, is_class=self.is_class, + stringifier_dict=self, ) self.stringifiers.append(fwdref) return fwdref @@ -516,7 +534,7 @@ def call_annotate_function(annotate, format, *, owner=None, _is_evaluate=False): name = freevars[i] else: name = "__cell__" - fwdref = _Stringifier(name) + fwdref = _Stringifier(name, stringifier_dict=globals) new_closure.append(types.CellType(fwdref)) closure = tuple(new_closure) else: @@ -573,6 +591,7 @@ def call_annotate_function(annotate, format, *, owner=None, _is_evaluate=False): owner=owner, globals=annotate.__globals__, is_class=is_class, + stringifier_dict=globals, ) globals.stringifiers.append(fwdref) new_closure.append(types.CellType(fwdref)) @@ -591,6 +610,7 @@ def call_annotate_function(annotate, format, *, owner=None, _is_evaluate=False): result = func(Format.VALUE) for obj in globals.stringifiers: obj.__class__ = ForwardRef + obj.__stringifier_dict__ = None # not needed for ForwardRef if isinstance(obj.__ast_node__, str): obj.__arg__ = obj.__ast_node__ obj.__ast_node__ = None diff --git a/Lib/test/test_annotationlib.py b/Lib/test/test_annotationlib.py index eedf2506a14912..2ca7058c14398c 100644 --- a/Lib/test/test_annotationlib.py +++ b/Lib/test/test_annotationlib.py @@ -80,6 +80,42 @@ def f(x: int, y: doesntexist): fwdref.evaluate() self.assertEqual(fwdref.evaluate(globals={"doesntexist": 1}), 1) + def test_nonexistent_attribute(self): + def f( + x: some.module, + y: some[module], + z: some(module), + alpha: some | obj, + beta: +some, + gamma: some < obj, + ): + pass + + anno = annotationlib.get_annotations(f, format=Format.FORWARDREF) + x_anno = anno["x"] + self.assertIsInstance(x_anno, ForwardRef) + self.assertEqual(x_anno, ForwardRef("some.module")) + + y_anno = anno["y"] + self.assertIsInstance(y_anno, ForwardRef) + self.assertEqual(y_anno, ForwardRef("some[module]")) + + z_anno = anno["z"] + self.assertIsInstance(z_anno, ForwardRef) + self.assertEqual(z_anno, ForwardRef("some(module)")) + + alpha_anno = anno["alpha"] + self.assertIsInstance(alpha_anno, ForwardRef) + self.assertEqual(alpha_anno, ForwardRef("some | obj")) + + beta_anno = anno["beta"] + self.assertIsInstance(beta_anno, ForwardRef) + self.assertEqual(beta_anno, ForwardRef("+some")) + + gamma_anno = anno["gamma"] + self.assertIsInstance(gamma_anno, ForwardRef) + self.assertEqual(gamma_anno, ForwardRef("some < obj")) + class TestSourceFormat(unittest.TestCase): def test_closure(self): @@ -91,6 +127,16 @@ def inner(arg: x): anno = annotationlib.get_annotations(inner, format=Format.STRING) self.assertEqual(anno, {"arg": "x"}) + def test_closure_undefined(self): + if False: + x = 0 + + def inner(arg: x): + pass + + anno = annotationlib.get_annotations(inner, format=Format.STRING) + self.assertEqual(anno, {"arg": "x"}) + def test_function(self): def f(x: int, y: doesntexist): pass diff --git a/Misc/NEWS.d/next/Library/2024-10-16-22-45-50.gh-issue-125614.3OEo_Q.rst b/Misc/NEWS.d/next/Library/2024-10-16-22-45-50.gh-issue-125614.3OEo_Q.rst new file mode 100644 index 00000000000000..5f4803c9b74578 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-16-22-45-50.gh-issue-125614.3OEo_Q.rst @@ -0,0 +1,3 @@ +In the :data:`~annotationlib.Format.FORWARDREF` format of +:mod:`annotationlib`, fix bug where nested expressions were not returned as +:class:`annotationlib.ForwardRef` format. From c35b33bfb7c491dfbdd40195d70dcfc4618265db Mon Sep 17 00:00:00 2001 From: Marat Sharafutdinov Date: Thu, 24 Oct 2024 05:04:49 +0300 Subject: [PATCH 017/127] Fix typo in garbage_collector.md (#125556) --- InternalDocs/garbage_collector.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/InternalDocs/garbage_collector.md b/InternalDocs/garbage_collector.md index a6ee5c09e19efd..d624cf4befd31a 100644 --- a/InternalDocs/garbage_collector.md +++ b/InternalDocs/garbage_collector.md @@ -56,7 +56,7 @@ Starting in version 3.13, CPython contains two GC implementations: performing a collection for thread safety. Both implementations use the same basic algorithms, but operate on different -data structures. The the section on +data structures. See the section on [Differences between GC implementations](#Differences-between-GC-implementations) for the details. From b61fece8523d0fa6d9cc6ad3fd855a136c34f0cd Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Thu, 24 Oct 2024 11:57:02 +0100 Subject: [PATCH 018/127] GH-125868: Fix STORE_ATTR_WITH_HINT specialization (GH-125876) --- Lib/dis.py | 4 +- Lib/test/test_opcache.py | 44 +++++++++++++++++++ ...-10-23-14-05-47.gh-issue-125868.uLfXYB.rst | 3 ++ Python/bytecodes.c | 7 ++- Python/executor_cases.c.h | 10 +++-- Python/generated_cases.c.h | 7 ++- 6 files changed, 62 insertions(+), 13 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2024-10-23-14-05-47.gh-issue-125868.uLfXYB.rst diff --git a/Lib/dis.py b/Lib/dis.py index e87e6a78469ab0..db69848e9ab8ee 100644 --- a/Lib/dis.py +++ b/Lib/dis.py @@ -778,8 +778,10 @@ def _get_instructions_bytes(code, linestarts=None, line_offset=0, co_positions=N if caches: cache_info = [] + cache_offset = offset for name, size in _cache_format[opname[deop]].items(): - data = code[offset + 2: offset + 2 + 2 * size] + data = code[cache_offset + 2: cache_offset + 2 + 2 * size] + cache_offset += size * 2 cache_info.append((name, size, data)) else: cache_info = None diff --git a/Lib/test/test_opcache.py b/Lib/test/test_opcache.py index acf8158b0d0ea1..cdcddb0d717f23 100644 --- a/Lib/test/test_opcache.py +++ b/Lib/test/test_opcache.py @@ -1155,6 +1155,50 @@ class D(dict): pass {'a':1, 'b':2} ) + def test_125868(self): + + def make_special_dict(): + """Create a dictionary an object with a this table: + index | key | value + ----- | --- | ----- + 0 | 'b' | 'value' + 1 | 'b' | NULL + """ + class A: + pass + a = A() + a.a = 1 + a.b = 2 + d = a.__dict__.copy() + del d['a'] + del d['b'] + d['b'] = "value" + return d + + class NoInlineAorB: + pass + for i in range(ord('c'), ord('z')): + setattr(NoInlineAorB(), chr(i), i) + + c = NoInlineAorB() + c.a = 0 + c.b = 1 + self.assertFalse(_testinternalcapi.has_inline_values(c)) + + def f(o, n): + for i in range(n): + o.b = i + # Prime f to store to dict slot 1 + f(c, 100) + + test_obj = NoInlineAorB() + test_obj.__dict__ = make_special_dict() + self.assertEqual(test_obj.b, "value") + + #This should set x.b = 0 + f(test_obj, 1) + self.assertEqual(test_obj.b, 0) + if __name__ == "__main__": unittest.main() diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-10-23-14-05-47.gh-issue-125868.uLfXYB.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-10-23-14-05-47.gh-issue-125868.uLfXYB.rst new file mode 100644 index 00000000000000..dea250e7166ec6 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-10-23-14-05-47.gh-issue-125868.uLfXYB.rst @@ -0,0 +1,3 @@ +It was possible in 3.14.0a1 only for attribute lookup to give the wrong +value. This was due to an incorrect specialization in very specific +circumstances. This is fixed in 3.14.0a2. diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 62e9b5ddd1584c..eaf2537fa07d27 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -2303,17 +2303,16 @@ dummy_func( assert(PyDict_CheckExact((PyObject *)dict)); PyObject *name = GETITEM(FRAME_CO_NAMES, oparg); DEOPT_IF(hint >= (size_t)dict->ma_keys->dk_nentries); - PyObject *old_value; DEOPT_IF(!DK_IS_UNICODE(dict->ma_keys)); PyDictUnicodeEntry *ep = DK_UNICODE_ENTRIES(dict->ma_keys) + hint; DEOPT_IF(ep->me_key != name); + PyObject *old_value = ep->me_value; + DEOPT_IF(old_value == NULL); /* Ensure dict is GC tracked if it needs to be */ if (!_PyObject_GC_IS_TRACKED(dict) && _PyObject_GC_MAY_BE_TRACKED(PyStackRef_AsPyObjectBorrow(value))) { _PyObject_GC_TRACK(dict); } - old_value = ep->me_value; - PyDict_WatchEvent event = old_value == NULL ? PyDict_EVENT_ADDED : PyDict_EVENT_MODIFIED; - _PyDict_NotifyEvent(tstate->interp, event, dict, name, PyStackRef_AsPyObjectBorrow(value)); + _PyDict_NotifyEvent(tstate->interp, PyDict_EVENT_MODIFIED, dict, name, PyStackRef_AsPyObjectBorrow(value)); ep->me_value = PyStackRef_AsPyObjectSteal(value); // old_value should be DECREFed after GC track checking is done, if not, it could raise a segmentation fault, // when dict only holds the strong reference to value in ep->me_value. diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 5df4986cd838b5..3a7015ccb78987 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -2815,7 +2815,6 @@ UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } - PyObject *old_value; if (!DK_IS_UNICODE(dict->ma_keys)) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); @@ -2825,14 +2824,17 @@ UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } + PyObject *old_value = ep->me_value; + if (old_value == NULL) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } /* Ensure dict is GC tracked if it needs to be */ if (!_PyObject_GC_IS_TRACKED(dict) && _PyObject_GC_MAY_BE_TRACKED(PyStackRef_AsPyObjectBorrow(value))) { _PyObject_GC_TRACK(dict); } - old_value = ep->me_value; - PyDict_WatchEvent event = old_value == NULL ? PyDict_EVENT_ADDED : PyDict_EVENT_MODIFIED; _PyFrame_SetStackPointer(frame, stack_pointer); - _PyDict_NotifyEvent(tstate->interp, event, dict, name, PyStackRef_AsPyObjectBorrow(value)); + _PyDict_NotifyEvent(tstate->interp, PyDict_EVENT_MODIFIED, dict, name, PyStackRef_AsPyObjectBorrow(value)); stack_pointer = _PyFrame_GetStackPointer(frame); ep->me_value = PyStackRef_AsPyObjectSteal(value); // old_value should be DECREFed after GC track checking is done, if not, it could raise a segmentation fault, diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index efbf2fba8c3106..f658ae503cd70e 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -7443,18 +7443,17 @@ assert(PyDict_CheckExact((PyObject *)dict)); PyObject *name = GETITEM(FRAME_CO_NAMES, oparg); DEOPT_IF(hint >= (size_t)dict->ma_keys->dk_nentries, STORE_ATTR); - PyObject *old_value; DEOPT_IF(!DK_IS_UNICODE(dict->ma_keys), STORE_ATTR); PyDictUnicodeEntry *ep = DK_UNICODE_ENTRIES(dict->ma_keys) + hint; DEOPT_IF(ep->me_key != name, STORE_ATTR); + PyObject *old_value = ep->me_value; + DEOPT_IF(old_value == NULL, STORE_ATTR); /* Ensure dict is GC tracked if it needs to be */ if (!_PyObject_GC_IS_TRACKED(dict) && _PyObject_GC_MAY_BE_TRACKED(PyStackRef_AsPyObjectBorrow(value))) { _PyObject_GC_TRACK(dict); } - old_value = ep->me_value; - PyDict_WatchEvent event = old_value == NULL ? PyDict_EVENT_ADDED : PyDict_EVENT_MODIFIED; _PyFrame_SetStackPointer(frame, stack_pointer); - _PyDict_NotifyEvent(tstate->interp, event, dict, name, PyStackRef_AsPyObjectBorrow(value)); + _PyDict_NotifyEvent(tstate->interp, PyDict_EVENT_MODIFIED, dict, name, PyStackRef_AsPyObjectBorrow(value)); stack_pointer = _PyFrame_GetStackPointer(frame); ep->me_value = PyStackRef_AsPyObjectSteal(value); // old_value should be DECREFed after GC track checking is done, if not, it could raise a segmentation fault, From e545ead66ce725aae6fb0ad5d733abe806c19750 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Thu, 24 Oct 2024 09:33:11 -0400 Subject: [PATCH 019/127] gh-125859: Fix crash when `gc.get_objects` is called during GC (#125882) This fixes a crash when `gc.get_objects()` or `gc.get_referrers()` is called during a GC in the free threading build. Switch to `_PyObjectStack` to avoid corrupting the `struct worklist` linked list maintained by the GC. Also, don't return objects that are frozen (`gc.freeze()`) or in the process of being collected to more closely match the behavior of the default build. --- Include/internal/pycore_object_stack.h | 10 ++ Lib/test/test_free_threading/test_gc.py | 61 ++++++++ Lib/test/test_gc.py | 23 +++ ...-10-23-14-42-27.gh-issue-125859.m3EF9E.rst | 2 + Python/gc_free_threading.c | 137 ++++++++---------- 5 files changed, 160 insertions(+), 73 deletions(-) create mode 100644 Lib/test/test_free_threading/test_gc.py create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2024-10-23-14-42-27.gh-issue-125859.m3EF9E.rst diff --git a/Include/internal/pycore_object_stack.h b/Include/internal/pycore_object_stack.h index c607ea8bc52545..39e69b7cde52a1 100644 --- a/Include/internal/pycore_object_stack.h +++ b/Include/internal/pycore_object_stack.h @@ -71,6 +71,16 @@ _PyObjectStack_Pop(_PyObjectStack *stack) return obj; } +static inline Py_ssize_t +_PyObjectStack_Size(_PyObjectStack *stack) +{ + Py_ssize_t size = 0; + for (_PyObjectStackChunk *buf = stack->head; buf != NULL; buf = buf->prev) { + size += buf->n; + } + return size; +} + // Merge src into dst, leaving src empty extern void _PyObjectStack_Merge(_PyObjectStack *dst, _PyObjectStack *src); diff --git a/Lib/test/test_free_threading/test_gc.py b/Lib/test/test_free_threading/test_gc.py new file mode 100644 index 00000000000000..401067fe9c612c --- /dev/null +++ b/Lib/test/test_free_threading/test_gc.py @@ -0,0 +1,61 @@ +import unittest + +import threading +from threading import Thread +from unittest import TestCase +import gc + +from test.support import threading_helper + + +class MyObj: + pass + + +@threading_helper.requires_working_threading() +class TestGC(TestCase): + def test_get_objects(self): + event = threading.Event() + + def gc_thread(): + for i in range(100): + o = gc.get_objects() + event.set() + + def mutator_thread(): + while not event.is_set(): + o1 = MyObj() + o2 = MyObj() + o3 = MyObj() + o4 = MyObj() + + gcs = [Thread(target=gc_thread)] + mutators = [Thread(target=mutator_thread) for _ in range(4)] + with threading_helper.start_threads(gcs + mutators): + pass + + def test_get_referrers(self): + event = threading.Event() + + obj = MyObj() + + def gc_thread(): + for i in range(100): + o = gc.get_referrers(obj) + event.set() + + def mutator_thread(): + while not event.is_set(): + d1 = { "key": obj } + d2 = { "key": obj } + d3 = { "key": obj } + d4 = { "key": obj } + + gcs = [Thread(target=gc_thread) for _ in range(2)] + mutators = [Thread(target=mutator_thread) for _ in range(4)] + with threading_helper.start_threads(gcs + mutators): + pass + + +if __name__ == "__main__": + unittest.main() diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py index bb7df1f5cfa7f7..cc2b4fac05b48b 100644 --- a/Lib/test/test_gc.py +++ b/Lib/test/test_gc.py @@ -1065,6 +1065,29 @@ def test_get_referents_on_capsule(self): self.assertEqual(len(gc.get_referents(untracked_capsule)), 0) gc.get_referents(tracked_capsule) + @cpython_only + def test_get_objects_during_gc(self): + # gh-125859: Calling gc.get_objects() or gc.get_referrers() during a + # collection should not crash. + test = self + collected = False + + class GetObjectsOnDel: + def __del__(self): + nonlocal collected + collected = True + objs = gc.get_objects() + # NB: can't use "in" here because some objects override __eq__ + for obj in objs: + test.assertTrue(obj is not self) + test.assertEqual(gc.get_referrers(self), []) + + obj = GetObjectsOnDel() + obj.cycle = obj + del obj + + gc.collect() + self.assertTrue(collected) class IncrementalGCTests(unittest.TestCase): diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-10-23-14-42-27.gh-issue-125859.m3EF9E.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-10-23-14-42-27.gh-issue-125859.m3EF9E.rst new file mode 100644 index 00000000000000..d36aa8fbe7482f --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-10-23-14-42-27.gh-issue-125859.m3EF9E.rst @@ -0,0 +1,2 @@ +Fix a crash in the free threading build when :func:`gc.get_objects` or +:func:`gc.get_referrers` is called during an in-progress garbage collection. diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c index 8558d4555a9a3a..1969ed608ea524 100644 --- a/Python/gc_free_threading.c +++ b/Python/gc_free_threading.c @@ -1401,10 +1401,32 @@ gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason) return n + m; } +static PyObject * +list_from_object_stack(_PyObjectStack *stack) +{ + PyObject *list = PyList_New(_PyObjectStack_Size(stack)); + if (list == NULL) { + PyObject *op; + while ((op = _PyObjectStack_Pop(stack)) != NULL) { + Py_DECREF(op); + } + return NULL; + } + + PyObject *op; + Py_ssize_t idx = 0; + while ((op = _PyObjectStack_Pop(stack)) != NULL) { + assert(idx < PyList_GET_SIZE(list)); + PyList_SET_ITEM(list, idx++, op); + } + assert(idx == PyList_GET_SIZE(list)); + return list; +} + struct get_referrers_args { struct visitor_args base; PyObject *objs; - struct worklist results; + _PyObjectStack results; }; static int @@ -1428,11 +1450,21 @@ visit_get_referrers(const mi_heap_t *heap, const mi_heap_area_t *area, if (op == NULL) { return true; } + if (op->ob_gc_bits & (_PyGC_BITS_UNREACHABLE | _PyGC_BITS_FROZEN)) { + // Exclude unreachable objects (in-progress GC) and frozen + // objects from gc.get_objects() to match the default build. + return true; + } struct get_referrers_args *arg = (struct get_referrers_args *)args; + if (op == arg->objs) { + // Don't include the tuple itself in the referrers list. + return true; + } if (Py_TYPE(op)->tp_traverse(op, referrersvisit, arg->objs)) { - op->ob_tid = 0; // we will restore the refcount later - worklist_push(&arg->results, op); + if (_PyObjectStack_Push(&arg->results, Py_NewRef(op)) < 0) { + return false; + } } return true; @@ -1441,48 +1473,25 @@ visit_get_referrers(const mi_heap_t *heap, const mi_heap_area_t *area, PyObject * _PyGC_GetReferrers(PyInterpreterState *interp, PyObject *objs) { - PyObject *result = PyList_New(0); - if (!result) { - return NULL; - } - - _PyEval_StopTheWorld(interp); - - // Append all objects to a worklist. This abuses ob_tid. We will restore - // it later. NOTE: We can't append to the PyListObject during - // gc_visit_heaps() because PyList_Append() may reclaim an abandoned - // mimalloc segments while we are traversing them. + // NOTE: We can't append to the PyListObject during gc_visit_heaps() + // because PyList_Append() may reclaim an abandoned mimalloc segments + // while we are traversing them. struct get_referrers_args args = { .objs = objs }; - gc_visit_heaps(interp, &visit_get_referrers, &args.base); - - bool error = false; - PyObject *op; - while ((op = worklist_pop(&args.results)) != NULL) { - gc_restore_tid(op); - if (op != objs && PyList_Append(result, op) < 0) { - error = true; - break; - } - } - - // In case of error, clear the remaining worklist - while ((op = worklist_pop(&args.results)) != NULL) { - gc_restore_tid(op); - } - + _PyEval_StopTheWorld(interp); + int err = gc_visit_heaps(interp, &visit_get_referrers, &args.base); _PyEval_StartTheWorld(interp); - if (error) { - Py_DECREF(result); - return NULL; + PyObject *list = list_from_object_stack(&args.results); + if (err < 0) { + PyErr_NoMemory(); + Py_CLEAR(list); } - - return result; + return list; } struct get_objects_args { struct visitor_args base; - struct worklist objects; + _PyObjectStack objects; }; static bool @@ -1493,54 +1502,36 @@ visit_get_objects(const mi_heap_t *heap, const mi_heap_area_t *area, if (op == NULL) { return true; } + if (op->ob_gc_bits & (_PyGC_BITS_UNREACHABLE | _PyGC_BITS_FROZEN)) { + // Exclude unreachable objects (in-progress GC) and frozen + // objects from gc.get_objects() to match the default build. + return true; + } struct get_objects_args *arg = (struct get_objects_args *)args; - op->ob_tid = 0; // we will restore the refcount later - worklist_push(&arg->objects, op); - + if (_PyObjectStack_Push(&arg->objects, Py_NewRef(op)) < 0) { + return false; + } return true; } PyObject * _PyGC_GetObjects(PyInterpreterState *interp, int generation) { - PyObject *result = PyList_New(0); - if (!result) { - return NULL; - } - - _PyEval_StopTheWorld(interp); - - // Append all objects to a worklist. This abuses ob_tid. We will restore - // it later. NOTE: We can't append to the list during gc_visit_heaps() - // because PyList_Append() may reclaim an abandoned mimalloc segment - // while we are traversing it. + // NOTE: We can't append to the PyListObject during gc_visit_heaps() + // because PyList_Append() may reclaim an abandoned mimalloc segments + // while we are traversing them. struct get_objects_args args = { 0 }; - gc_visit_heaps(interp, &visit_get_objects, &args.base); - - bool error = false; - PyObject *op; - while ((op = worklist_pop(&args.objects)) != NULL) { - gc_restore_tid(op); - if (op != result && PyList_Append(result, op) < 0) { - error = true; - break; - } - } - - // In case of error, clear the remaining worklist - while ((op = worklist_pop(&args.objects)) != NULL) { - gc_restore_tid(op); - } - + _PyEval_StopTheWorld(interp); + int err = gc_visit_heaps(interp, &visit_get_objects, &args.base); _PyEval_StartTheWorld(interp); - if (error) { - Py_DECREF(result); - return NULL; + PyObject *list = list_from_object_stack(&args.objects); + if (err < 0) { + PyErr_NoMemory(); + Py_CLEAR(list); } - - return result; + return list; } static bool From ad6110a93ffa82cae71af6c78692de065d3871b5 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Thu, 24 Oct 2024 12:03:50 -0400 Subject: [PATCH 020/127] gh-125842: Fix `sys.exit(0xffff_ffff)` on Windows (#125896) On Windows, `long` is a signed 32-bit integer so it can't represent `0xffff_ffff` without overflow. Windows exit codes are unsigned 32-bit integers, so if a child process exits with `-1`, it will be represented as `0xffff_ffff`. Also fix a number of other possible cases where `_Py_HandleSystemExit` could return with an exception set, leading to a `SystemError` (or fatal error in debug builds) later on during shutdown. --- Lib/test/test_sys.py | 14 ++++ ...-10-23-17-24-23.gh-issue-125842.m3EF9E.rst | 2 + Python/pythonrun.c | 80 +++++++++++-------- 3 files changed, 63 insertions(+), 33 deletions(-) create mode 100644 Misc/NEWS.d/next/Windows/2024-10-23-17-24-23.gh-issue-125842.m3EF9E.rst diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py index 9689ef8e96e072..c0862d7d15f39e 100644 --- a/Lib/test/test_sys.py +++ b/Lib/test/test_sys.py @@ -206,6 +206,20 @@ def test_exit(self): self.assertEqual(out, b'') self.assertEqual(err, b'') + # gh-125842: Windows uses 32-bit unsigned integers for exit codes + # so a -1 exit code is sometimes interpreted as 0xffff_ffff. + rc, out, err = assert_python_failure('-c', 'import sys; sys.exit(0xffff_ffff)') + self.assertIn(rc, (-1, 0xff, 0xffff_ffff)) + self.assertEqual(out, b'') + self.assertEqual(err, b'') + + # Overflow results in a -1 exit code, which may be converted to 0xff + # or 0xffff_ffff. + rc, out, err = assert_python_failure('-c', 'import sys; sys.exit(2**128)') + self.assertIn(rc, (-1, 0xff, 0xffff_ffff)) + self.assertEqual(out, b'') + self.assertEqual(err, b'') + # call with integer argument with self.assertRaises(SystemExit) as cm: sys.exit(42) diff --git a/Misc/NEWS.d/next/Windows/2024-10-23-17-24-23.gh-issue-125842.m3EF9E.rst b/Misc/NEWS.d/next/Windows/2024-10-23-17-24-23.gh-issue-125842.m3EF9E.rst new file mode 100644 index 00000000000000..63644721d57f5b --- /dev/null +++ b/Misc/NEWS.d/next/Windows/2024-10-23-17-24-23.gh-issue-125842.m3EF9E.rst @@ -0,0 +1,2 @@ +Fix a :exc:`SystemError` when :func:`sys.exit` is called with ``0xffffffff`` +on Windows. diff --git a/Python/pythonrun.c b/Python/pythonrun.c index fc0f11bc4e8af4..8b57018321c070 100644 --- a/Python/pythonrun.c +++ b/Python/pythonrun.c @@ -564,6 +564,30 @@ PyRun_SimpleStringFlags(const char *command, PyCompilerFlags *flags) return _PyRun_SimpleStringFlagsWithName(command, NULL, flags); } +static int +parse_exit_code(PyObject *code, int *exitcode_p) +{ + if (PyLong_Check(code)) { + // gh-125842: Use a long long to avoid an overflow error when `long` + // is 32-bit. We still truncate the result to an int. + int exitcode = (int)PyLong_AsLongLong(code); + if (exitcode == -1 && PyErr_Occurred()) { + // On overflow or other error, clear the exception and use -1 + // as the exit code to match historical Python behavior. + PyErr_Clear(); + *exitcode_p = -1; + return 1; + } + *exitcode_p = exitcode; + return 1; + } + else if (code == Py_None) { + *exitcode_p = 0; + return 1; + } + return 0; +} + int _Py_HandleSystemExit(int *exitcode_p) { @@ -580,50 +604,40 @@ _Py_HandleSystemExit(int *exitcode_p) fflush(stdout); - int exitcode = 0; - PyObject *exc = PyErr_GetRaisedException(); - if (exc == NULL) { - goto done; - } - assert(PyExceptionInstance_Check(exc)); + assert(exc != NULL && PyExceptionInstance_Check(exc)); - /* The error code should be in the `code' attribute. */ PyObject *code = PyObject_GetAttr(exc, &_Py_ID(code)); - if (code) { + if (code == NULL) { + // If the exception has no 'code' attribute, print the exception below + PyErr_Clear(); + } + else if (parse_exit_code(code, exitcode_p)) { + Py_DECREF(code); + Py_CLEAR(exc); + return 1; + } + else { + // If code is not an int or None, print it below Py_SETREF(exc, code); - if (exc == Py_None) { - goto done; - } } - /* If we failed to dig out the 'code' attribute, - * just let the else clause below print the error. - */ - if (PyLong_Check(exc)) { - exitcode = (int)PyLong_AsLong(exc); + PyThreadState *tstate = _PyThreadState_GET(); + PyObject *sys_stderr = _PySys_GetAttr(tstate, &_Py_ID(stderr)); + if (sys_stderr != NULL && sys_stderr != Py_None) { + if (PyFile_WriteObject(exc, sys_stderr, Py_PRINT_RAW) < 0) { + PyErr_Clear(); + } } else { - PyThreadState *tstate = _PyThreadState_GET(); - PyObject *sys_stderr = _PySys_GetAttr(tstate, &_Py_ID(stderr)); - /* We clear the exception here to avoid triggering the assertion - * in PyObject_Str that ensures it won't silently lose exception - * details. - */ - PyErr_Clear(); - if (sys_stderr != NULL && sys_stderr != Py_None) { - PyFile_WriteObject(exc, sys_stderr, Py_PRINT_RAW); - } else { - PyObject_Print(exc, stderr, Py_PRINT_RAW); - fflush(stderr); + if (PyObject_Print(exc, stderr, Py_PRINT_RAW) < 0) { + PyErr_Clear(); } - PySys_WriteStderr("\n"); - exitcode = 1; + fflush(stderr); } - -done: + PySys_WriteStderr("\n"); Py_CLEAR(exc); - *exitcode_p = exitcode; + *exitcode_p = 1; return 1; } From 5003ad5c5ea508f0dde1b374cd8bc6a481ad5c5d Mon Sep 17 00:00:00 2001 From: partev Date: Thu, 24 Oct 2024 12:41:01 -0400 Subject: [PATCH 021/127] gh-125909: Avoid a redirect when linking to the devguide (#125826) --- Doc/tools/templates/indexcontent.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/tools/templates/indexcontent.html b/Doc/tools/templates/indexcontent.html index f2e9fbb0106452..2686f48dad2a95 100644 --- a/Doc/tools/templates/indexcontent.html +++ b/Doc/tools/templates/indexcontent.html @@ -59,7 +59,7 @@

{{ docstitle|e }}

- + From 3c4a7fa6178d852ccb73527aaa2d0a5e93022e89 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Thu, 24 Oct 2024 12:44:38 -0400 Subject: [PATCH 022/127] gh-124218: Avoid refcount contention on builtins module (GH-125847) This replaces `_PyEval_BuiltinsFromGlobals` with `_PyDict_LoadBuiltinsFromGlobals`, which returns a new reference instead of a borrowed reference. Internally, the new function uses per-thread reference counting when possible to avoid contention on the refcount fields on the builtins module. --- Include/internal/pycore_ceval.h | 3 --- Include/internal/pycore_dict.h | 29 ++++++++++++++++++++++++++++ Objects/dictobject.c | 34 +++++++++++++++++++++++++++++++++ Objects/frameobject.c | 24 ++++------------------- Objects/funcobject.c | 25 +++--------------------- Python/ceval.c | 6 ++++-- 6 files changed, 74 insertions(+), 47 deletions(-) diff --git a/Include/internal/pycore_ceval.h b/Include/internal/pycore_ceval.h index cff2b1f7114793..411bbff106dd69 100644 --- a/Include/internal/pycore_ceval.h +++ b/Include/internal/pycore_ceval.h @@ -83,9 +83,6 @@ extern void _PyEval_Fini(void); extern PyObject* _PyEval_GetBuiltins(PyThreadState *tstate); -extern PyObject* _PyEval_BuiltinsFromGlobals( - PyThreadState *tstate, - PyObject *globals); // Trampoline API diff --git a/Include/internal/pycore_dict.h b/Include/internal/pycore_dict.h index 1d185559b3ef43..c5399ad8e0497f 100644 --- a/Include/internal/pycore_dict.h +++ b/Include/internal/pycore_dict.h @@ -108,6 +108,9 @@ extern Py_ssize_t _PyDictKeys_StringLookup(PyDictKeysObject* dictkeys, PyObject PyAPI_FUNC(PyObject *)_PyDict_LoadGlobal(PyDictObject *, PyDictObject *, PyObject *); PyAPI_FUNC(void) _PyDict_LoadGlobalStackRef(PyDictObject *, PyDictObject *, PyObject *, _PyStackRef *); +// Loads the __builtins__ object from the globals dict. Returns a new reference. +extern PyObject *_PyDict_LoadBuiltinsFromGlobals(PyObject *globals); + /* Consumes references to key and value */ PyAPI_FUNC(int) _PyDict_SetItem_Take2(PyDictObject *op, PyObject *key, PyObject *value); extern int _PyDict_SetItem_LockHeld(PyDictObject *dict, PyObject *name, PyObject *value); @@ -318,6 +321,8 @@ PyDictObject *_PyObject_MaterializeManagedDict_LockHeld(PyObject *); #ifndef Py_GIL_DISABLED # define _Py_INCREF_DICT Py_INCREF # define _Py_DECREF_DICT Py_DECREF +# define _Py_INCREF_BUILTINS Py_INCREF +# define _Py_DECREF_BUILTINS Py_DECREF #else static inline Py_ssize_t _PyDict_UniqueId(PyDictObject *mp) @@ -341,6 +346,30 @@ _Py_DECREF_DICT(PyObject *op) Py_ssize_t id = _PyDict_UniqueId((PyDictObject *)op); _Py_THREAD_DECREF_OBJECT(op, id); } + +// Like `_Py_INCREF_DICT`, but also handles non-dict objects because builtins +// may not be a dict. +static inline void +_Py_INCREF_BUILTINS(PyObject *op) +{ + if (PyDict_CheckExact(op)) { + _Py_INCREF_DICT(op); + } + else { + Py_INCREF(op); + } +} + +static inline void +_Py_DECREF_BUILTINS(PyObject *op) +{ + if (PyDict_CheckExact(op)) { + _Py_DECREF_DICT(op); + } + else { + Py_DECREF(op); + } +} #endif #ifdef __cplusplus diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 3134f6141dc9be..68ba2f74fdc67a 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -2511,6 +2511,40 @@ _PyDict_LoadGlobalStackRef(PyDictObject *globals, PyDictObject *builtins, PyObje assert(ix >= 0 || PyStackRef_IsNull(*res)); } +PyObject * +_PyDict_LoadBuiltinsFromGlobals(PyObject *globals) +{ + if (!PyDict_Check(globals)) { + PyErr_BadInternalCall(); + return NULL; + } + + PyDictObject *mp = (PyDictObject *)globals; + PyObject *key = &_Py_ID(__builtins__); + Py_hash_t hash = unicode_get_hash(key); + + // Use the stackref variant to avoid reference count contention on the + // builtins module in the free threading build. It's important not to + // make any escaping calls between the lookup and the `PyStackRef_CLOSE()` + // because the `ref` is not visible to the GC. + _PyStackRef ref; + Py_ssize_t ix = _Py_dict_lookup_threadsafe_stackref(mp, key, hash, &ref); + if (ix == DKIX_ERROR) { + return NULL; + } + if (PyStackRef_IsNull(ref)) { + return Py_NewRef(PyEval_GetBuiltins()); + } + PyObject *builtins = PyStackRef_AsPyObjectBorrow(ref); + if (PyModule_Check(builtins)) { + builtins = _PyModule_GetDict(builtins); + assert(builtins != NULL); + } + _Py_INCREF_BUILTINS(builtins); + PyStackRef_CLOSE(ref); + return builtins; +} + /* Consumes references to key and value */ static int setitem_take2_lock_held(PyDictObject *mp, PyObject *key, PyObject *value) diff --git a/Objects/frameobject.c b/Objects/frameobject.c index 5ef48919a081be..af2a2ef18e627a 100644 --- a/Objects/frameobject.c +++ b/Objects/frameobject.c @@ -1,8 +1,9 @@ /* Frame object implementation */ #include "Python.h" -#include "pycore_ceval.h" // _PyEval_BuiltinsFromGlobals() +#include "pycore_ceval.h" // _PyEval_SetOpcodeTrace() #include "pycore_code.h" // CO_FAST_LOCAL, etc. +#include "pycore_dict.h" // _PyDict_LoadBuiltinsFromGlobals() #include "pycore_function.h" // _PyFunction_FromConstructor() #include "pycore_moduleobject.h" // _PyModule_GetDict() #include "pycore_modsupport.h" // _PyArg_CheckPositional() @@ -1899,7 +1900,7 @@ PyFrameObject* PyFrame_New(PyThreadState *tstate, PyCodeObject *code, PyObject *globals, PyObject *locals) { - PyObject *builtins = _PyEval_BuiltinsFromGlobals(tstate, globals); // borrowed ref + PyObject *builtins = _PyDict_LoadBuiltinsFromGlobals(globals); if (builtins == NULL) { return NULL; } @@ -1914,6 +1915,7 @@ PyFrame_New(PyThreadState *tstate, PyCodeObject *code, .fc_closure = NULL }; PyFunctionObject *func = _PyFunction_FromConstructor(&desc); + _Py_DECREF_BUILTINS(builtins); if (func == NULL) { return NULL; } @@ -2204,21 +2206,3 @@ PyFrame_GetGenerator(PyFrameObject *frame) PyGenObject *gen = _PyGen_GetGeneratorFromFrame(frame->f_frame); return Py_NewRef(gen); } - -PyObject* -_PyEval_BuiltinsFromGlobals(PyThreadState *tstate, PyObject *globals) -{ - PyObject *builtins = PyDict_GetItemWithError(globals, &_Py_ID(__builtins__)); - if (builtins) { - if (PyModule_Check(builtins)) { - builtins = _PyModule_GetDict(builtins); - assert(builtins != NULL); - } - return builtins; - } - if (PyErr_Occurred()) { - return NULL; - } - - return _PyEval_GetBuiltins(tstate); -} diff --git a/Objects/funcobject.c b/Objects/funcobject.c index 44fb4ac0907d7b..e72a7d98c0a79e 100644 --- a/Objects/funcobject.c +++ b/Objects/funcobject.c @@ -2,7 +2,6 @@ /* Function object implementation */ #include "Python.h" -#include "pycore_ceval.h" // _PyEval_BuiltinsFromGlobals() #include "pycore_dict.h" // _Py_INCREF_DICT() #include "pycore_long.h" // _PyLong_GetOne() #include "pycore_modsupport.h" // _PyArg_NoKeywords() @@ -115,12 +114,7 @@ _PyFunction_FromConstructor(PyFrameConstructor *constr) } _Py_INCREF_DICT(constr->fc_globals); op->func_globals = constr->fc_globals; - if (PyDict_Check(constr->fc_builtins)) { - _Py_INCREF_DICT(constr->fc_builtins); - } - else { - Py_INCREF(constr->fc_builtins); - } + _Py_INCREF_BUILTINS(constr->fc_builtins); op->func_builtins = constr->fc_builtins; op->func_name = Py_NewRef(constr->fc_name); op->func_qualname = Py_NewRef(constr->fc_qualname); @@ -153,8 +147,6 @@ PyFunction_NewWithQualName(PyObject *code, PyObject *globals, PyObject *qualname assert(PyDict_Check(globals)); _Py_INCREF_DICT(globals); - PyThreadState *tstate = _PyThreadState_GET(); - PyCodeObject *code_obj = (PyCodeObject *)code; _Py_INCREF_CODE(code_obj); @@ -188,16 +180,10 @@ PyFunction_NewWithQualName(PyObject *code, PyObject *globals, PyObject *qualname goto error; } - builtins = _PyEval_BuiltinsFromGlobals(tstate, globals); // borrowed ref + builtins = _PyDict_LoadBuiltinsFromGlobals(globals); if (builtins == NULL) { goto error; } - if (PyDict_Check(builtins)) { - _Py_INCREF_DICT(builtins); - } - else { - Py_INCREF(builtins); - } PyFunctionObject *op = PyObject_GC_New(PyFunctionObject, &PyFunction_Type); if (op == NULL) { @@ -1078,12 +1064,7 @@ func_clear(PyObject *self) PyObject *builtins = op->func_builtins; op->func_builtins = NULL; if (builtins != NULL) { - if (PyDict_Check(builtins)) { - _Py_DECREF_DICT(builtins); - } - else { - Py_DECREF(builtins); - } + _Py_DECREF_BUILTINS(builtins); } Py_CLEAR(op->func_module); Py_CLEAR(op->func_defaults); diff --git a/Python/ceval.c b/Python/ceval.c index ca75646b585f07..ece7ef1d32048f 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -639,7 +639,7 @@ PyEval_EvalCode(PyObject *co, PyObject *globals, PyObject *locals) if (locals == NULL) { locals = globals; } - PyObject *builtins = _PyEval_BuiltinsFromGlobals(tstate, globals); // borrowed ref + PyObject *builtins = _PyDict_LoadBuiltinsFromGlobals(globals); if (builtins == NULL) { return NULL; } @@ -654,6 +654,7 @@ PyEval_EvalCode(PyObject *co, PyObject *globals, PyObject *locals) .fc_closure = NULL }; PyFunctionObject *func = _PyFunction_FromConstructor(&desc); + _Py_DECREF_BUILTINS(builtins); if (func == NULL) { return NULL; } @@ -1899,7 +1900,7 @@ PyEval_EvalCodeEx(PyObject *_co, PyObject *globals, PyObject *locals, if (defaults == NULL) { return NULL; } - PyObject *builtins = _PyEval_BuiltinsFromGlobals(tstate, globals); // borrowed ref + PyObject *builtins = _PyDict_LoadBuiltinsFromGlobals(globals); if (builtins == NULL) { Py_DECREF(defaults); return NULL; @@ -1954,6 +1955,7 @@ PyEval_EvalCodeEx(PyObject *_co, PyObject *globals, PyObject *locals, Py_XDECREF(func); Py_XDECREF(kwnames); PyMem_Free(newargs); + _Py_DECREF_BUILTINS(builtins); Py_DECREF(defaults); return res; } From 41bd9d959ccdb1095b6662b903bb3cbd2a47087b Mon Sep 17 00:00:00 2001 From: Peter Bierma Date: Thu, 24 Oct 2024 12:51:45 -0400 Subject: [PATCH 023/127] gh-125864: Propagate `pickle.loads()` failures in `InterpreterPoolExecutor` (gh-125898) Authored-by: Peter Bierma --- Lib/concurrent/futures/interpreter.py | 3 ++- .../test_interpreter_pool.py | 18 ++++++++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/Lib/concurrent/futures/interpreter.py b/Lib/concurrent/futures/interpreter.py index fd7941adb766bb..d17688dc9d7346 100644 --- a/Lib/concurrent/futures/interpreter.py +++ b/Lib/concurrent/futures/interpreter.py @@ -107,7 +107,8 @@ def _call(cls, func, args, kwargs, resultsid): @classmethod def _call_pickled(cls, pickled, resultsid): - fn, args, kwargs = pickle.loads(pickled) + with cls._capture_exc(resultsid): + fn, args, kwargs = pickle.loads(pickled) cls._call(fn, args, kwargs, resultsid) def __init__(self, initdata, shared=None): diff --git a/Lib/test/test_concurrent_futures/test_interpreter_pool.py b/Lib/test/test_concurrent_futures/test_interpreter_pool.py index 5264b1bb6e9c75..ea1512fc830d0c 100644 --- a/Lib/test/test_concurrent_futures/test_interpreter_pool.py +++ b/Lib/test/test_concurrent_futures/test_interpreter_pool.py @@ -56,6 +56,16 @@ def pipe(self): return r, w +class PickleShenanigans: + """Succeeds with pickle.dumps(), but fails with pickle.loads()""" + def __init__(self, value): + if value == 1: + raise RuntimeError("gotcha") + + def __reduce__(self): + return (self.__class__, (1,)) + + class InterpreterPoolExecutorTest( InterpretersMixin, ExecutorTest, BaseTestCase): @@ -279,6 +289,14 @@ def test_idle_thread_reuse(self): self.assertEqual(len(executor._threads), 1) executor.shutdown(wait=True) + def test_pickle_errors_propagate(self): + # GH-125864: Pickle errors happen before the script tries to execute, so the + # queue used to wait infinitely. + + fut = self.executor.submit(PickleShenanigans(0)) + with self.assertRaisesRegex(RuntimeError, "gotcha"): + fut.result() + class AsyncioTest(InterpretersMixin, testasyncio_utils.TestCase): From 3f24bde0b6689b8f05872a8118a97908b5a94659 Mon Sep 17 00:00:00 2001 From: Javad Shafique Date: Thu, 24 Oct 2024 19:41:16 +0200 Subject: [PATCH 024/127] gh-118950: Fix SSLProtocol.connection_lost not being called when OSError is thrown (#118960) Co-authored-by: Kumar Aditya --- Lib/asyncio/sslproto.py | 5 +- Lib/test/test_asyncio/test_sslproto.py | 48 +++++++++++++++++++ ...-05-12-03-10-36.gh-issue-118950.5Wc4vp.rst | 1 + 3 files changed, 53 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-05-12-03-10-36.gh-issue-118950.5Wc4vp.rst diff --git a/Lib/asyncio/sslproto.py b/Lib/asyncio/sslproto.py index fa99d4533aa0a6..74c5f0d5ca0609 100644 --- a/Lib/asyncio/sslproto.py +++ b/Lib/asyncio/sslproto.py @@ -101,7 +101,7 @@ def get_protocol(self): return self._ssl_protocol._app_protocol def is_closing(self): - return self._closed + return self._closed or self._ssl_protocol._is_transport_closing() def close(self): """Close the transport. @@ -379,6 +379,9 @@ def _get_app_transport(self): self._app_transport_created = True return self._app_transport + def _is_transport_closing(self): + return self._transport is not None and self._transport.is_closing() + def connection_made(self, transport): """Called when the low-level connection is made. diff --git a/Lib/test/test_asyncio/test_sslproto.py b/Lib/test/test_asyncio/test_sslproto.py index f5f0afeab51c9e..761904c5146b6a 100644 --- a/Lib/test/test_asyncio/test_sslproto.py +++ b/Lib/test/test_asyncio/test_sslproto.py @@ -109,6 +109,54 @@ def test_connection_lost(self): test_utils.run_briefly(self.loop) self.assertIsInstance(waiter.exception(), ConnectionAbortedError) + def test_connection_lost_when_busy(self): + # gh-118950: SSLProtocol.connection_lost not being called when OSError + # is thrown on asyncio.write. + sock = mock.Mock() + sock.fileno = mock.Mock(return_value=12345) + sock.send = mock.Mock(side_effect=BrokenPipeError) + + # construct StreamWriter chain that contains loop dependant logic this emulates + # what _make_ssl_transport() does in BaseSelectorEventLoop + reader = asyncio.StreamReader(limit=2 ** 16, loop=self.loop) + protocol = asyncio.StreamReaderProtocol(reader, loop=self.loop) + ssl_proto = self.ssl_protocol(proto=protocol) + + # emulate reading decompressed data + sslobj = mock.Mock() + sslobj.read.side_effect = ssl.SSLWantReadError + sslobj.write.side_effect = ssl.SSLWantReadError + ssl_proto._sslobj = sslobj + + # emulate outgoing data + data = b'An interesting message' + + outgoing = mock.Mock() + outgoing.read = mock.Mock(return_value=data) + outgoing.pending = len(data) + ssl_proto._outgoing = outgoing + + # use correct socket transport to initialize the SSLProtocol + self.loop._make_socket_transport(sock, ssl_proto) + + transport = ssl_proto._app_transport + writer = asyncio.StreamWriter(transport, protocol, reader, self.loop) + + async def main(): + # writes data to transport + async def write(): + writer.write(data) + await writer.drain() + + # try to write for the first time + await write() + # try to write for the second time, this raises as the connection_lost + # callback should be done with error + with self.assertRaises(ConnectionResetError): + await write() + + self.loop.run_until_complete(main()) + def test_close_during_handshake(self): # bpo-29743 Closing transport during handshake process leaks socket waiter = self.loop.create_future() diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-05-12-03-10-36.gh-issue-118950.5Wc4vp.rst b/Misc/NEWS.d/next/Core and Builtins/2024-05-12-03-10-36.gh-issue-118950.5Wc4vp.rst new file mode 100644 index 00000000000000..82be975f4d808d --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-05-12-03-10-36.gh-issue-118950.5Wc4vp.rst @@ -0,0 +1 @@ +Fix bug where SSLProtocol.connection_lost wasn't getting called when OSError was thrown on writing to socket. From 500f5338a8fe13719478589333fcd296e8e8eb02 Mon Sep 17 00:00:00 2001 From: Shantanu <12621235+hauntsaninja@users.noreply.github.com> Date: Thu, 24 Oct 2024 12:11:12 -0700 Subject: [PATCH 025/127] gh-123930: Better error for "from imports" when script shadows module (#123929) --- Doc/whatsnew/3.13.rst | 4 +- Include/internal/pycore_moduleobject.h | 2 + Lib/test/test_import/__init__.py | 325 ++++++++++++------ ...-09-11-01-32-07.gh-issue-123930.BkPfB6.rst | 4 + Objects/moduleobject.c | 28 +- Python/ceval.c | 150 +++++--- 6 files changed, 342 insertions(+), 171 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2024-09-11-01-32-07.gh-issue-123930.BkPfB6.rst diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index f9e74a9b8ff9c6..de4c7fd4c0486b 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -274,7 +274,7 @@ Improved error messages File "/home/me/random.py", line 3, in print(random.randint(5)) ^^^^^^^^^^^^^^ - AttributeError: module 'random' has no attribute 'randint' (consider renaming '/home/me/random.py' since it has the same name as the standard library module named 'random' and the import system gives it precedence) + AttributeError: module 'random' has no attribute 'randint' (consider renaming '/home/me/random.py' since it has the same name as the standard library module named 'random' and prevents importing that standard library module) Similarly, if a script has the same name as a third-party module that it attempts to import and this results in errors, @@ -289,7 +289,7 @@ Improved error messages File "/home/me/numpy.py", line 3, in np.array([1, 2, 3]) ^^^^^^^^ - AttributeError: module 'numpy' has no attribute 'array' (consider renaming '/home/me/numpy.py' if it has the same name as a third-party module you intended to import) + AttributeError: module 'numpy' has no attribute 'array' (consider renaming '/home/me/numpy.py' if it has the same name as a library you intended to import) (Contributed by Shantanu Jain in :gh:`95754`.) diff --git a/Include/internal/pycore_moduleobject.h b/Include/internal/pycore_moduleobject.h index cc2dda48ed9f28..9bb282a13a9659 100644 --- a/Include/internal/pycore_moduleobject.h +++ b/Include/internal/pycore_moduleobject.h @@ -11,6 +11,8 @@ extern "C" { extern void _PyModule_Clear(PyObject *); extern void _PyModule_ClearDict(PyObject *); extern int _PyModuleSpec_IsInitializing(PyObject *); +extern int _PyModuleSpec_GetFileOrigin(PyObject *, PyObject **); +extern int _PyModule_IsPossiblyShadowing(PyObject *); extern int _PyModule_IsExtension(PyObject *obj); diff --git a/Lib/test/test_import/__init__.py b/Lib/test/test_import/__init__.py index 5d0d02480b3929..5b7ba90b2cc7c6 100644 --- a/Lib/test/test_import/__init__.py +++ b/Lib/test/test_import/__init__.py @@ -804,104 +804,133 @@ def test_issue105979(self): str(cm.exception)) def test_script_shadowing_stdlib(self): - with os_helper.temp_dir() as tmp: - with open(os.path.join(tmp, "fractions.py"), "w", encoding='utf-8') as f: - f.write("import fractions\nfractions.Fraction") - - expected_error = ( - rb"AttributeError: module 'fractions' has no attribute 'Fraction' " - rb"\(consider renaming '.*fractions.py' since it has the " - rb"same name as the standard library module named 'fractions' " - rb"and the import system gives it precedence\)" + script_errors = [ + ( + "import fractions\nfractions.Fraction", + rb"AttributeError: module 'fractions' has no attribute 'Fraction'" + ), + ( + "from fractions import Fraction", + rb"ImportError: cannot import name 'Fraction' from 'fractions'" ) + ] + for script, error in script_errors: + with self.subTest(script=script), os_helper.temp_dir() as tmp: + with open(os.path.join(tmp, "fractions.py"), "w", encoding='utf-8') as f: + f.write(script) + + expected_error = error + ( + rb" \(consider renaming '.*fractions.py' since it has the " + rb"same name as the standard library module named 'fractions' " + rb"and prevents importing that standard library module\)" + ) - popen = script_helper.spawn_python(os.path.join(tmp, "fractions.py"), cwd=tmp) - stdout, stderr = popen.communicate() - self.assertRegex(stdout, expected_error) + popen = script_helper.spawn_python(os.path.join(tmp, "fractions.py"), cwd=tmp) + stdout, stderr = popen.communicate() + self.assertRegex(stdout, expected_error) - popen = script_helper.spawn_python('-m', 'fractions', cwd=tmp) - stdout, stderr = popen.communicate() - self.assertRegex(stdout, expected_error) + popen = script_helper.spawn_python('-m', 'fractions', cwd=tmp) + stdout, stderr = popen.communicate() + self.assertRegex(stdout, expected_error) - popen = script_helper.spawn_python('-c', 'import fractions', cwd=tmp) - stdout, stderr = popen.communicate() - self.assertRegex(stdout, expected_error) + popen = script_helper.spawn_python('-c', 'import fractions', cwd=tmp) + stdout, stderr = popen.communicate() + self.assertRegex(stdout, expected_error) - # and there's no error at all when using -P - popen = script_helper.spawn_python('-P', 'fractions.py', cwd=tmp) - stdout, stderr = popen.communicate() - self.assertEqual(stdout, b'') + # and there's no error at all when using -P + popen = script_helper.spawn_python('-P', 'fractions.py', cwd=tmp) + stdout, stderr = popen.communicate() + self.assertEqual(stdout, b'') - tmp_child = os.path.join(tmp, "child") - os.mkdir(tmp_child) + tmp_child = os.path.join(tmp, "child") + os.mkdir(tmp_child) - # test the logic with different cwd - popen = script_helper.spawn_python(os.path.join(tmp, "fractions.py"), cwd=tmp_child) - stdout, stderr = popen.communicate() - self.assertRegex(stdout, expected_error) + # test the logic with different cwd + popen = script_helper.spawn_python(os.path.join(tmp, "fractions.py"), cwd=tmp_child) + stdout, stderr = popen.communicate() + self.assertRegex(stdout, expected_error) - popen = script_helper.spawn_python('-m', 'fractions', cwd=tmp_child) - stdout, stderr = popen.communicate() - self.assertEqual(stdout, b'') # no error + popen = script_helper.spawn_python('-m', 'fractions', cwd=tmp_child) + stdout, stderr = popen.communicate() + self.assertEqual(stdout, b'') # no error - popen = script_helper.spawn_python('-c', 'import fractions', cwd=tmp_child) - stdout, stderr = popen.communicate() - self.assertEqual(stdout, b'') # no error + popen = script_helper.spawn_python('-c', 'import fractions', cwd=tmp_child) + stdout, stderr = popen.communicate() + self.assertEqual(stdout, b'') # no error def test_package_shadowing_stdlib_module(self): - with os_helper.temp_dir() as tmp: - os.mkdir(os.path.join(tmp, "fractions")) - with open(os.path.join(tmp, "fractions", "__init__.py"), "w", encoding='utf-8') as f: - f.write("shadowing_module = True") - with open(os.path.join(tmp, "main.py"), "w", encoding='utf-8') as f: - f.write(""" -import fractions -fractions.shadowing_module -fractions.Fraction -""") - - expected_error = ( - rb"AttributeError: module 'fractions' has no attribute 'Fraction' " - rb"\(consider renaming '.*fractions.__init__.py' since it has the " - rb"same name as the standard library module named 'fractions' " - rb"and the import system gives it precedence\)" + script_errors = [ + ( + "fractions.Fraction", + rb"AttributeError: module 'fractions' has no attribute 'Fraction'" + ), + ( + "from fractions import Fraction", + rb"ImportError: cannot import name 'Fraction' from 'fractions'" ) + ] + for script, error in script_errors: + with self.subTest(script=script), os_helper.temp_dir() as tmp: + os.mkdir(os.path.join(tmp, "fractions")) + with open( + os.path.join(tmp, "fractions", "__init__.py"), "w", encoding='utf-8' + ) as f: + f.write("shadowing_module = True") + with open(os.path.join(tmp, "main.py"), "w", encoding='utf-8') as f: + f.write("import fractions; fractions.shadowing_module\n") + f.write(script) + + expected_error = error + ( + rb" \(consider renaming '.*[\\/]fractions[\\/]+__init__.py' since it has the " + rb"same name as the standard library module named 'fractions' " + rb"and prevents importing that standard library module\)" + ) - popen = script_helper.spawn_python(os.path.join(tmp, "main.py"), cwd=tmp) - stdout, stderr = popen.communicate() - self.assertRegex(stdout, expected_error) + popen = script_helper.spawn_python(os.path.join(tmp, "main.py"), cwd=tmp) + stdout, stderr = popen.communicate() + self.assertRegex(stdout, expected_error) - popen = script_helper.spawn_python('-m', 'main', cwd=tmp) - stdout, stderr = popen.communicate() - self.assertRegex(stdout, expected_error) + popen = script_helper.spawn_python('-m', 'main', cwd=tmp) + stdout, stderr = popen.communicate() + self.assertRegex(stdout, expected_error) - # and there's no shadowing at all when using -P - popen = script_helper.spawn_python('-P', 'main.py', cwd=tmp) - stdout, stderr = popen.communicate() - self.assertRegex(stdout, b"module 'fractions' has no attribute 'shadowing_module'") + # and there's no shadowing at all when using -P + popen = script_helper.spawn_python('-P', 'main.py', cwd=tmp) + stdout, stderr = popen.communicate() + self.assertRegex(stdout, b"module 'fractions' has no attribute 'shadowing_module'") def test_script_shadowing_third_party(self): - with os_helper.temp_dir() as tmp: - with open(os.path.join(tmp, "numpy.py"), "w", encoding='utf-8') as f: - f.write("import numpy\nnumpy.array") - - expected_error = ( - rb"AttributeError: module 'numpy' has no attribute 'array' " - rb"\(consider renaming '.*numpy.py' if it has the " - rb"same name as a third-party module you intended to import\)\s+\Z" + script_errors = [ + ( + "import numpy\nnumpy.array", + rb"AttributeError: module 'numpy' has no attribute 'array'" + ), + ( + "from numpy import array", + rb"ImportError: cannot import name 'array' from 'numpy'" ) + ] + for script, error in script_errors: + with self.subTest(script=script), os_helper.temp_dir() as tmp: + with open(os.path.join(tmp, "numpy.py"), "w", encoding='utf-8') as f: + f.write(script) + + expected_error = error + ( + rb" \(consider renaming '.*numpy.py' if it has the " + rb"same name as a library you intended to import\)\s+\Z" + ) - popen = script_helper.spawn_python(os.path.join(tmp, "numpy.py")) - stdout, stderr = popen.communicate() - self.assertRegex(stdout, expected_error) + popen = script_helper.spawn_python(os.path.join(tmp, "numpy.py")) + stdout, stderr = popen.communicate() + self.assertRegex(stdout, expected_error) - popen = script_helper.spawn_python('-m', 'numpy', cwd=tmp) - stdout, stderr = popen.communicate() - self.assertRegex(stdout, expected_error) + popen = script_helper.spawn_python('-m', 'numpy', cwd=tmp) + stdout, stderr = popen.communicate() + self.assertRegex(stdout, expected_error) - popen = script_helper.spawn_python('-c', 'import numpy', cwd=tmp) - stdout, stderr = popen.communicate() - self.assertRegex(stdout, expected_error) + popen = script_helper.spawn_python('-c', 'import numpy', cwd=tmp) + stdout, stderr = popen.communicate() + self.assertRegex(stdout, expected_error) def test_script_maybe_not_shadowing_third_party(self): with os_helper.temp_dir() as tmp: @@ -911,15 +940,23 @@ def test_script_maybe_not_shadowing_third_party(self): expected_error = ( rb"AttributeError: module 'numpy' has no attribute 'attr'\s+\Z" ) - popen = script_helper.spawn_python('-c', 'import numpy; numpy.attr', cwd=tmp) stdout, stderr = popen.communicate() self.assertRegex(stdout, expected_error) + expected_error = ( + rb"ImportError: cannot import name 'attr' from 'numpy' \(.*\)\s+\Z" + ) + popen = script_helper.spawn_python('-c', 'from numpy import attr', cwd=tmp) + stdout, stderr = popen.communicate() + self.assertRegex(stdout, expected_error) + def test_script_shadowing_stdlib_edge_cases(self): with os_helper.temp_dir() as tmp: with open(os.path.join(tmp, "fractions.py"), "w", encoding='utf-8') as f: f.write("shadowing_module = True") + + # Unhashable str subclass with open(os.path.join(tmp, "main.py"), "w", encoding='utf-8') as f: f.write(""" import fractions @@ -932,11 +969,28 @@ class substr(str): except TypeError as e: print(str(e)) """) + popen = script_helper.spawn_python("main.py", cwd=tmp) + stdout, stderr = popen.communicate() + self.assertEqual(stdout.rstrip(), b"unhashable type: 'substr'") + + with open(os.path.join(tmp, "main.py"), "w", encoding='utf-8') as f: + f.write(""" +import fractions +fractions.shadowing_module +class substr(str): + __hash__ = None +fractions.__name__ = substr('fractions') +try: + from fractions import Fraction +except TypeError as e: + print(str(e)) +""") popen = script_helper.spawn_python("main.py", cwd=tmp) stdout, stderr = popen.communicate() self.assertEqual(stdout.rstrip(), b"unhashable type: 'substr'") + # Various issues with sys module with open(os.path.join(tmp, "main.py"), "w", encoding='utf-8') as f: f.write(""" import fractions @@ -961,18 +1015,45 @@ class substr(str): except AttributeError as e: print(str(e)) """) + popen = script_helper.spawn_python("main.py", cwd=tmp) + stdout, stderr = popen.communicate() + lines = stdout.splitlines() + self.assertEqual(len(lines), 3) + for line in lines: + self.assertEqual(line, b"module 'fractions' has no attribute 'Fraction'") + with open(os.path.join(tmp, "main.py"), "w", encoding='utf-8') as f: + f.write(""" +import fractions +fractions.shadowing_module + +import sys +sys.stdlib_module_names = None +try: + from fractions import Fraction +except ImportError as e: + print(str(e)) + +del sys.stdlib_module_names +try: + from fractions import Fraction +except ImportError as e: + print(str(e)) + +sys.path = [0] +try: + from fractions import Fraction +except ImportError as e: + print(str(e)) +""") popen = script_helper.spawn_python("main.py", cwd=tmp) stdout, stderr = popen.communicate() - self.assertEqual( - stdout.splitlines(), - [ - b"module 'fractions' has no attribute 'Fraction'", - b"module 'fractions' has no attribute 'Fraction'", - b"module 'fractions' has no attribute 'Fraction'", - ], - ) + lines = stdout.splitlines() + self.assertEqual(len(lines), 3) + for line in lines: + self.assertRegex(line, rb"cannot import name 'Fraction' from 'fractions' \(.*\)") + # Various issues with origin with open(os.path.join(tmp, "main.py"), "w", encoding='utf-8') as f: f.write(""" import fractions @@ -992,37 +1073,61 @@ class substr(str): popen = script_helper.spawn_python("main.py", cwd=tmp) stdout, stderr = popen.communicate() - self.assertEqual( - stdout.splitlines(), - [ - b"module 'fractions' has no attribute 'Fraction'", - b"module 'fractions' has no attribute 'Fraction'" - ], - ) - - def test_script_shadowing_stdlib_sys_path_modification(self): - with os_helper.temp_dir() as tmp: - with open(os.path.join(tmp, "fractions.py"), "w", encoding='utf-8') as f: - f.write("shadowing_module = True") - - expected_error = ( - rb"AttributeError: module 'fractions' has no attribute 'Fraction' " - rb"\(consider renaming '.*fractions.py' since it has the " - rb"same name as the standard library module named 'fractions' " - rb"and the import system gives it precedence\)" - ) + lines = stdout.splitlines() + self.assertEqual(len(lines), 2) + for line in lines: + self.assertEqual(line, b"module 'fractions' has no attribute 'Fraction'") with open(os.path.join(tmp, "main.py"), "w", encoding='utf-8') as f: f.write(""" -import sys -sys.path.insert(0, "this_folder_does_not_exist") import fractions -fractions.Fraction -""") +fractions.shadowing_module +del fractions.__spec__.origin +try: + from fractions import Fraction +except ImportError as e: + print(str(e)) +fractions.__spec__.origin = 0 +try: + from fractions import Fraction +except ImportError as e: + print(str(e)) +""") popen = script_helper.spawn_python("main.py", cwd=tmp) stdout, stderr = popen.communicate() - self.assertRegex(stdout, expected_error) + lines = stdout.splitlines() + self.assertEqual(len(lines), 2) + for line in lines: + self.assertRegex(line, rb"cannot import name 'Fraction' from 'fractions' \(.*\)") + + def test_script_shadowing_stdlib_sys_path_modification(self): + script_errors = [ + ( + "import fractions\nfractions.Fraction", + rb"AttributeError: module 'fractions' has no attribute 'Fraction'" + ), + ( + "from fractions import Fraction", + rb"ImportError: cannot import name 'Fraction' from 'fractions'" + ) + ] + for script, error in script_errors: + with self.subTest(script=script), os_helper.temp_dir() as tmp: + with open(os.path.join(tmp, "fractions.py"), "w", encoding='utf-8') as f: + f.write("shadowing_module = True") + with open(os.path.join(tmp, "main.py"), "w", encoding='utf-8') as f: + f.write('import sys; sys.path.insert(0, "this_folder_does_not_exist")\n') + f.write(script) + expected_error = error + ( + rb" \(consider renaming '.*fractions.py' since it has the " + rb"same name as the standard library module named 'fractions' " + rb"and prevents importing that standard library module\)" + ) + + popen = script_helper.spawn_python("main.py", cwd=tmp) + stdout, stderr = popen.communicate() + self.assertRegex(stdout, expected_error) @skip_if_dont_write_bytecode diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-09-11-01-32-07.gh-issue-123930.BkPfB6.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-09-11-01-32-07.gh-issue-123930.BkPfB6.rst new file mode 100644 index 00000000000000..3c8eb02b2dc2d6 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-09-11-01-32-07.gh-issue-123930.BkPfB6.rst @@ -0,0 +1,4 @@ +Improve the error message when a script shadowing a module from the standard +library causes :exc:`ImportError` to be raised during a "from" import. +Similarly, improve the error message when a script shadowing a third party module +attempts to "from" import an attribute from that third party module while still initialising. diff --git a/Objects/moduleobject.c b/Objects/moduleobject.c index c06badd5f3edfe..535b0d068f064f 100644 --- a/Objects/moduleobject.c +++ b/Objects/moduleobject.c @@ -836,15 +836,15 @@ _PyModuleSpec_IsUninitializedSubmodule(PyObject *spec, PyObject *name) return rc; } -static int -_get_file_origin_from_spec(PyObject *spec, PyObject **p_origin) +int +_PyModuleSpec_GetFileOrigin(PyObject *spec, PyObject **p_origin) { PyObject *has_location = NULL; int rc = PyObject_GetOptionalAttr(spec, &_Py_ID(has_location), &has_location); if (rc <= 0) { return rc; } - // If origin is not a location, or doesn't exist, or is not a str), we could consider falling + // If origin is not a location, or doesn't exist, or is not a str, we could consider falling // back to module.__file__. But the cases in which module.__file__ is not __spec__.origin // are cases in which we probably shouldn't be guessing. rc = PyObject_IsTrue(has_location); @@ -867,8 +867,8 @@ _get_file_origin_from_spec(PyObject *spec, PyObject **p_origin) return 1; } -static int -_is_module_possibly_shadowing(PyObject *origin) +int +_PyModule_IsPossiblyShadowing(PyObject *origin) { // origin must be a unicode subtype // Returns 1 if the module at origin could be shadowing a module of the @@ -993,11 +993,11 @@ _Py_module_getattro_impl(PyModuleObject *m, PyObject *name, int suppress) } PyObject *origin = NULL; - if (_get_file_origin_from_spec(spec, &origin) < 0) { + if (_PyModuleSpec_GetFileOrigin(spec, &origin) < 0) { goto done; } - int is_possibly_shadowing = _is_module_possibly_shadowing(origin); + int is_possibly_shadowing = _PyModule_IsPossiblyShadowing(origin); if (is_possibly_shadowing < 0) { goto done; } @@ -1018,20 +1018,23 @@ _Py_module_getattro_impl(PyModuleObject *m, PyObject *name, int suppress) "module '%U' has no attribute '%U' " "(consider renaming '%U' since it has the same " "name as the standard library module named '%U' " - "and the import system gives it precedence)", + "and prevents importing that standard library module)", mod_name, name, origin, mod_name); } else { int rc = _PyModuleSpec_IsInitializing(spec); - if (rc > 0) { + if (rc < 0) { + goto done; + } + else if (rc > 0) { if (is_possibly_shadowing) { assert(origin); - // For third-party modules, only mention the possibility of + // For non-stdlib modules, only mention the possibility of // shadowing if the module is being initialized. PyErr_Format(PyExc_AttributeError, "module '%U' has no attribute '%U' " "(consider renaming '%U' if it has the same name " - "as a third-party module you intended to import)", + "as a library you intended to import)", mod_name, name, origin); } else if (origin) { @@ -1049,7 +1052,8 @@ _Py_module_getattro_impl(PyModuleObject *m, PyObject *name, int suppress) mod_name, name); } } - else if (rc == 0) { + else { + assert(rc == 0); rc = _PyModuleSpec_IsUninitializedSubmodule(spec, name); if (rc > 0) { PyErr_Format(PyExc_AttributeError, diff --git a/Python/ceval.c b/Python/ceval.c index ece7ef1d32048f..beee5325cd6259 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -2802,7 +2802,7 @@ PyObject * _PyEval_ImportFrom(PyThreadState *tstate, PyObject *v, PyObject *name) { PyObject *x; - PyObject *fullmodname, *pkgname, *pkgpath, *pkgname_or_unknown, *errmsg; + PyObject *fullmodname, *mod_name, *origin, *mod_name_or_unknown, *errmsg, *spec; if (PyObject_GetOptionalAttr(v, name, &x) != 0) { return x; @@ -2810,16 +2810,16 @@ _PyEval_ImportFrom(PyThreadState *tstate, PyObject *v, PyObject *name) /* Issue #17636: in case this failed because of a circular relative import, try to fallback on reading the module directly from sys.modules. */ - if (PyObject_GetOptionalAttr(v, &_Py_ID(__name__), &pkgname) < 0) { + if (PyObject_GetOptionalAttr(v, &_Py_ID(__name__), &mod_name) < 0) { return NULL; } - if (pkgname == NULL || !PyUnicode_Check(pkgname)) { - Py_CLEAR(pkgname); + if (mod_name == NULL || !PyUnicode_Check(mod_name)) { + Py_CLEAR(mod_name); goto error; } - fullmodname = PyUnicode_FromFormat("%U.%U", pkgname, name); + fullmodname = PyUnicode_FromFormat("%U.%U", mod_name, name); if (fullmodname == NULL) { - Py_DECREF(pkgname); + Py_DECREF(mod_name); return NULL; } x = PyImport_GetModule(fullmodname); @@ -2827,63 +2827,121 @@ _PyEval_ImportFrom(PyThreadState *tstate, PyObject *v, PyObject *name) if (x == NULL && !_PyErr_Occurred(tstate)) { goto error; } - Py_DECREF(pkgname); + Py_DECREF(mod_name); return x; + error: - if (pkgname == NULL) { - pkgname_or_unknown = PyUnicode_FromString(""); - if (pkgname_or_unknown == NULL) { + if (mod_name == NULL) { + mod_name_or_unknown = PyUnicode_FromString(""); + if (mod_name_or_unknown == NULL) { return NULL; } } else { - pkgname_or_unknown = pkgname; + mod_name_or_unknown = mod_name; } + // mod_name is no longer an owned reference + assert(mod_name_or_unknown); + assert(mod_name == NULL || mod_name == mod_name_or_unknown); - pkgpath = NULL; - if (PyModule_Check(v)) { - pkgpath = PyModule_GetFilenameObject(v); - if (pkgpath == NULL) { - if (!PyErr_ExceptionMatches(PyExc_SystemError)) { - Py_DECREF(pkgname_or_unknown); - return NULL; + origin = NULL; + if (PyObject_GetOptionalAttr(v, &_Py_ID(__spec__), &spec) < 0) { + Py_DECREF(mod_name_or_unknown); + return NULL; + } + if (spec == NULL) { + errmsg = PyUnicode_FromFormat( + "cannot import name %R from %R (unknown location)", + name, mod_name_or_unknown + ); + goto done_with_errmsg; + } + if (_PyModuleSpec_GetFileOrigin(spec, &origin) < 0) { + goto done; + } + + int is_possibly_shadowing = _PyModule_IsPossiblyShadowing(origin); + if (is_possibly_shadowing < 0) { + goto done; + } + int is_possibly_shadowing_stdlib = 0; + if (is_possibly_shadowing) { + PyObject *stdlib_modules = PySys_GetObject("stdlib_module_names"); + if (stdlib_modules && PyAnySet_Check(stdlib_modules)) { + is_possibly_shadowing_stdlib = PySet_Contains(stdlib_modules, mod_name_or_unknown); + if (is_possibly_shadowing_stdlib < 0) { + goto done; } - // module filename missing - _PyErr_Clear(tstate); } } - if (pkgpath == NULL || !PyUnicode_Check(pkgpath)) { - Py_CLEAR(pkgpath); + + if (is_possibly_shadowing_stdlib) { + assert(origin); errmsg = PyUnicode_FromFormat( - "cannot import name %R from %R (unknown location)", - name, pkgname_or_unknown + "cannot import name %R from %R " + "(consider renaming %R since it has the same " + "name as the standard library module named %R " + "and prevents importing that standard library module)", + name, mod_name_or_unknown, origin, mod_name_or_unknown ); } else { - PyObject *spec; - int rc = PyObject_GetOptionalAttr(v, &_Py_ID(__spec__), &spec); - if (rc > 0) { - rc = _PyModuleSpec_IsInitializing(spec); - Py_DECREF(spec); - } + int rc = _PyModuleSpec_IsInitializing(spec); if (rc < 0) { - Py_DECREF(pkgname_or_unknown); - Py_DECREF(pkgpath); - return NULL; + goto done; + } + else if (rc > 0) { + if (is_possibly_shadowing) { + assert(origin); + // For non-stdlib modules, only mention the possibility of + // shadowing if the module is being initialized. + errmsg = PyUnicode_FromFormat( + "cannot import name %R from %R " + "(consider renaming %R if it has the same name " + "as a library you intended to import)", + name, mod_name_or_unknown, origin + ); + } + else if (origin) { + errmsg = PyUnicode_FromFormat( + "cannot import name %R from partially initialized module %R " + "(most likely due to a circular import) (%S)", + name, mod_name_or_unknown, origin + ); + } + else { + errmsg = PyUnicode_FromFormat( + "cannot import name %R from partially initialized module %R " + "(most likely due to a circular import)", + name, mod_name_or_unknown + ); + } + } + else { + assert(rc == 0); + if (origin) { + errmsg = PyUnicode_FromFormat( + "cannot import name %R from %R (%S)", + name, mod_name_or_unknown, origin + ); + } + else { + errmsg = PyUnicode_FromFormat( + "cannot import name %R from %R (unknown location)", + name, mod_name_or_unknown + ); + } } - const char *fmt = - rc ? - "cannot import name %R from partially initialized module %R " - "(most likely due to a circular import) (%S)" : - "cannot import name %R from %R (%S)"; - - errmsg = PyUnicode_FromFormat(fmt, name, pkgname_or_unknown, pkgpath); } - /* NULL checks for errmsg and pkgname done by PyErr_SetImportError. */ - _PyErr_SetImportErrorWithNameFrom(errmsg, pkgname, pkgpath, name); - Py_XDECREF(errmsg); - Py_DECREF(pkgname_or_unknown); - Py_XDECREF(pkgpath); +done_with_errmsg: + /* NULL checks for errmsg, mod_name, origin done by PyErr_SetImportError. */ + _PyErr_SetImportErrorWithNameFrom(errmsg, mod_name, origin, name); + Py_DECREF(errmsg); + +done: + Py_XDECREF(origin); + Py_XDECREF(spec); + Py_DECREF(mod_name_or_unknown); return NULL; } @@ -3243,5 +3301,3 @@ _PyEval_LoadName(PyThreadState *tstate, _PyInterpreterFrame *frame, PyObject *na } return value; } - - From 1306f33c84b2745aa8af5e3e8f680aa80b836c0e Mon Sep 17 00:00:00 2001 From: Kerim Kabirov Date: Thu, 24 Oct 2024 22:52:21 +0200 Subject: [PATCH 026/127] gh-125933: Add ARIA labels to select elements in the version switcher (#125934) Co-authored-by: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> --- Doc/tools/static/rtd_switcher.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Doc/tools/static/rtd_switcher.js b/Doc/tools/static/rtd_switcher.js index f5dc7045a0dbc4..2bf01a002db90c 100644 --- a/Doc/tools/static/rtd_switcher.js +++ b/Doc/tools/static/rtd_switcher.js @@ -7,7 +7,7 @@ document.addEventListener("readthedocs-addons-data-ready", function(event) { const config = event.detail.data() const versionSelect = ` - ${ config.versions.active.map( (version) => `