From 7717eae5ae666fe9fcb02ee10ec52108cf532bf0 Mon Sep 17 00:00:00 2001 From: Alex Waygood Date: Tue, 15 Oct 2024 21:51:29 +0100 Subject: [PATCH 1/6] [red-knot] Add control flow for try/except blocks (v3) --- .../mdtest/exception/control_flow.md | 622 ++++++++++++++++++ .../src/semantic_index/builder.rs | 129 +++- .../semantic_index/builder/except_handlers.rs | 80 +++ crates/ruff_benchmark/benches/red_knot.rs | 26 +- 4 files changed, 831 insertions(+), 26 deletions(-) create mode 100644 crates/red_knot_python_semantic/resources/mdtest/exception/control_flow.md create mode 100644 crates/red_knot_python_semantic/src/semantic_index/builder/except_handlers.rs diff --git a/crates/red_knot_python_semantic/resources/mdtest/exception/control_flow.md b/crates/red_knot_python_semantic/resources/mdtest/exception/control_flow.md new file mode 100644 index 0000000000000..4643ad5bdf3d3 --- /dev/null +++ b/crates/red_knot_python_semantic/resources/mdtest/exception/control_flow.md @@ -0,0 +1,622 @@ +# Control flow for exception handlers + +Tests that assert we understand the possible "definition states" (which symbols +might or might not be defined) in various branches of a +`try`/`except`/`else`/`finally` block. + +For a full writeup on the semantics of exception handlers, +see [this document](https://astral-sh.notion.site/Exception-handler-control-flow-11348797e1ca80bb8ce1e9aedbbe439d). + +The tests throughout this Markdown document use functions with names starting +with `could_raise_*` to mark definitions that might or might not succeed +(as the function could raise an exception). A type checker must assume that any +arbitrary function call could raise an exception in Python; this is just a +naming convention used in these tests for clarity, and to future-proof the +tests against possible future improvements whereby certain statements or +expressions could potentially be inferred as being incapable of causing an +exception to be raised. + +## A single bare `except` + +If there are different types for a single variable `x` in the two branches, we +can't determine which branch might have been taken, so the inferred type after +the `try`/`except` block is the union of type at the end of the `try` block +(`str`) and the type at the end of the `except` block (`Literal[2]`): + +```py path=union_type_inferred.py +def could_raise_returns_str() -> str: + return 'foo' + +x = 1 + +try: + reveal_type(x) # revealed: Literal[1] + x = could_raise_returns_str() + reveal_type(x) # revealed: str +except: + reveal_type(x) # revealed: Literal[1] | str + x = 2 + reveal_type(x) # revealed: Literal[2] + +reveal_type(x) # revealed: str | Literal[2] +``` + +If `x` has the same type at the end of both branches, the branches unify and +`x` is not inferred as having a union type following the `try`/`except` block: + +```py path=branches_unify_to_non_union_type.py +def could_raise_returns_str() -> str: + return 'foo' + +x = 1 + +try: + x = could_raise_returns_str() +except: + x = could_raise_returns_str() + +reveal_type(x) # revealed: str +``` + +## A non-bare `except` + +For simple `try`/`except` blocks, `except TypeError:` has the same control flow +as `except:`. There might have been an unhandled exception, but (as described +in [the document on exception-handling semantics](https://astral-sh.notion.site/Exception-handler-control-flow-11348797e1ca80bb8ce1e9aedbbe439d)) +that would lead to termination of the scope -- it's irrelevant to consider this +possibility. + +```py +def could_raise_returns_str() -> str: + return 'foo' + +x = 1 + +try: + reveal_type(x) # revealed: Literal[1] + x = could_raise_returns_str() + reveal_type(x) # revealed: str +except TypeError: + reveal_type(x) # revealed: Literal[1] | str + x = 2 + reveal_type(x) # revealed: Literal[2] + +reveal_type(x) # revealed: str | Literal[2] +``` + +## Multiple `except` branches + +If the scope reaches the final `reveal_type` call in this example, +either the `try`-block suite of statements was executed in its entirety, +or exactly one `except` was executed in its entirety. The inferred type of `x` +should be the union of the types at the end of the three suites: + +- At the end of `try`, `type(x) == str` +- At the end of `except TypeError`, `x == 2` +- At the end of `except ValueError`, `x == 3` + +```py +def could_raise_returns_str() -> str: + return 'foo' + +x = 1 + +try: + reveal_type(x) # revealed: Literal[1] + x = could_raise_returns_str() + reveal_type(x) # revealed: str +except TypeError: + reveal_type(x) # revealed: Literal[1] | str + x = 2 + reveal_type(x) # revealed: Literal[2] +except ValueError: + reveal_type(x) # revealed: Literal[1] | str + x = 3 + reveal_type(x) # revealed: Literal[3] + +reveal_type(x) # revealed: str | Literal[2, 3] +``` + +## Exception handlers with `else` branches (but no `finally`) + +If we reach the `reveal_type` call at the end of the scope, +either the `try` and `else` suites were both executed in their entireties, +or the `except` suite was executed in its entirety. The type of `x` will be the +union of the type at the end of the `else` suite and the type at the end of the +`except` suite: + +- At the end of `else`, `x == 3` +- At the end of `except`, `x == 2` + +```py path=single_except.py +def could_raise_returns_str() -> str: + return 'foo' + +x = 1 + +try: + reveal_type(x) # revealed: Literal[1] + x = could_raise_returns_str() + reveal_type(x) # revealed: str +except TypeError: + reveal_type(x) # revealed: Literal[1] | str + x = 2 + reveal_type(x) # revealed: Literal[2] +else: + reveal_type(x) # revealed: str + x = 3 + reveal_type(x) # revealed: Literal[3] + +reveal_type(x) # revealed: Literal[2, 3] +``` + +For a block that has multiple `except` branches and an `else` branch, the same +principle applies. In order to reach the final `reveal_type` call, +either exactly one of the `except` suites must have been executed in its +entirety, or the `try` suite and the `else` suite must both have been executed +in their entireties: + +```py +def could_raise_returns_str() -> str: + return 'foo' + +x = 1 + +try: + reveal_type(x) # revealed: Literal[1] + x = could_raise_returns_str() + reveal_type(x) # revealed: str +except TypeError: + reveal_type(x) # revealed: Literal[1] | str + x = 2 + reveal_type(x) # revealed: Literal[2] +except ValueError: + reveal_type(x) # revealed: Literal[1] | str + x = 3 + reveal_type(x) # revealed: Literal[3] +else: + reveal_type(x) # revealed: str + x = 4 + reveal_type(x) # revealed: Literal[4] + +reveal_type(x) # revealed: Literal[2, 3, 4] +``` + +## Exception handlers with `finally` branches (but no `except` branches) + +A `finally` suite is *always* executed, so if we reach the `reveal_type` call +at the end of this example, we know that `x` *must* have been reassigned to `2` +during the `finally` suite; therefore the type of `x` at the end of the example +is `Literal[2]`: + +```py path=redef_in_finally.py +def could_raise_returns_str() -> str: + return 'foo' + +x = 1 + +try: + reveal_type(x) # revealed: Literal[1] + x = could_raise_returns_str() + reveal_type(x) # revealed: str +finally: + x = 2 + reveal_type(x) # revealed: Literal[2] + +reveal_type(x) # revealed: Literal[2] +``` + +If `x` was *not* redefined in the `finally` suite, however, things are somewhat +more complicated. If we reach the final `reveal_type` call, +unlike the state when we're visiting the `finally` suite, +we know that the `try`-block suite ran to completion. +This means that there are fewer possible states at this point than there were +when we were inside the `finally` block. + +(Our current model does *not* correctly infer the types *inside* `finally` +suites, however; this is still a TODO item for us.) + +```py path=no_redef_in_finally.py +def could_raise_returns_str() -> str: + return 'foo' + +x = 1 + +try: + reveal_type(x) # revealed: Literal[1] + x = could_raise_returns_str() + reveal_type(x) # revealed: str +finally: + # TODO: should be Literal[1] | str + reveal_type(x) # revealed: str + +reveal_type(x) # revealed: str +``` + +## Combining an `except` branch with a `finally` branch + +As previously stated, we do not yet have accurate inference for types *inside* +`finally` suites. When we do, however, we will have to take account of the +following possibilities inside `finally` suites: + +- The `try` suite could have run to completion +- Or we could have jumped from halfway through the `try` suite to an `except` + suite, and the `except` suite ran to completion +- Or we could have jumped from halfway through the `try`suite straight to the + `finally` suite +- Or we could have jumped from halfway through the `try` suite to an + `except` suite, only for an exception raised in the `except` suite to cause + us to jump to the `finally` suite before the `except` suite ran to completion + +```py path=redef_in_finally.py +def could_raise_returns_str() -> str: + return 'foo' + +def could_raise_returns_bytes() -> bytes: + return b'foo' + +def could_raise_returns_bool() -> bool: + return True + +x = 1 + +try: + reveal_type(x) # revealed: Literal[1] + x = could_raise_returns_str() + reveal_type(x) # revealed: str +except TypeError: + reveal_type(x) # revealed: Literal[1] | str + x = could_raise_returns_bytes() + reveal_type(x) # revealed: bytes + x = could_raise_returns_bool() + reveal_type(x) # revealed: bool +finally: + # TODO: should be `Literal[1] | str | bytes | bool` + reveal_type(x) # revealed: str | bool + x = 2 + reveal_type(x) # revealed: Literal[2] + +reveal_type(x) # revealed: Literal[2] +``` + +Example without a redefinition in the `finally` suite. As before, there +*should* be fewer possibilities after completion of the `finally` suite than +there were during the `finally` suite itself (since in some control-flow +possibilities, some exceptions were merely *suspended* during the `finally` +suite, and lead to the scope's termination following conclusion of the +`finally` suite). + +```py path=no_redef_in_finally.py +def could_raise_returns_str() -> str: + return 'foo' + +def could_raise_returns_bytes() -> bytes: + return b'foo' + +def could_raise_returns_bool() -> bool: + return True + +x = 1 + +try: + reveal_type(x) # revealed: Literal[1] + x = could_raise_returns_str() + reveal_type(x) # revealed: str +except TypeError: + reveal_type(x) # revealed: Literal[1] | str + x = could_raise_returns_bytes() + reveal_type(x) # revealed: bytes + x = could_raise_returns_bool() + reveal_type(x) # revealed: bool +finally: + # TODO: should be `Literal[1] | str | bytes | bool` + reveal_type(x) # revealed: str | bool + +reveal_type(x) # revealed: str | bool +``` + +An example with multiple `except` branches and a `finally` branch: + +```py path=multiple_except_branches.py +def could_raise_returns_str() -> str: + return 'foo' + +def could_raise_returns_bytes() -> bytes: + return b'foo' + +def could_raise_returns_bool() -> bool: + return True + +def could_raise_returns_memoryview() -> memoryview: + return memoryview(b"") + +def could_raise_returns_float() -> float: + return 3.14 + +x = 1 + +try: + reveal_type(x) # revealed: Literal[1] + x = could_raise_returns_str() + reveal_type(x) # revealed: str +except TypeError: + reveal_type(x) # revealed: Literal[1] | str + x = could_raise_returns_bytes() + reveal_type(x) # revealed: bytes + x = could_raise_returns_bool() + reveal_type(x) # revealed: bool +except ValueError: + reveal_type(x) # revealed: Literal[1] | str + x = could_raise_returns_memoryview() + reveal_type(x) # revealed: memoryview + x = could_raise_returns_float() + reveal_type(x) # revealed: float +finally: + # TODO: should be `Literal[1] | str | bytes | bool | memoryview | float` + reveal_type(x) # revealed: str | bool | float + +reveal_type(x) # revealed: str | bool | float +``` + +## Combining `except`, `else` and `finally` branches + +If the exception handler has an `else` branch, we must also take into account +the possibility that control flow could have jumped to the `finally` suite from +partway through the `else` suite due to an exception raised *there*. + +```py path=single_except_branch.py +def could_raise_returns_str() -> str: + return 'foo' + +def could_raise_returns_bytes() -> bytes: + return b'foo' + +def could_raise_returns_bool() -> bool: + return True + +def could_raise_returns_memoryview() -> memoryview: + return memoryview(b"") + +def could_raise_returns_float() -> float: + return 3.14 + +x = 1 + +try: + reveal_type(x) # revealed: Literal[1] + x = could_raise_returns_str() + reveal_type(x) # revealed: str +except TypeError: + reveal_type(x) # revealed: Literal[1] | str + x = could_raise_returns_bytes() + reveal_type(x) # revealed: bytes + x = could_raise_returns_bool() + reveal_type(x) # revealed: bool +else: + reveal_type(x) # revealed: str + x = could_raise_returns_memoryview() + reveal_type(x) # revealed: memoryview + x = could_raise_returns_float() + reveal_type(x) # revealed: float +finally: + # TODO: should be `Literal[1] | str | bytes | bool | memoryview | float` + reveal_type(x) # revealed: bool | float + +reveal_type(x) # revealed: bool | float +``` + +The same again, this time with multiple `except` branches: + +```py path=multiple_except_branches.py +def could_raise_returns_str() -> str: + return 'foo' + +def could_raise_returns_bytes() -> bytes: + return b'foo' + +def could_raise_returns_bool() -> bool: + return True + +def could_raise_returns_memoryview() -> memoryview: + return memoryview(b"") + +def could_raise_returns_float() -> float: + return 3.14 + +def could_raise_returns_range() -> range: + return range(42) + +def could_raise_returns_slice() -> slice: + return slice(None) + +x = 1 + +try: + reveal_type(x) # revealed: Literal[1] + x = could_raise_returns_str() + reveal_type(x) # revealed: str +except TypeError: + reveal_type(x) # revealed: Literal[1] | str + x = could_raise_returns_bytes() + reveal_type(x) # revealed: bytes + x = could_raise_returns_bool() + reveal_type(x) # revealed: bool +except ValueError: + reveal_type(x) # revealed: Literal[1] | str + x = could_raise_returns_memoryview() + reveal_type(x) # revealed: memoryview + x = could_raise_returns_float() + reveal_type(x) # revealed: float +else: + reveal_type(x) # revealed: str + x = could_raise_returns_range() + reveal_type(x) # revealed: range + x = could_raise_returns_slice() + reveal_type(x) # revealed: slice +finally: + # TODO: should be `Literal[1] | str | bytes | bool | memoryview | float | range | slice` + reveal_type(x) # revealed: bool | float | slice + +reveal_type(x) # revealed: bool | float | slice +``` + +## Nested `try`/`except` blocks + +It would take advanced analysis, which we are not yet capable of, to be able +to determine that an exception handler always suppresses all exceptions. This +is partly because it is possible for statements in `except`, `else` and +`finally` suites to raise exceptions as well as statements in `try` suites. +This means that if an exception handler is nested inside the `try` statement of +an enclosing exception handler, it should (at least for now) be treated the +same as any other node: as a suite containing statements that could possibly +raise exceptions, which would lead to control flow jumping out of that suite +prior to the suite running to completion. + +```py +def could_raise_returns_str() -> str: + return 'foo' + +def could_raise_returns_bytes() -> bytes: + return b'foo' + +def could_raise_returns_bool() -> bool: + return True + +def could_raise_returns_memoryview() -> memoryview: + return memoryview(b"") + +def could_raise_returns_float() -> float: + return 3.14 + +def could_raise_returns_range() -> range: + return range(42) + +def could_raise_returns_slice() -> slice: + return slice(None) + +def could_raise_returns_complex() -> complex: + return 3j + +def could_raise_returns_bytearray() -> bytearray: + return bytearray() + +class Foo: ... +class Bar: ... + +def could_raise_returns_Foo() -> Foo: + return Foo() + +def could_raise_returns_Bar() -> Bar: + return Bar() + +x = 1 + +try: + try: + reveal_type(x) # revealed: Literal[1] + x = could_raise_returns_str() + reveal_type(x) # revealed: str + except TypeError: + reveal_type(x) # revealed: Literal[1] | str + x = could_raise_returns_bytes() + reveal_type(x) # revealed: bytes + x = could_raise_returns_bool() + reveal_type(x) # revealed: bool + except ValueError: + reveal_type(x) # revealed: Literal[1] | str + x = could_raise_returns_memoryview() + reveal_type(x) # revealed: memoryview + x = could_raise_returns_float() + reveal_type(x) # revealed: float + else: + reveal_type(x) # revealed: str + x = could_raise_returns_range() + reveal_type(x) # revealed: range + x = could_raise_returns_slice() + reveal_type(x) # revealed: slice + finally: + # TODO: should be `Literal[1] | str | bytes | bool | memoryview | float | range | slice` + reveal_type(x) # revealed: bool | float | slice + x = 2 + reveal_type(x) # revealed: Literal[2] + reveal_type(x) # revealed: Literal[2] +except: + reveal_type(x) # revealed: Literal[1, 2] | str | bytes | bool | memoryview | float | range | slice + x = could_raise_returns_complex() + reveal_type(x) # revealed: complex + x = could_raise_returns_bytearray() + reveal_type(x) # revealed: bytearray +else: + reveal_type(x) # revealed: Literal[2] + x = could_raise_returns_Foo() + reveal_type(x) # revealed: Foo + x = could_raise_returns_Bar() + reveal_type(x) # revealed: Bar +finally: + # TODO: should be `Literal[1, 2] | str | bytes | bool | memoryview | float | range | slice | complex | bytearray | Foo | Bar` + reveal_type(x) # revealed: bytearray | Bar + +# Either one `except` branch or the `else` +# must have been taken and completed to get here: +reveal_type(x) # revealed: bytearray | Bar +``` + +## Nested scopes inside `try` blocks + +Shadowing a variable in an inner scope has no effect on type inference of the +variable by that name in the outer scope: + +```py +def could_raise_returns_str() -> str: + return 'foo' + +def could_raise_returns_bytes() -> bytes: + return b'foo' + +def could_raise_returns_range() -> range: + return range(42) + +def could_raise_returns_bytearray() -> bytearray: + return bytearray() + +def could_raise_returns_float() -> float: + return 3.14 + +x = 1 + +try: + def foo(param=could_raise_returns_str()): + x = could_raise_returns_str() + + try: + reveal_type(x) # revealed: str + x = could_raise_returns_bytes() + reveal_type(x) # revealed: bytes + except: + reveal_type(x) # revealed: str | bytes + x = could_raise_returns_bytearray() + reveal_type(x) # revealed: bytearray + x = could_raise_returns_float() + reveal_type(x) # revealed: float + finally: + # TODO: should be `str | bytes | bytearray | float` + reveal_type(x) # revealed: bytes | float + reveal_type(x) # revealed: bytes | float + + x = foo + reveal_type(x) # revealed: Literal[foo] +except: + reveal_type(x) # revealed: Literal[1] | Literal[foo] + + class Bar: + x = could_raise_returns_range() + reveal_type(x) # revealed: range + + x = Bar + reveal_type(x) # revealed: Literal[Bar] +finally: + # TODO: should be `Literal[1] | Literal[foo] | Literal[Bar]` + reveal_type(x) # revealed: Literal[foo] | Literal[Bar] + +reveal_type(x) # revealed: Literal[foo] | Literal[Bar] +``` diff --git a/crates/red_knot_python_semantic/src/semantic_index/builder.rs b/crates/red_knot_python_semantic/src/semantic_index/builder.rs index 1c5c03e79a279..d2b9c2d8a1a77 100644 --- a/crates/red_knot_python_semantic/src/semantic_index/builder.rs +++ b/crates/red_knot_python_semantic/src/semantic_index/builder.rs @@ -1,5 +1,6 @@ use std::sync::Arc; +use except_handlers::{TryNodeContextStack, TryNodeContextStackManager}; use rustc_hash::FxHashMap; use ruff_db::files::File; @@ -32,6 +33,8 @@ use super::definition::{ MatchPatternDefinitionNodeRef, WithItemDefinitionNodeRef, }; +mod except_handlers; + pub(super) struct SemanticIndexBuilder<'db> { // Builder state db: &'db dyn Db, @@ -44,6 +47,8 @@ pub(super) struct SemanticIndexBuilder<'db> { current_match_case: Option>, /// Flow states at each `break` in the current loop. loop_break_states: Vec, + /// Per-scope contexts regarding nested `try`/`except` statements + try_node_context_stack_manager: TryNodeContextStackManager, /// Flags about the file's global scope has_future_annotations: bool, @@ -70,6 +75,7 @@ impl<'db> SemanticIndexBuilder<'db> { current_assignment: None, current_match_case: None, loop_break_states: vec![], + try_node_context_stack_manager: TryNodeContextStackManager::default(), has_future_annotations: false, @@ -97,6 +103,11 @@ impl<'db> SemanticIndexBuilder<'db> { .expect("Always to have a root scope") } + fn try_node_context_stack(&mut self) -> &mut TryNodeContextStack { + self.try_node_context_stack_manager + .current_try_context_stack() + } + fn push_scope(&mut self, node: NodeWithScopeRef) { let parent = self.current_scope(); self.push_scope_with_parent(node, Some(parent)); @@ -110,6 +121,7 @@ impl<'db> SemanticIndexBuilder<'db> { kind: node.scope_kind(), descendents: children_start..children_start, }; + self.try_node_context_stack_manager.enter_nested_scope(); let file_scope_id = self.scopes.push(scope); self.symbol_tables.push(SymbolTableBuilder::new()); @@ -139,6 +151,7 @@ impl<'db> SemanticIndexBuilder<'db> { let children_end = self.scopes.next_index(); let scope = &mut self.scopes[id]; scope.descendents = scope.descendents.start..children_end; + self.try_node_context_stack_manager.exit_scope(); id } @@ -226,6 +239,8 @@ impl<'db> SemanticIndexBuilder<'db> { DefinitionCategory::Declaration => use_def.record_declaration(symbol, definition), DefinitionCategory::Binding => use_def.record_binding(symbol, definition), } + let snapshot = self.flow_snapshot(); + self.try_node_context_stack().record_definition(&snapshot); definition } @@ -764,40 +779,104 @@ where is_star, range: _, }) => { - self.visit_body(body); + // Save the state prior to visiting any of the `try` block. + // + // Potentially none of the `try` block could have been executed prior to executing + // the `except` block(s) and/or the `finally` block. + // We will merge this state with all of the intermediate + // states during the `try` block before visiting those suites. + let pre_try_block_state = self.flow_snapshot(); - for except_handler in handlers { - let ast::ExceptHandler::ExceptHandler(except_handler) = except_handler; - let ast::ExceptHandlerExceptHandler { - name: symbol_name, - type_: handled_exceptions, - body: handler_body, - range: _, - } = except_handler; + self.try_node_context_stack().push_context(); - if let Some(handled_exceptions) = handled_exceptions { - self.visit_expr(handled_exceptions); + // Visit the `try` block! + self.visit_body(body); + + let mut post_except_states = vec![]; + + // Take a record also of all the intermediate states we encountered + // while visiting the `try` block + let try_block_snapshots = self.try_node_context_stack().pop_context(); + + if !handlers.is_empty() { + // Save the state immediately *after* visiting the `try` block + // but *before* we prepare for visiting the `except` block(s). + // + // We will revert to this state prior to visiting the the `else` block, + // as there necessarily must have been 0 `except` blocks executed + // if we hit the `else` block. + let post_try_block_state = self.flow_snapshot(); + + // Prepare for visiting the `except` block(s) + self.flow_restore(pre_try_block_state); + for state in try_block_snapshots { + self.flow_merge(state); } - // If `handled_exceptions` above was `None`, it's something like `except as e:`, - // which is invalid syntax. However, it's still pretty obvious here that the user - // *wanted* `e` to be bound, so we should still create a definition here nonetheless. - if let Some(symbol_name) = symbol_name { - let symbol = self.add_symbol(symbol_name.id.clone()); - - self.add_definition( - symbol, - DefinitionNodeRef::ExceptHandler(ExceptHandlerDefinitionNodeRef { - handler: except_handler, - is_star: *is_star, - }), - ); + let pre_except_state = self.flow_snapshot(); + let num_handlers = handlers.len(); + + for (i, except_handler) in handlers.iter().enumerate() { + let ast::ExceptHandler::ExceptHandler(except_handler) = except_handler; + let ast::ExceptHandlerExceptHandler { + name: symbol_name, + type_: handled_exceptions, + body: handler_body, + range: _, + } = except_handler; + + if let Some(handled_exceptions) = handled_exceptions { + self.visit_expr(handled_exceptions); + } + + // If `handled_exceptions` above was `None`, it's something like `except as e:`, + // which is invalid syntax. However, it's still pretty obvious here that the user + // *wanted* `e` to be bound, so we should still create a definition here nonetheless. + if let Some(symbol_name) = symbol_name { + let symbol = self.add_symbol(symbol_name.id.clone()); + + self.add_definition( + symbol, + DefinitionNodeRef::ExceptHandler(ExceptHandlerDefinitionNodeRef { + handler: except_handler, + is_star: *is_star, + }), + ); + } + + self.visit_body(handler_body); + // Each `except` block is mutually exclusive with all other `except` blocks. + post_except_states.push(self.flow_snapshot()); + + // It's unnecessary to do the `self.flow_restore()` call for the final except handler, + // as we'll immediately call `self.flow_restore()` to a different state + // as soon as this loop over the handlers terminates. + if i < (num_handlers - 1) { + self.flow_restore(pre_except_state.clone()); + } } - self.visit_body(handler_body); + // If we get to the `else` block, we know that 0 of the `except` blocks can have been executed, + // and the entire `try` block must have been executed: + self.flow_restore(post_try_block_state); } self.visit_body(orelse); + + for post_except_state in post_except_states { + self.flow_merge(post_except_state); + } + + // TODO: there's lots of complexity here that isn't yet handled by our model. + // In order to accurately model the semantics of `finally` suites, we in fact need to visit + // the suite twice: once under the (current) assumption that either the `try + else` suite + // ran to completion or exactly one `except` branch ran to completion, and then again under + // the assumption that potentially none of the branches ran to completion and we in fact + // jumped from a `try`, `else` or `except` branch straight into the `finally` branch. + // This requires some rethinking of some fundamental assumptions the `use-def` map makes. + // For more details, see: + // - https://astral-sh.notion.site/Exception-handler-control-flow-11348797e1ca80bb8ce1e9aedbbe439d + // - https://github.com/astral-sh/ruff/pull/13633#discussion_r1788626702 self.visit_body(finalbody); } _ => { diff --git a/crates/red_knot_python_semantic/src/semantic_index/builder/except_handlers.rs b/crates/red_knot_python_semantic/src/semantic_index/builder/except_handlers.rs new file mode 100644 index 0000000000000..d04994f1b7de7 --- /dev/null +++ b/crates/red_knot_python_semantic/src/semantic_index/builder/except_handlers.rs @@ -0,0 +1,80 @@ +use crate::semantic_index::use_def::FlowSnapshot; + +/// An abstraction over the fact that each scope should have its own [`TryNodeContextStack`] +#[derive(Debug, Default)] +pub(super) struct TryNodeContextStackManager(Vec); + +impl TryNodeContextStackManager { + /// Push a new [`TryNodeContextStack`] onto the stack of stacks. + /// + /// Each [`TryNodeContextStack`] is only valid for a single scope + pub(super) fn enter_nested_scope(&mut self) { + self.0.push(TryNodeContextStack::default()); + } + + /// Retrieve the [`TryNodeContextStack`] that is relevant for the current scope. + pub(super) fn current_try_context_stack(&mut self) -> &mut TryNodeContextStack { + self.0 + .last_mut() + .expect("There should always be at least one `TryBlockContexts` on the stack") + } + + /// Pop a new [`TryNodeContextStack`] off the stack of stacks. + /// + /// Each [`TryNodeContextStack`] is only valid for a single scope + pub(super) fn exit_scope(&mut self) { + let popped_context = self.0.pop(); + debug_assert!( + popped_context.is_some(), + "exit_scope() should never be called on an empty stack \ +(this indicates an unbalanced `enter_nested_scope()`/`exit_scope()` pair of calls)" + ); + } +} + +/// The contexts of nested `try`/`except` blocks for a single scope +#[derive(Debug, Default)] +pub(super) struct TryNodeContextStack(Vec); + +impl TryNodeContextStack { + /// Push a new [`TryNodeContext`] for recording intermediate states + /// while visiting a [`ruff_python_ast::StmtTry`] node that has a `finally` branch. + pub(super) fn push_context(&mut self) { + self.0.push(TryNodeContext::default()); + } + + /// Pop a [`TryNodeContext`] off the stack. + pub(super) fn pop_context(&mut self) -> Vec { + let TryNodeContext { + try_suite_snapshots, + } = self + .0 + .pop() + .expect("Cannot pop a `try` block off an empty `TryBlockContexts` stack"); + try_suite_snapshots + } + + /// For each `try` block on the stack, push the snapshot onto the `try` block + pub(super) fn record_definition(&mut self, snapshot: &FlowSnapshot) { + for context in &mut self.0 { + context.record_definition(snapshot.clone()); + } + } +} + +/// Context for tracking definitions over the course of a single +/// [`ruff_python_ast::StmtTry`] node +/// +/// It will likely be necessary to add more fields to this struct in the future +/// when we add more advanced handling of `finally` branches. +#[derive(Debug, Default)] +struct TryNodeContext { + try_suite_snapshots: Vec, +} + +impl TryNodeContext { + /// Take a record of what the internal state looked like after a definition + fn record_definition(&mut self, snapshot: FlowSnapshot) { + self.try_suite_snapshots.push(snapshot); + } +} diff --git a/crates/ruff_benchmark/benches/red_knot.rs b/crates/ruff_benchmark/benches/red_knot.rs index eb8b186646215..1a002f190b8ce 100644 --- a/crates/ruff_benchmark/benches/red_knot.rs +++ b/crates/ruff_benchmark/benches/red_knot.rs @@ -22,10 +22,15 @@ struct Case { const TOMLLIB_312_URL: &str = "https://raw.githubusercontent.com/python/cpython/8e8a4baf652f6e1cee7acde9d78c4b6154539748/Lib/tomllib"; -// The failed import from 'collections.abc' is due to lack of support for 'import *'. static EXPECTED_DIAGNOSTICS: &[&str] = &[ + // We don't support `ModuleType`-attributes as globals yet: "/src/tomllib/__init__.py:10:30: Name `__name__` used when not defined", + // We don't support `*` imports yet: "/src/tomllib/_parser.py:7:29: Module `collections.abc` has no member `Iterable`", + // We don't support terminal statements in control flow yet: + "/src/tomllib/_parser.py:353:5: Method `__getitem__` of type `Unbound | @Todo` is not callable on object of type `Unbound | @Todo`", + "/src/tomllib/_parser.py:455:9: Method `__getitem__` of type `Unbound | @Todo` is not callable on object of type `Unbound | @Todo`", + // True positives! "Line 69 is too long (89 characters)", "Use double quotes for strings", "Use double quotes for strings", @@ -34,6 +39,25 @@ static EXPECTED_DIAGNOSTICS: &[&str] = &[ "Use double quotes for strings", "Use double quotes for strings", "Use double quotes for strings", + // We don't support terminal statements in control flow yet: + "/src/tomllib/_parser.py:66:18: Name `s` used when possibly not defined", + "/src/tomllib/_parser.py:98:12: Name `char` used when possibly not defined", + "/src/tomllib/_parser.py:101:12: Name `char` used when possibly not defined", + "/src/tomllib/_parser.py:104:14: Name `char` used when possibly not defined", + "/src/tomllib/_parser.py:104:14: Name `char` used when possibly not defined", + "/src/tomllib/_parser.py:115:14: Name `char` used when possibly not defined", + "/src/tomllib/_parser.py:115:14: Name `char` used when possibly not defined", + "/src/tomllib/_parser.py:126:12: Name `char` used when possibly not defined", + "/src/tomllib/_parser.py:348:20: Name `nest` used when possibly not defined", + "/src/tomllib/_parser.py:353:5: Name `nest` used when possibly not defined", + "/src/tomllib/_parser.py:453:24: Name `nest` used when possibly not defined", + "/src/tomllib/_parser.py:455:9: Name `nest` used when possibly not defined", + "/src/tomllib/_parser.py:482:16: Name `char` used when possibly not defined", + "/src/tomllib/_parser.py:566:12: Name `char` used when possibly not defined", + "/src/tomllib/_parser.py:573:12: Name `char` used when possibly not defined", + "/src/tomllib/_parser.py:579:12: Name `char` used when possibly not defined", + "/src/tomllib/_parser.py:580:63: Name `char` used when possibly not defined", + "/src/tomllib/_parser.py:629:38: Name `datetime_obj` used when possibly not defined" ]; fn get_test_file(name: &str) -> TestFile { From 24d139891efc6030ef0f3ec2eebd1999c2e6e831 Mon Sep 17 00:00:00 2001 From: Alex Waygood Date: Wed, 16 Oct 2024 09:50:37 +0100 Subject: [PATCH 2/6] fix perf regression? --- .../src/semantic_index/builder.rs | 5 ++-- .../semantic_index/builder/except_handlers.rs | 23 +++++++++++-------- 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/crates/red_knot_python_semantic/src/semantic_index/builder.rs b/crates/red_knot_python_semantic/src/semantic_index/builder.rs index d2b9c2d8a1a77..881a994901d1b 100644 --- a/crates/red_knot_python_semantic/src/semantic_index/builder.rs +++ b/crates/red_knot_python_semantic/src/semantic_index/builder.rs @@ -103,7 +103,7 @@ impl<'db> SemanticIndexBuilder<'db> { .expect("Always to have a root scope") } - fn try_node_context_stack(&mut self) -> &mut TryNodeContextStack { + fn try_node_context_stack(&self) -> &TryNodeContextStack { self.try_node_context_stack_manager .current_try_context_stack() } @@ -239,8 +239,7 @@ impl<'db> SemanticIndexBuilder<'db> { DefinitionCategory::Declaration => use_def.record_declaration(symbol, definition), DefinitionCategory::Binding => use_def.record_binding(symbol, definition), } - let snapshot = self.flow_snapshot(); - self.try_node_context_stack().record_definition(&snapshot); + self.try_node_context_stack().record_definition(self); definition } diff --git a/crates/red_knot_python_semantic/src/semantic_index/builder/except_handlers.rs b/crates/red_knot_python_semantic/src/semantic_index/builder/except_handlers.rs index d04994f1b7de7..1f165cbc605ed 100644 --- a/crates/red_knot_python_semantic/src/semantic_index/builder/except_handlers.rs +++ b/crates/red_knot_python_semantic/src/semantic_index/builder/except_handlers.rs @@ -1,5 +1,9 @@ +use std::cell::RefCell; + use crate::semantic_index::use_def::FlowSnapshot; +use super::SemanticIndexBuilder; + /// An abstraction over the fact that each scope should have its own [`TryNodeContextStack`] #[derive(Debug, Default)] pub(super) struct TryNodeContextStackManager(Vec); @@ -13,9 +17,9 @@ impl TryNodeContextStackManager { } /// Retrieve the [`TryNodeContextStack`] that is relevant for the current scope. - pub(super) fn current_try_context_stack(&mut self) -> &mut TryNodeContextStack { + pub(super) fn current_try_context_stack(&self) -> &TryNodeContextStack { self.0 - .last_mut() + .last() .expect("There should always be at least one `TryBlockContexts` on the stack") } @@ -34,30 +38,31 @@ impl TryNodeContextStackManager { /// The contexts of nested `try`/`except` blocks for a single scope #[derive(Debug, Default)] -pub(super) struct TryNodeContextStack(Vec); +pub(super) struct TryNodeContextStack(RefCell>); impl TryNodeContextStack { /// Push a new [`TryNodeContext`] for recording intermediate states /// while visiting a [`ruff_python_ast::StmtTry`] node that has a `finally` branch. - pub(super) fn push_context(&mut self) { - self.0.push(TryNodeContext::default()); + pub(super) fn push_context(&self) { + self.0.borrow_mut().push(TryNodeContext::default()); } /// Pop a [`TryNodeContext`] off the stack. - pub(super) fn pop_context(&mut self) -> Vec { + pub(super) fn pop_context(&self) -> Vec { let TryNodeContext { try_suite_snapshots, } = self .0 + .borrow_mut() .pop() .expect("Cannot pop a `try` block off an empty `TryBlockContexts` stack"); try_suite_snapshots } /// For each `try` block on the stack, push the snapshot onto the `try` block - pub(super) fn record_definition(&mut self, snapshot: &FlowSnapshot) { - for context in &mut self.0 { - context.record_definition(snapshot.clone()); + pub(super) fn record_definition(&self, builder: &SemanticIndexBuilder) { + for context in self.0.borrow_mut().iter_mut() { + context.record_definition(builder.flow_snapshot()); } } } From c47bf7397ff09597316cbc69fef43080a6b3c17d Mon Sep 17 00:00:00 2001 From: Alex Waygood Date: Wed, 16 Oct 2024 11:27:38 +0100 Subject: [PATCH 3/6] Once again, get rid of the `RefCell` --- .../src/semantic_index/builder.rs | 9 +++++++-- .../semantic_index/builder/except_handlers.rs | 19 ++++++++----------- 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/crates/red_knot_python_semantic/src/semantic_index/builder.rs b/crates/red_knot_python_semantic/src/semantic_index/builder.rs index 881a994901d1b..451443b8dc238 100644 --- a/crates/red_knot_python_semantic/src/semantic_index/builder.rs +++ b/crates/red_knot_python_semantic/src/semantic_index/builder.rs @@ -103,7 +103,7 @@ impl<'db> SemanticIndexBuilder<'db> { .expect("Always to have a root scope") } - fn try_node_context_stack(&self) -> &TryNodeContextStack { + fn try_node_context_stack(&mut self) -> &mut TryNodeContextStack { self.try_node_context_stack_manager .current_try_context_stack() } @@ -239,7 +239,12 @@ impl<'db> SemanticIndexBuilder<'db> { DefinitionCategory::Declaration => use_def.record_declaration(symbol, definition), DefinitionCategory::Binding => use_def.record_binding(symbol, definition), } - self.try_node_context_stack().record_definition(self); + + let mut try_node_stack_manager = std::mem::take(&mut self.try_node_context_stack_manager); + try_node_stack_manager + .current_try_context_stack() + .record_definition(self); + self.try_node_context_stack_manager = try_node_stack_manager; definition } diff --git a/crates/red_knot_python_semantic/src/semantic_index/builder/except_handlers.rs b/crates/red_knot_python_semantic/src/semantic_index/builder/except_handlers.rs index 1f165cbc605ed..8c32a3be30017 100644 --- a/crates/red_knot_python_semantic/src/semantic_index/builder/except_handlers.rs +++ b/crates/red_knot_python_semantic/src/semantic_index/builder/except_handlers.rs @@ -1,5 +1,3 @@ -use std::cell::RefCell; - use crate::semantic_index::use_def::FlowSnapshot; use super::SemanticIndexBuilder; @@ -17,9 +15,9 @@ impl TryNodeContextStackManager { } /// Retrieve the [`TryNodeContextStack`] that is relevant for the current scope. - pub(super) fn current_try_context_stack(&self) -> &TryNodeContextStack { + pub(super) fn current_try_context_stack(&mut self) -> &mut TryNodeContextStack { self.0 - .last() + .last_mut() .expect("There should always be at least one `TryBlockContexts` on the stack") } @@ -38,30 +36,29 @@ impl TryNodeContextStackManager { /// The contexts of nested `try`/`except` blocks for a single scope #[derive(Debug, Default)] -pub(super) struct TryNodeContextStack(RefCell>); +pub(super) struct TryNodeContextStack(Vec); impl TryNodeContextStack { /// Push a new [`TryNodeContext`] for recording intermediate states /// while visiting a [`ruff_python_ast::StmtTry`] node that has a `finally` branch. - pub(super) fn push_context(&self) { - self.0.borrow_mut().push(TryNodeContext::default()); + pub(super) fn push_context(&mut self) { + self.0.push(TryNodeContext::default()); } /// Pop a [`TryNodeContext`] off the stack. - pub(super) fn pop_context(&self) -> Vec { + pub(super) fn pop_context(&mut self) -> Vec { let TryNodeContext { try_suite_snapshots, } = self .0 - .borrow_mut() .pop() .expect("Cannot pop a `try` block off an empty `TryBlockContexts` stack"); try_suite_snapshots } /// For each `try` block on the stack, push the snapshot onto the `try` block - pub(super) fn record_definition(&self, builder: &SemanticIndexBuilder) { - for context in self.0.borrow_mut().iter_mut() { + pub(super) fn record_definition(&mut self, builder: &SemanticIndexBuilder) { + for context in &mut self.0 { context.record_definition(builder.flow_snapshot()); } } From 926df03de32b7d1932e9d2c228d09f1a16e842a2 Mon Sep 17 00:00:00 2001 From: Alex Waygood Date: Wed, 16 Oct 2024 11:33:30 +0100 Subject: [PATCH 4/6] improve encapsulation --- .../mdtest/exception/control_flow.md | 39 ++++++++-------- .../src/semantic_index/builder.rs | 15 ++----- .../semantic_index/builder/except_handlers.rs | 44 ++++++++++++++----- 3 files changed, 56 insertions(+), 42 deletions(-) diff --git a/crates/red_knot_python_semantic/resources/mdtest/exception/control_flow.md b/crates/red_knot_python_semantic/resources/mdtest/exception/control_flow.md index 4643ad5bdf3d3..f9db52e086bc4 100644 --- a/crates/red_knot_python_semantic/resources/mdtest/exception/control_flow.md +++ b/crates/red_knot_python_semantic/resources/mdtest/exception/control_flow.md @@ -19,9 +19,9 @@ exception to be raised. ## A single bare `except` If there are different types for a single variable `x` in the two branches, we -can't determine which branch might have been taken, so the inferred type after -the `try`/`except` block is the union of type at the end of the `try` block -(`str`) and the type at the end of the `except` block (`Literal[2]`): +can't determine which branch might have been taken. The inferred type after +the `try`/`except` block is therefore the union of type at the end of the `try` +suite (`str`) and the type at the end of the `except` suite (`Literal[2]`): ```py path=union_type_inferred.py def could_raise_returns_str() -> str: @@ -63,8 +63,8 @@ reveal_type(x) # revealed: str For simple `try`/`except` blocks, `except TypeError:` has the same control flow as `except:`. There might have been an unhandled exception, but (as described in [the document on exception-handling semantics](https://astral-sh.notion.site/Exception-handler-control-flow-11348797e1ca80bb8ce1e9aedbbe439d)) -that would lead to termination of the scope -- it's irrelevant to consider this -possibility. +that would lead to termination of the scope. It's therefore irrelevant to +consider this possibility when it comes to control-flow analysis. ```py def could_raise_returns_str() -> str: @@ -88,8 +88,9 @@ reveal_type(x) # revealed: str | Literal[2] If the scope reaches the final `reveal_type` call in this example, either the `try`-block suite of statements was executed in its entirety, -or exactly one `except` was executed in its entirety. The inferred type of `x` -should be the union of the types at the end of the three suites: +or exactly one `except` suite was executed in its entirety. +The inferred type of `x` should be the union of the types at the end of the +three suites: - At the end of `try`, `type(x) == str` - At the end of `except TypeError`, `x == 2` @@ -119,7 +120,7 @@ reveal_type(x) # revealed: str | Literal[2, 3] ## Exception handlers with `else` branches (but no `finally`) -If we reach the `reveal_type` call at the end of the scope, +If we reach the `reveal_type` call at the end of this scope, either the `try` and `else` suites were both executed in their entireties, or the `except` suite was executed in its entirety. The type of `x` will be the union of the type at the end of the `else` suite and the type at the end of the @@ -184,10 +185,10 @@ reveal_type(x) # revealed: Literal[2, 3, 4] ## Exception handlers with `finally` branches (but no `except` branches) -A `finally` suite is *always* executed, so if we reach the `reveal_type` call -at the end of this example, we know that `x` *must* have been reassigned to `2` -during the `finally` suite; therefore the type of `x` at the end of the example -is `Literal[2]`: +A `finally` suite is *always* executed. As such, if we reach the `reveal_type` +call at the end of this example, we know that `x` *must* have been reassigned +to `2` during the `finally` suite. The type of `x` at the end of the example is +therefore `Literal[2]`: ```py path=redef_in_finally.py def could_raise_returns_str() -> str: @@ -242,7 +243,7 @@ following possibilities inside `finally` suites: - The `try` suite could have run to completion - Or we could have jumped from halfway through the `try` suite to an `except` suite, and the `except` suite ran to completion -- Or we could have jumped from halfway through the `try`suite straight to the +- Or we could have jumped from halfway through the `try` suite straight to the `finally` suite - Or we could have jumped from halfway through the `try` suite to an `except` suite, only for an exception raised in the `except` suite to cause @@ -279,12 +280,12 @@ finally: reveal_type(x) # revealed: Literal[2] ``` -Example without a redefinition in the `finally` suite. As before, there -*should* be fewer possibilities after completion of the `finally` suite than -there were during the `finally` suite itself (since in some control-flow -possibilities, some exceptions were merely *suspended* during the `finally` -suite, and lead to the scope's termination following conclusion of the -`finally` suite). +Now for an example without a redefinition in the `finally` suite. +As before, there *should* be fewer possibilities after completion of the +`finally` suite than there were during the `finally` suite itself. +(In some control-flow possibilities, some exceptions were merely *suspended* +during the `finally` suite, and lead to the scope's termination following the +conclusion of the `finally` suite.) ```py path=no_redef_in_finally.py def could_raise_returns_str() -> str: diff --git a/crates/red_knot_python_semantic/src/semantic_index/builder.rs b/crates/red_knot_python_semantic/src/semantic_index/builder.rs index 451443b8dc238..5777fe2dc6036 100644 --- a/crates/red_knot_python_semantic/src/semantic_index/builder.rs +++ b/crates/red_knot_python_semantic/src/semantic_index/builder.rs @@ -1,6 +1,6 @@ use std::sync::Arc; -use except_handlers::{TryNodeContextStack, TryNodeContextStackManager}; +use except_handlers::TryNodeContextStackManager; use rustc_hash::FxHashMap; use ruff_db::files::File; @@ -103,11 +103,6 @@ impl<'db> SemanticIndexBuilder<'db> { .expect("Always to have a root scope") } - fn try_node_context_stack(&mut self) -> &mut TryNodeContextStack { - self.try_node_context_stack_manager - .current_try_context_stack() - } - fn push_scope(&mut self, node: NodeWithScopeRef) { let parent = self.current_scope(); self.push_scope_with_parent(node, Some(parent)); @@ -241,9 +236,7 @@ impl<'db> SemanticIndexBuilder<'db> { } let mut try_node_stack_manager = std::mem::take(&mut self.try_node_context_stack_manager); - try_node_stack_manager - .current_try_context_stack() - .record_definition(self); + try_node_stack_manager.record_definition(self); self.try_node_context_stack_manager = try_node_stack_manager; definition @@ -791,7 +784,7 @@ where // states during the `try` block before visiting those suites. let pre_try_block_state = self.flow_snapshot(); - self.try_node_context_stack().push_context(); + self.try_node_context_stack_manager.push_context(); // Visit the `try` block! self.visit_body(body); @@ -800,7 +793,7 @@ where // Take a record also of all the intermediate states we encountered // while visiting the `try` block - let try_block_snapshots = self.try_node_context_stack().pop_context(); + let try_block_snapshots = self.try_node_context_stack_manager.pop_context(); if !handlers.is_empty() { // Save the state immediately *after* visiting the `try` block diff --git a/crates/red_knot_python_semantic/src/semantic_index/builder/except_handlers.rs b/crates/red_knot_python_semantic/src/semantic_index/builder/except_handlers.rs index 8c32a3be30017..6438d1996f3d1 100644 --- a/crates/red_knot_python_semantic/src/semantic_index/builder/except_handlers.rs +++ b/crates/red_knot_python_semantic/src/semantic_index/builder/except_handlers.rs @@ -14,13 +14,6 @@ impl TryNodeContextStackManager { self.0.push(TryNodeContextStack::default()); } - /// Retrieve the [`TryNodeContextStack`] that is relevant for the current scope. - pub(super) fn current_try_context_stack(&mut self) -> &mut TryNodeContextStack { - self.0 - .last_mut() - .expect("There should always be at least one `TryBlockContexts` on the stack") - } - /// Pop a new [`TryNodeContextStack`] off the stack of stacks. /// /// Each [`TryNodeContextStack`] is only valid for a single scope @@ -32,21 +25,48 @@ impl TryNodeContextStackManager { (this indicates an unbalanced `enter_nested_scope()`/`exit_scope()` pair of calls)" ); } + + /// Push a [`TryNodeContext`] onto the [`TryNodeContextStack`] + /// at the top of our stack of stacks + pub(super) fn push_context(&mut self) { + self.current_try_context_stack().push_context(); + } + + /// Pop a [`TryNodeContext`] off the [`TryNodeContextStack`] + /// at the top of our stack of stacks. Return the Vec of [`FlowSnapshot`]s + /// recorded while we were visiting the `try` suite. + pub(super) fn pop_context(&mut self) -> Vec { + self.current_try_context_stack().pop_context() + } + + /// Retrieve the stack that is at the top of our stack of stacks. + /// For each `try` block on that stack, push the snapshot onto the `try` block + pub(super) fn record_definition(&mut self, builder: &SemanticIndexBuilder) { + self.current_try_context_stack().record_definition(builder); + } + + /// Retrieve the [`TryNodeContextStack`] that is relevant for the current scope. + fn current_try_context_stack(&mut self) -> &mut TryNodeContextStack { + self.0 + .last_mut() + .expect("There should always be at least one `TryBlockContexts` on the stack") + } } /// The contexts of nested `try`/`except` blocks for a single scope #[derive(Debug, Default)] -pub(super) struct TryNodeContextStack(Vec); +struct TryNodeContextStack(Vec); impl TryNodeContextStack { /// Push a new [`TryNodeContext`] for recording intermediate states /// while visiting a [`ruff_python_ast::StmtTry`] node that has a `finally` branch. - pub(super) fn push_context(&mut self) { + fn push_context(&mut self) { self.0.push(TryNodeContext::default()); } - /// Pop a [`TryNodeContext`] off the stack. - pub(super) fn pop_context(&mut self) -> Vec { + /// Pop a [`TryNodeContext`] off the stack. Return the Vec of [`FlowSnapshot`]s + /// recorded while we were visiting the `try` suite. + fn pop_context(&mut self) -> Vec { let TryNodeContext { try_suite_snapshots, } = self @@ -57,7 +77,7 @@ impl TryNodeContextStack { } /// For each `try` block on the stack, push the snapshot onto the `try` block - pub(super) fn record_definition(&mut self, builder: &SemanticIndexBuilder) { + fn record_definition(&mut self, builder: &SemanticIndexBuilder) { for context in &mut self.0 { context.record_definition(builder.flow_snapshot()); } From 5386a3ef4a73974056ba94d9511e6bc5219cb973 Mon Sep 17 00:00:00 2001 From: Alex Waygood Date: Wed, 16 Oct 2024 13:21:34 +0100 Subject: [PATCH 5/6] Apply suggestions from code review Co-authored-by: Carl Meyer --- .../resources/mdtest/exception/control_flow.md | 2 +- crates/red_knot_python_semantic/src/semantic_index/builder.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/red_knot_python_semantic/resources/mdtest/exception/control_flow.md b/crates/red_knot_python_semantic/resources/mdtest/exception/control_flow.md index f9db52e086bc4..7ee7366e39e96 100644 --- a/crates/red_knot_python_semantic/resources/mdtest/exception/control_flow.md +++ b/crates/red_knot_python_semantic/resources/mdtest/exception/control_flow.md @@ -20,7 +20,7 @@ exception to be raised. If there are different types for a single variable `x` in the two branches, we can't determine which branch might have been taken. The inferred type after -the `try`/`except` block is therefore the union of type at the end of the `try` +the `try`/`except` block is therefore the union of the type at the end of the `try` suite (`str`) and the type at the end of the `except` suite (`Literal[2]`): ```py path=union_type_inferred.py diff --git a/crates/red_knot_python_semantic/src/semantic_index/builder.rs b/crates/red_knot_python_semantic/src/semantic_index/builder.rs index 5777fe2dc6036..b89270289b0fb 100644 --- a/crates/red_knot_python_semantic/src/semantic_index/builder.rs +++ b/crates/red_knot_python_semantic/src/semantic_index/builder.rs @@ -870,7 +870,7 @@ where // ran to completion or exactly one `except` branch ran to completion, and then again under // the assumption that potentially none of the branches ran to completion and we in fact // jumped from a `try`, `else` or `except` branch straight into the `finally` branch. - // This requires some rethinking of some fundamental assumptions the `use-def` map makes. + // This requires rethinking some fundamental assumptions semantic indexing makes. // For more details, see: // - https://astral-sh.notion.site/Exception-handler-control-flow-11348797e1ca80bb8ce1e9aedbbe439d // - https://github.com/astral-sh/ruff/pull/13633#discussion_r1788626702 From 4233cebb6012f2305ad730db6a0d83e529b491e4 Mon Sep 17 00:00:00 2001 From: Alex Waygood Date: Wed, 16 Oct 2024 13:59:06 +0100 Subject: [PATCH 6/6] Docs touchups per review! --- .../mdtest/exception/control_flow.md | 60 ++++++++++++------- 1 file changed, 39 insertions(+), 21 deletions(-) diff --git a/crates/red_knot_python_semantic/resources/mdtest/exception/control_flow.md b/crates/red_knot_python_semantic/resources/mdtest/exception/control_flow.md index 7ee7366e39e96..ead639e490aab 100644 --- a/crates/red_knot_python_semantic/resources/mdtest/exception/control_flow.md +++ b/crates/red_knot_python_semantic/resources/mdtest/exception/control_flow.md @@ -1,11 +1,11 @@ # Control flow for exception handlers -Tests that assert we understand the possible "definition states" (which symbols -might or might not be defined) in various branches of a +These tests assert that we understand the possible "definition states" (which +symbols might or might not be defined) in the various branches of a `try`/`except`/`else`/`finally` block. For a full writeup on the semantics of exception handlers, -see [this document](https://astral-sh.notion.site/Exception-handler-control-flow-11348797e1ca80bb8ce1e9aedbbe439d). +see [this document][1]. The tests throughout this Markdown document use functions with names starting with `could_raise_*` to mark definitions that might or might not succeed @@ -18,10 +18,23 @@ exception to be raised. ## A single bare `except` -If there are different types for a single variable `x` in the two branches, we -can't determine which branch might have been taken. The inferred type after -the `try`/`except` block is therefore the union of the type at the end of the `try` -suite (`str`) and the type at the end of the `except` suite (`Literal[2]`): +Consider the following `try`/`except` block, with a single bare `except:`. +There are different types for the variable `x` in the two branches of this +block, and we can't determine which branch might have been taken from the +perspective of code following this block. The inferred type after the block's +conclusion is therefore the union of the type at the end of the `try` suite +(`str`) and the type at the end of the `except` suite (`Literal[2]`). + +*Within* the `except` suite, we must infer a union of all possible "definition +states" we could have been in at any point during the `try` suite. This is +because control flow could have jumped to the `except` suite without any of the +`try`-suite definitions successfully completing, with only *some* of the +`try`-suite definitions successfully completing, or indeed with *all* of them +successfully completing. The type of `x` at the beginning of the `except` suite +in this example is therefore `Literal[1] | str`, taking into account that we +might have jumped to the `except` suite before the +`x = could_raise_returns_str()` redefinition, but we *also* could have jumped +to the `except` suite *after* that redefinition. ```py path=union_type_inferred.py def could_raise_returns_str() -> str: @@ -41,8 +54,9 @@ except: reveal_type(x) # revealed: str | Literal[2] ``` -If `x` has the same type at the end of both branches, the branches unify and -`x` is not inferred as having a union type following the `try`/`except` block: +If `x` has the same type at the end of both branches, however, the branches +unify and `x` is not inferred as having a union type following the +`try`/`except` block: ```py path=branches_unify_to_non_union_type.py def could_raise_returns_str() -> str: @@ -60,11 +74,13 @@ reveal_type(x) # revealed: str ## A non-bare `except` -For simple `try`/`except` blocks, `except TypeError:` has the same control flow -as `except:`. There might have been an unhandled exception, but (as described -in [the document on exception-handling semantics](https://astral-sh.notion.site/Exception-handler-control-flow-11348797e1ca80bb8ce1e9aedbbe439d)) -that would lead to termination of the scope. It's therefore irrelevant to -consider this possibility when it comes to control-flow analysis. +For simple `try`/`except` blocks, an `except TypeError:` handler has the same +control flow semantics as an `except:` handler. An `except TypeError:` handler +will not catch *all* exceptions: if this is the only handler, it opens up the +possibility that an exception might occur that would not be handled. However, +as described in [the document on exception-handling semantics][1], that would +lead to termination of the scope. It's therefore irrelevant to consider this +possibility when it comes to control-flow analysis. ```py def could_raise_returns_str() -> str: @@ -89,8 +105,8 @@ reveal_type(x) # revealed: str | Literal[2] If the scope reaches the final `reveal_type` call in this example, either the `try`-block suite of statements was executed in its entirety, or exactly one `except` suite was executed in its entirety. -The inferred type of `x` should be the union of the types at the end of the -three suites: +The inferred type of `x` at this point is the union of the types at the end of +the three suites: - At the end of `try`, `type(x) == str` - At the end of `except TypeError`, `x == 2` @@ -122,9 +138,9 @@ reveal_type(x) # revealed: str | Literal[2, 3] If we reach the `reveal_type` call at the end of this scope, either the `try` and `else` suites were both executed in their entireties, -or the `except` suite was executed in its entirety. The type of `x` will be the -union of the type at the end of the `else` suite and the type at the end of the -`except` suite: +or the `except` suite was executed in its entirety. The type of `x` at this +point is the union of the type at the end of the `else` suite and the type at +the end of the `except` suite: - At the end of `else`, `x == 3` - At the end of `except`, `x == 2` @@ -244,7 +260,7 @@ following possibilities inside `finally` suites: - Or we could have jumped from halfway through the `try` suite to an `except` suite, and the `except` suite ran to completion - Or we could have jumped from halfway through the `try` suite straight to the - `finally` suite + `finally` suite due to an unhandled exception - Or we could have jumped from halfway through the `try` suite to an `except` suite, only for an exception raised in the `except` suite to cause us to jump to the `finally` suite before the `except` suite ran to completion @@ -284,7 +300,7 @@ Now for an example without a redefinition in the `finally` suite. As before, there *should* be fewer possibilities after completion of the `finally` suite than there were during the `finally` suite itself. (In some control-flow possibilities, some exceptions were merely *suspended* -during the `finally` suite, and lead to the scope's termination following the +during the `finally` suite; these lead to the scope's termination following the conclusion of the `finally` suite.) ```py path=no_redef_in_finally.py @@ -621,3 +637,5 @@ finally: reveal_type(x) # revealed: Literal[foo] | Literal[Bar] ``` + +[1]: https://astral-sh.notion.site/Exception-handler-control-flow-11348797e1ca80bb8ce1e9aedbbe439d