From fe21a653b3c60f7c8db8da8428507a97e43c4ce3 Mon Sep 17 00:00:00 2001
From: Damian Shaw
Date: Sun, 11 Feb 2024 10:30:54 -0500
Subject: [PATCH] Narrow resolution to use direct conflicts

---
 .../resolution/resolvelib/provider.py         | 183 ++++++++++++++++++
 1 file changed, 183 insertions(+)

diff --git a/src/pip/_internal/resolution/resolvelib/provider.py b/src/pip/_internal/resolution/resolvelib/provider.py
index 315fb9c8902..88bd9ecc5f1 100644
--- a/src/pip/_internal/resolution/resolvelib/provider.py
+++ b/src/pip/_internal/resolution/resolvelib/provider.py
@@ -5,8 +5,10 @@
     Dict,
     Iterable,
     Iterator,
+    List,
     Mapping,
     Sequence,
+    Set,
     TypeVar,
     Union,
 )
@@ -75,6 +77,132 @@ def _get_with_identifier(
     return default
 
 
+def _extract_names_from_causes_and_parents(
+    causes: Iterable["PreferenceInformation"],
+) -> Set[str]:
+    """
+    Collects the names of the requirements in causes and of their parents.
+
+    :param causes: An iterable of PreferenceInformation
+
+    :returns: A set of strings, each the name of a cause's requirement or,
+        for a cause that has a parent, the name of that parent package
+    """
+    causes_names: Set[str] = set()
+    for cause in causes:
+        causes_names.add(cause.requirement.name)
+        if cause.parent:
+            causes_names.add(cause.parent.name)
+
+    return causes_names
+
+
+def _causes_with_conflicting_parent(
+    causes: Iterable["PreferenceInformation"],
+) -> List["PreferenceInformation"]:
+    """
+    Identifies causes whose requirement is not satisfied by the parent
+    package of another cause, along with that other cause.
+
+    :param causes: An iterable of PreferenceInformation
+
+    :returns: A list of the PreferenceInformation objects involved in such
+        a conflict, each listed once, which is empty if none was found
+    """
+    # Key the causes by id so that each cause object is only processed, and
+    # only reported, once
+    unique_causes = list({id(c): c for c in causes}.values())
+
+    # Group the causes that have a parent by the name of that parent, for
+    # quick lookup by the name of another cause's requirement
+    causes_by_parent_name: Dict[str, List["PreferenceInformation"]] = (
+        collections.defaultdict(list)
+    )
+    for cause in unique_causes:
+        if cause.parent:
+            causes_by_parent_name[cause.parent.name].append(cause)
+
+    # Every cause is checked, even one already known to conflict, as its own
+    # requirement may also conflict with the parent of yet another cause
+    conflicting_causes_by_id: Dict[int, "PreferenceInformation"] = {}
+    for cause in unique_causes:
+        for other_cause in causes_by_parent_name.get(cause.requirement.name, ()):
+            if not cause.requirement.is_satisfied_by(other_cause.parent):
+                conflicting_causes_by_id[id(cause)] = cause
+                conflicting_causes_by_id[id(other_cause)] = other_cause
+
+    return list(conflicting_causes_by_id.values())
+
+
+def _first_causes_with_no_candidates(
+    causes: Sequence["PreferenceInformation"],
+    candidates: Mapping[str, Iterator[Candidate]],
+) -> List["PreferenceInformation"]:
+    """
+    Checks for a pair of causes for the same package whose combined
+    specifiers are not matched by any candidate of that package. Returns
+    the first pair found as iterating candidates can be expensive due to
+    downloading and building packages.
+
+    :param causes: A sequence of PreferenceInformation
+    :param candidates: A mapping of package names to iterators of their
+        candidates
+
+    :returns: A list containing the first pair of PreferenceInformation
+        objects found to have no candidate in common, or an empty list if
+        no such pair was found
+    """
+    # Group causes by package name to reduce the comparison complexity.
+    # NOTE(review): causes are grouped by project_name and candidates is then
+    # looked up with that key; confirm both are keyed alike for every cause.
+    causes_by_name: Dict[str, List["PreferenceInformation"]] = (
+        collections.defaultdict(list)
+    )
+    for cause in causes:
+        causes_by_name[cause.requirement.project_name].append(cause)
+
+    # Check for cause pairs within the same package that have incompatible
+    # specifiers.
+    for cause_name, causes_list in causes_by_name.items():
+        if len(causes_list) < 2:
+            continue
+
+        # Each cause is popped before being compared to the remaining ones
+        # so that no pair is compared twice
+        while causes_list:
+            cause = causes_list.pop()
+            candidate = cause.requirement.get_candidate_lookup()[1]
+            if candidate is None:
+                continue
+
+            for other_cause in causes_list:
+                other_candidate = other_cause.requirement.get_candidate_lookup()[1]
+                if other_candidate is None:
+                    continue
+
+                # Check if no candidate can match the combined specifier.
+                # The candidates are looked up again for every pair as they
+                # are an iterator that the check below consumes.
+                # NOTE(review): assumes each lookup returns a fresh iterator.
+                combined_specifier = candidate.specifier & other_candidate.specifier
+                possible_candidates = candidates.get(cause_name)
+
+                # If no candidates have been provided then by default the
+                # causes have no candidates
+                if possible_candidates is None:
+                    return [cause, other_cause]
+
+                # Use any and contains version here instead of filter so
+                # if a version is found that matches it will short circuit
+                # iterating through possible candidates
+                if not any(
+                    combined_specifier.contains(c.version) for c in possible_candidates
+                ):
+                    return [cause, other_cause]
+
+    return []
+
+
 class PipProvider(_ProviderBase):
     """Pip's provider implementation for resolvelib.
 
@@ -253,3 +381,58 @@ def is_backtrack_cause(
             if backtrack_cause.parent and identifier == backtrack_cause.parent.name:
                 return True
         return False
+
+    def narrow_requirement_selection(
+        self,
+        identifiers: Iterable[str],
+        resolutions: Mapping[str, Candidate],
+        candidates: Mapping[str, Iterator[Candidate]],
+        information: Mapping[str, Iterable["PreferenceInformation"]],
+        backtrack_causes: Sequence["PreferenceInformation"],
+    ) -> Iterable[str]:
+        """
+        Narrows down the selection of requirements to consider for the next
+        resolution step.
+
+        This method uses principles of conflict-driven clause learning (CDCL)
+        to focus on the closest conflicts first.
+
+        :param identifiers: Iterable of requirement names currently under
+            consideration.
+        :param resolutions: Current mapping of resolved package identifiers
+            to their selected candidates. Currently unused.
+        :param candidates: Mapping of each package's possible candidates.
+        :param information: Mapping of requirement information for each
+            package. Currently unused.
+        :param backtrack_causes: Sequence of requirements that, if non-empty,
+            were the cause of the resolver backtracking.
+
+        :returns: The identifiers, in their original order, that are named
+            by the narrowed down causes, or all of the identifiers when
+            there are fewer than two causes or none of them is named.
+        """
+        if len(backtrack_causes) < 2:
+            return identifiers
+
+        # First, try to resolve direct causes based on conflicting parent packages
+        direct_causes = _causes_with_conflicting_parent(backtrack_causes)
+        if not direct_causes:
+            # If no conflicting parent packages found try to find some causes
+            # that share the same requirement name but no common candidate,
+            # we take the first one of these as iterating through candidates
+            # is potentially expensive
+            direct_causes = _first_causes_with_no_candidates(
+                backtrack_causes, candidates
+            )
+        if direct_causes:
+            backtrack_causes = direct_causes
+
+        # Filter identifiers based on the narrowed down causes. They are put
+        # in a list first as an iterable may only be consumable once and may
+        # still have to be returned in full, and a list is filtered, rather
+        # than sets intersected, to keep the order the caller supplied.
+        identifiers = list(identifiers)
+        causes_names = _extract_names_from_causes_and_parents(backtrack_causes)
+        narrowed_identifiers = [name for name in identifiers if name in causes_names]
+
+        return narrowed_identifiers if narrowed_identifiers else identifiers