From dbe85275ec859e441f26ffeedfcef55eac458b00 Mon Sep 17 00:00:00 2001 From: Frost Ming Date: Thu, 6 Apr 2023 13:38:39 +0800 Subject: [PATCH] feat: Allow to order the index urls and find links together for PackageFinder (#43) --- README.md | 2 +- src/unearth/__init__.py | 3 +- src/unearth/__main__.py | 29 ++++--- src/unearth/finder.py | 162 ++++++++++++++++++++++++---------------- src/unearth/utils.py | 35 +++++++++ tests/test_finder.py | 50 ++++++------- tests/test_utils.py | 25 +++++++ 7 files changed, 198 insertions(+), 108 deletions(-) create mode 100644 tests/test_utils.py diff --git a/README.md b/README.md index 8a0df85..7b6992e 100644 --- a/README.md +++ b/README.md @@ -43,7 +43,7 @@ Get the best matching candidate for a requirement: ```python >>> from unearth import PackageFinder ->>> finder = PackageFinder(index_urls=['https://pypi.org/simple/']) +>>> finder = PackageFinder(index_urls=["https://pypi.org/simple/"]) >>> result = finder.find_best_match("flask>=2") >>> result.best_candidate Package(name='flask', version='2.1.2', link=) diff --git a/src/unearth/__init__.py b/src/unearth/__init__.py index 1dc6aa6..673f8fa 100644 --- a/src/unearth/__init__.py +++ b/src/unearth/__init__.py @@ -7,12 +7,13 @@ """ from unearth.errors import HashMismatchError, UnpackError, URLError, VCSBackendError from unearth.evaluator import Package, TargetPython -from unearth.finder import BestMatch, PackageFinder +from unearth.finder import BestMatch, PackageFinder, Source from unearth.link import Link from unearth.vcs import vcs_support __all__ = [ "Link", + "Source", "Package", "URLError", "BestMatch", diff --git a/src/unearth/__main__.py b/src/unearth/__main__.py index e59f614..1419722 100644 --- a/src/unearth/__main__.py +++ b/src/unearth/__main__.py @@ -24,8 +24,8 @@ class CLIArgs: index_urls: list[str] find_links: list[str] trusted_hosts: list[str] - no_binary: list[str] - only_binary: list[str] + no_binary: bool + only_binary: bool prefer_binary: bool all: bool link_only: 
bool @@ -57,8 +57,8 @@ def cli_parser() -> argparse.ArgumentParser: parser.add_argument( "--index-url", "-i", - dest="index_urls", metavar="URL", + dest="index_urls", action="append", help="(Multiple)(PEP 503)Simple Index URLs.", ) @@ -79,17 +79,13 @@ def cli_parser() -> argparse.ArgumentParser: ) parser.add_argument( "--no-binary", - action="append", - metavar="PACKAGE", - help="(Multiple)Specify package names to exclude binary results, " - "or `:all:` to exclude all binary results.", + action="store_true", + help="Exclude binary packages from the results.", ) parser.add_argument( "--only-binary", - action="append", - metavar="PACKAGE", - help="(Multiple)Specify package names to only allow binary results, " - "or `:all:` to enforce binary results for all packages.", + action="store_true", + help="Only include binary packages in the results.", ) parser.add_argument( "--prefer-binary", @@ -128,16 +124,17 @@ def cli(argv: list[str] | None = None) -> None: parser = cli_parser() args = cast(CLIArgs, parser.parse_args(argv)) _setup_logger(args.verbose) + name = args.requirement.name finder = PackageFinder( - index_urls=args.index_urls or ["https://pypi.org/simple"], + index_urls=args.index_urls or ["https://pypi.org/simple/"], find_links=args.find_links or [], trusted_hosts=args.trusted_hosts or [], - no_binary=args.no_binary or [], - only_binary=args.only_binary or [], - prefer_binary=args.prefer_binary, + no_binary=[name] if args.no_binary else [], + only_binary=[name] if args.only_binary else [], + prefer_binary=[name] if args.prefer_binary else [], verbosity=int(args.verbose), ) - matches = finder.find_matches(args.requirement) + matches = list(finder.find_matches(args.requirement)) if not matches: print("No matches are found.", file=sys.stderr) sys.exit(1) diff --git a/src/unearth/finder.py b/src/unearth/finder.py index 763944c..c1d8687 100644 --- a/src/unearth/finder.py +++ b/src/unearth/finder.py @@ -3,10 +3,11 @@ import atexit import functools +import itertools 
import os import pathlib from tempfile import TemporaryDirectory -from typing import Iterable, NamedTuple +from typing import TYPE_CHECKING, Iterable, NamedTuple, Sequence from urllib.parse import urljoin import packaging.requirements @@ -25,7 +26,17 @@ from unearth.link import Link from unearth.preparer import unpack_link from unearth.session import PyPISession -from unearth.utils import split_auth_from_url +from unearth.utils import LazySequence + +if TYPE_CHECKING: + from typing import TypedDict + + class Source(TypedDict): + url: str + type: str + +else: + Source = dict class BestMatch(NamedTuple): @@ -34,9 +45,9 @@ class BestMatch(NamedTuple): #: The best matching package, or None if no match was found. best: Package | None #: The applicable packages, excluding those with unmatching versions. - applicable: list[Package] + applicable: Sequence[Package] #: All candidates found for the requirement. - candidates: list[Package] + candidates: Sequence[Package] class PackageFinder: @@ -45,16 +56,16 @@ class PackageFinder: Args: session (PyPISession|None): The session to use for the finder. If not provided, a temporary session will be created. - index_urls: (Iterable[str]): The urls of the index pages. - find_links: (Iterable[str]): The urls or paths of the find links. + index_urls (Iterable[str]): The index URLs to search for packages. + find_links (Iterable[str]): The links to search for packages. trusted_hosts: (Iterable[str]): The trusted hosts. target_python (TargetPython): The links must match the target Python ignore_compatibility (bool): Whether to ignore the compatibility check no_binary (Iterable[str]): The names of the packages to disallow wheels only_binary (Iterable[str]): The names of the packages to disallow non-wheels - prefer_binary (bool): Whether to prefer binary packages even if - newer sdist pacakges exist. + prefer_binary (Iterable[str]): The names of the packages to prefer binary + distributions even if newer sdist packages exist. 
respect_source_order (bool): If True, packages from the source coming earlier are more preferred, even if they have lower versions. verbosity (int): The verbosity level. @@ -63,6 +74,7 @@ class PackageFinder: def __init__( self, session: PyPISession | None = None, + *, index_urls: Iterable[str] = (), find_links: Iterable[str] = (), trusted_hosts: Iterable[str] = (), @@ -70,33 +82,57 @@ def __init__( ignore_compatibility: bool = False, no_binary: Iterable[str] = (), only_binary: Iterable[str] = (), - prefer_binary: bool = False, + prefer_binary: Iterable[str] = (), respect_source_order: bool = False, verbosity: int = 0, ) -> None: - self.index_urls = list(index_urls) - self.find_links = list(find_links) + self.sources: list[Source] = [] + for url in index_urls: + self.add_index_url(url) + for url in find_links: + self.add_find_links(url) self.target_python = target_python or TargetPython() self.ignore_compatibility = ignore_compatibility self.no_binary = [canonicalize_name(name) for name in no_binary] self.only_binary = [canonicalize_name(name) for name in only_binary] - self.prefer_binary = prefer_binary - if session is None: - session = PyPISession( - index_urls=self.index_urls, trusted_hosts=trusted_hosts - ) - atexit.register(session.close) - self.session = session + self.prefer_binary = [canonicalize_name(name) for name in prefer_binary] + self.trusted_hosts = trusted_hosts + self._session = session self.respect_source_order = respect_source_order self.verbosity = verbosity self._tag_priorities = { tag: i for i, tag in enumerate(self.target_python.supported_tags()) } - # Index pages are preferred over find links. 
- self._source_order = [ - split_auth_from_url(url)[1] for url in (self.index_urls + self.find_links) - ] + + @property + def session(self) -> PyPISession: + if self._session is None: + index_urls = [ + source["url"] for source in self.sources if source["type"] == "index" + ] + session = PyPISession( + index_urls=index_urls, trusted_hosts=self.trusted_hosts + ) + atexit.register(session.close) + self._session = session + return self._session + + def add_index_url(self, url: str) -> None: + """Add an index URL to the finder search scope. + + Args: + url (str): The index URL to add. + """ + self.sources.append({"url": url, "type": "index"}) + + def add_find_links(self, url: str) -> None: + """Add a find links URL to the finder search scope. + + Args: + url (str): The find links URL to add. + """ + self.sources.append({"url": url, "type": "find_links"}) def build_evaluator( self, @@ -174,24 +210,12 @@ def _sort_key(self, package: Package) -> tuple: (self._tag_priorities.get(tag, pri - 1) for tag in file_tags), default=pri - 1, ) - if self.prefer_binary: + if canonicalize_name(package.name) in self.prefer_binary: prefer_binary = True - comes_from = package.link.comes_from - source_index = len(self._source_order) - - if comes_from is not None and self.respect_source_order: - source_index = next( - ( - i - for i, url in enumerate(self._source_order) - if comes_from.startswith(url) - ), - source_index, - ) + return ( -int(link.is_yanked), int(prefer_binary), - -source_index, parse_version(package.version) if package.version is not None else 0, -pri, build_tag, @@ -211,26 +235,39 @@ def _find_packages( hashes (dict[str, list[str]]|None): The hashes to filter on. Returns: - Iterable[Package]: The packages with the given name + Iterable[Package]: The packages with the given name, sorted by best match. 
""" evaluator = self.build_evaluator(package_name, allow_yanked, hashes) - for index_url in self.index_urls: - package_link = self._build_index_page_link(index_url, package_name) - yield from self._evaluate_links( - collect_links_from_location(self.session, package_link), evaluator - ) - for find_link in self.find_links: - link = self._build_find_link(find_link) - yield from self._evaluate_links( - collect_links_from_location(self.session, link, expand=True), evaluator - ) + + def find_one_source(source: Source) -> Iterable[Package]: + if source["type"] == "index": + link = self._build_index_page_link(source["url"], package_name) + result = self._evaluate_links( + collect_links_from_location(self.session, link), evaluator + ) + else: + link = self._build_find_link(source["url"]) + result = self._evaluate_links( + collect_links_from_location(self.session, link, expand=True), + evaluator, + ) + if self.respect_source_order: + # Sort the result within the individual source. + return sorted(result, key=self._sort_key, reverse=True) + return result + + all_packages = itertools.chain.from_iterable(map(find_one_source, self.sources)) + if self.respect_source_order: + return all_packages + # Otherwise, sort the result across all sources. + return sorted(all_packages, key=self._sort_key, reverse=True) def find_all_packages( self, package_name: str, allow_yanked: bool = False, hashes: dict[str, list[str]] | None = None, - ) -> list[Package]: + ) -> Sequence[Package]: """Find all packages with the given package name, best match first. Args: @@ -239,13 +276,9 @@ def find_all_packages( hashes (dict[str, list[str]]|None): The hashes to filter on. 
Returns: - list[Package]: The packages list sorted by best match + Sequence[Package]: The packages list sorted by best match """ - return sorted( - self._find_packages(package_name, allow_yanked, hashes), - key=self._sort_key, - reverse=True, - ) + return LazySequence(self._find_packages(package_name, allow_yanked, hashes)) def _find_packages_from_requirement( self, @@ -266,7 +299,7 @@ def find_matches( allow_yanked: bool | None = None, allow_prereleases: bool | None = None, hashes: dict[str, list[str]] | None = None, - ) -> list[Package]: + ) -> Sequence[Package]: """Find all packages matching the given requirement, best match first. Args: @@ -279,18 +312,16 @@ def find_matches( hashes (dict[str, list[str]]|None): The hashes to filter on. Returns: - list[Package]: The packages list sorted by best match + Sequence[Package]: The packages sorted by best match """ if isinstance(requirement, str): requirement = packaging.requirements.Requirement(requirement) - return sorted( + return LazySequence( self._evaluate_packages( self._find_packages_from_requirement(requirement, allow_yanked, hashes), requirement, allow_prereleases, - ), - key=self._sort_key, - reverse=True, + ) ) def find_best_match( @@ -316,13 +347,14 @@ def find_best_match( """ if isinstance(requirement, str): requirement = packaging.requirements.Requirement(requirement) - candidates = list( - self._find_packages_from_requirement(requirement, allow_yanked, hashes) + packages = self._find_packages_from_requirement( + requirement, allow_yanked, hashes ) - applicable_candidates = list( - self._evaluate_packages(candidates, requirement, allow_prereleases) + candidates = LazySequence(packages) + applicable_candidates = LazySequence( + self._evaluate_packages(candidates, requirement, allow_prereleases) ) - best_match = max(applicable_candidates, key=self._sort_key, default=None) + best_match = next(iter(applicable_candidates), None) return BestMatch(best_match, applicable_candidates, candidates) def 
download_and_unpack( diff --git a/src/unearth/utils.py b/src/unearth/utils.py index f706518..1a3a40f 100644 --- a/src/unearth/utils.py +++ b/src/unearth/utils.py @@ -2,10 +2,12 @@ from __future__ import annotations import functools +import itertools import os import sys import urllib.parse as parse from pathlib import Path +from typing import Iterable, Iterator, Sequence, TypeVar from urllib.request import pathname2url, url2pathname WINDOWS = sys.platform == "win32" @@ -182,3 +184,36 @@ def format_size(size: str) -> str: return f"{int_size / 1000.0:.1f} kB" else: return f"{int(int_size)} bytes" + + +T = TypeVar("T", covariant=True) + + +class LazySequence(Sequence[T]): + """A sequence that is lazily evaluated.""" + + def __init__(self, data: Iterable[T]) -> None: + self._inner = data + + def __iter__(self) -> Iterator[T]: + self._inner, this = itertools.tee(self._inner) + return this + + def __len__(self) -> int: + i = 0 + for _ in self: + i += 1 + return i + + def __bool__(self) -> bool: + for _ in self: + return True + return False + + def __getitem__(self, index: int) -> T: # type: ignore[override] + if index < 0: + raise IndexError("Negative indices are not supported") + for i, item in enumerate(self): + if i == index: + return item + raise IndexError("Index out of range") diff --git a/tests/test_finder.py b/tests/test_finder.py index 39ed182..7bea699 100644 --- a/tests/test_finder.py +++ b/tests/test_finder.py @@ -6,6 +6,8 @@ pytestmark = pytest.mark.usefixtures("pypi", "content_type") +DEFAULT_INDEX_URL = "https://pypi.org/simple/" + @pytest.mark.parametrize( "target_python,filename", @@ -34,15 +36,15 @@ ) def test_find_most_matching_wheel(session, target_python, filename): finder = PackageFinder( - session, index_urls=["https://pypi.org/simple"], target_python=target_python + session=session, index_urls=[DEFAULT_INDEX_URL], target_python=target_python ) assert finder.find_best_match("black").best.link.filename == filename def 
test_find_package_with_format_control(session): finder = PackageFinder( - session, - index_urls=["https://pypi.org/simple"], + session=session, + index_urls=[DEFAULT_INDEX_URL], target_python=TargetPython( (3, 9), abis=["cp39"], impl="cp", platforms=["win_amd64"] ), @@ -58,8 +60,8 @@ def test_find_package_with_format_control(session): def test_find_package_no_binary_for_all(session): finder = PackageFinder( - session, - index_urls=["https://pypi.org/simple"], + session=session, + index_urls=[DEFAULT_INDEX_URL], target_python=TargetPython( (3, 9), abis=["cp39"], impl="cp", platforms=["win_amd64"] ), @@ -71,12 +73,12 @@ def test_find_package_no_binary_for_all(session): def test_find_package_prefer_binary(session): finder = PackageFinder( - session, - index_urls=["https://pypi.org/simple"], + session=session, + index_urls=[DEFAULT_INDEX_URL], target_python=TargetPython( (3, 9), abis=["cp39"], impl="cp", platforms=["win_amd64"] ), - prefer_binary=True, + prefer_binary=["first"], ) assert ( finder.find_best_match("first").best.link.filename @@ -86,8 +88,8 @@ def test_find_package_prefer_binary(session): def test_find_package_with_hash_allowance(session): finder = PackageFinder( - session, - index_urls=["https://pypi.org/simple"], + session=session, + index_urls=[DEFAULT_INDEX_URL], target_python=TargetPython( (3, 9), abis=["cp39"], impl="cp", platforms=["win_amd64"] ), @@ -108,8 +110,8 @@ def test_find_package_with_hash_allowance(session): @pytest.mark.parametrize("ignore_compat", [True, False]) def test_find_package_ignoring_compatibility(session, ignore_compat): finder = PackageFinder( - session, - index_urls=["https://pypi.org/simple"], + session=session, + index_urls=[DEFAULT_INDEX_URL], target_python=TargetPython( (3, 9), abis=["cp39"], impl="cp", platforms=["win_amd64"] ), @@ -121,8 +123,8 @@ def test_find_package_ignoring_compatibility(session, ignore_compat): def test_find_package_with_version_specifier(session): finder = PackageFinder( - session, - 
index_urls=["https://pypi.org/simple"], + session=session, + index_urls=[DEFAULT_INDEX_URL], ignore_compatibility=True, ) matches = finder.find_matches("black==22.3.0") @@ -134,8 +136,8 @@ def test_find_package_with_version_specifier(session): def test_find_package_allowing_prereleases(session): finder = PackageFinder( - session, - index_urls=["https://pypi.org/simple"], + session=session, + index_urls=[DEFAULT_INDEX_URL], ignore_compatibility=True, ) matches = finder.find_matches("black<22.3.0", allow_prereleases=True) @@ -152,8 +154,8 @@ def test_find_package_allowing_prereleases(session): def test_find_requirement_with_link(session): finder = PackageFinder( - session, - index_urls=["https://pypi.org/simple"], + session=session, + index_urls=[DEFAULT_INDEX_URL], ignore_compatibility=True, ) req = "first @ https://pypi.org/files/first-2.0.2.tar.gz" @@ -166,11 +168,9 @@ def test_find_requirement_with_link(session): def test_find_requirement_preference(session, fixtures_dir): find_link = Link.from_path(fixtures_dir / "findlinks/index.html") finder = PackageFinder( - session, - index_urls=["https://pypi.org/simple"], - find_links=[find_link.normalized], - ignore_compatibility=True, + session=session, index_urls=[DEFAULT_INDEX_URL], ignore_compatibility=True ) + finder.add_find_links(find_link.normalized) best = finder.find_best_match("first").best assert best.link.filename == "first-2.0.3-py2.py3-none-any.whl" assert best.link.comes_from == find_link.normalized @@ -179,12 +179,12 @@ def test_find_requirement_preference(session, fixtures_dir): def test_find_requirement_preference_respect_source_order(session, fixtures_dir): find_link = Link.from_path(fixtures_dir / "findlinks/index.html") finder = PackageFinder( - session, - index_urls=["https://pypi.org/simple"], - find_links=[find_link.normalized], + session=session, + index_urls=[DEFAULT_INDEX_URL], ignore_compatibility=True, respect_source_order=True, ) + finder.add_find_links(find_link.normalized) best = 
finder.find_best_match("first").best assert best.link.filename == "first-2.0.2.tar.gz" assert best.link.comes_from == "https://pypi.org/simple/first/" diff --git a/tests/test_utils.py b/tests/test_utils.py new file mode 100644 index 0000000..39711cd --- /dev/null +++ b/tests/test_utils.py @@ -0,0 +1,25 @@ +from unittest import mock + +from unearth.utils import LazySequence + + +def test_lazy_sequence(): + func = mock.Mock() + + def gen(size): + for i in range(size): + func() + yield i + + seq = LazySequence(gen(5)) + assert bool(seq) is True + assert func.call_count == 1 + assert seq[0] == 0 + assert func.call_count == 1 + assert seq[1] == 1 + assert func.call_count == 2 + assert 3 in seq + assert func.call_count == 4 + assert len(seq) == 5 + assert list(seq) == [0, 1, 2, 3, 4] + assert func.call_count == 5