From 4a03cc865566f7e791c02a78b42759e2321815f8 Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Wed, 9 Aug 2023 00:30:12 -0400 Subject: [PATCH] rearrange explanatory comments --- src/pip/_internal/network/lazy_wheel.py | 47 ++++++++++++++++--------- 1 file changed, 31 insertions(+), 16 deletions(-) diff --git a/src/pip/_internal/network/lazy_wheel.py b/src/pip/_internal/network/lazy_wheel.py index 29cb7b9671a..9a209894f02 100644 --- a/src/pip/_internal/network/lazy_wheel.py +++ b/src/pip/_internal/network/lazy_wheel.py @@ -40,22 +40,24 @@ def dist_from_wheel_url(name: str, url: str, session: Session) -> BaseDistributi is raised. """ try: + # After context manager exit, wheel.name will point to a deleted file path. + # Add `delete_backing_file=False` to disable this for debugging. with LazyHTTPFile(url, session) as lazy_file: - with indent_log(): - logger.debug("begin prefetching for %s", name) - lazy_file.prefetch_contiguous_dist_info(name) - logger.debug("done prefetching for %s", name) + lazy_file.prefetch_contiguous_dist_info(name) - # For read-only ZIP files, ZipFile only needs methods read, - # seek, seekable and tell, not the whole IO protocol. wheel = MemoryWheel(lazy_file.name, lazy_file) - # After context manager exit, wheel.name is an invalid file by intention. return get_wheel_distribution(wheel, canonicalize_name(name)) except (BadZipFile, UnsupportedWheel): raise InvalidWheel(url, name) class ReadOnlyIOWrapper(BinaryIO): + """Implement read-side ``BinaryIO`` methods wrapping an inner ``BinaryIO``. + + For read-only ZIP files, ``ZipFile`` only needs read, seek, seekable, and tell. + ``LazyHTTPFile`` subclasses this and therefore must only implement its lazy read(). + """ + _file: BinaryIO def __init__(self, inner: BinaryIO) -> None: @@ -160,8 +162,8 @@ def __next__(self) -> bytes: # guess at how the server will interpret caching directives for range requests (we # especially don't want the server to return 200 OK with the entire file contents). # TODO: consider If-Match (etag) and handling 304 File Modified, here or in the -# unpack_url() method. -_UNCACHED_HEADERS = {**HEADERS, 'Cache-Control': 'no-cache'} +# unpack_url() method for non-lazy downloads. +_UNCACHED_HEADERS = {**HEADERS, "Cache-Control": "no-cache"} class LazyHTTPFile(ReadOnlyIOWrapper): @@ -182,9 +184,9 @@ def __init__( url: str, session: Session, initial_chunk_size: int = _DEFAULT_INITIAL_FETCH, + delete_backing_file: bool = True, ) -> None: - # Add delete=False and print the file's `.name` to debug invalid virtual zips. - super().__init__(cast(BinaryIO, NamedTemporaryFile())) + super().__init__(cast(BinaryIO, NamedTemporaryFile(delete=delete_backing_file))) self._request_count = 0 self._session = session @@ -387,16 +389,27 @@ def _download(self, start: int, end: int) -> None: self._file.write(chunk) def prefetch_contiguous_dist_info(self, name: str) -> None: - """ - Read contents of entire dist-info section of wheel. + """Read contents of entire dist-info section of wheel. - pip will read every entry in this directory when generating a dist from a wheel, - so prepopulating the file contents avoids waiting for multiple range requests. + We know pip will read every entry in this directory when generating a dist from + a wheel, so prepopulating the file contents avoids waiting for further + range requests. """ + # Clarify in debug output which requests were sent during __init__, which during + # the prefetch, and which during the dist metadata generation. + with indent_log(): + logger.debug("begin prefetching dist-info for %s", name) + self._prefetch_contiguous_dist_info(name) + logger.debug("done prefetching dist-info for %s", name) + + def _prefetch_contiguous_dist_info(self, name: str) -> None: dist_info_prefix = re.compile(r"^[^/]*\.dist-info/") start: int | None = None end: int | None = None + # This may perform further requests if __init__() did not pull in the entire + # central directory at the end of the file (although _DEFAULT_INITIAL_FETCH + # should be set large enough to avoid this). zf = ZipFile(self) for info in zf.infolist(): @@ -405,12 +418,14 @@ def prefetch_contiguous_dist_info(self, name: str) -> None: start = info.header_offset continue else: + # The last .dist-info/ entry may be before the end of the file if the + # wheel's entries are sorted lexicographically (which is unusual). if not dist_info_prefix.search(info.filename): end = info.header_offset break if start is None: raise UnsupportedWheel( - f"no {dist_info_prefix} directory found for {name} in {self.name}" + f"no {dist_info_prefix!r} directory found for {name} in {self.name}" ) # If the last entries of the zip are the .dist-info/ dir (as usual), then give # us everything until the start of the central directory.