diff --git a/news/12186.bugfix.rst b/news/12186.bugfix.rst new file mode 100644 index 00000000000..b51d84a0cb2 --- /dev/null +++ b/news/12186.bugfix.rst @@ -0,0 +1 @@ +Avoid downloading any dists in ``install --dry-run`` if PEP 658 ``.metadata`` files or lazy wheels are available. diff --git a/news/12256.feature.rst b/news/12256.feature.rst new file mode 100644 index 00000000000..0b5bb356188 --- /dev/null +++ b/news/12256.feature.rst @@ -0,0 +1 @@ +Cache computed metadata from sdists and lazy wheels in ``~/.cache/pip/link-metadata`` when ``--use-feature=metadata-cache`` is enabled. diff --git a/news/12863.trivial.rst b/news/12863.trivial.rst new file mode 100644 index 00000000000..dc36a82a0df --- /dev/null +++ b/news/12863.trivial.rst @@ -0,0 +1 @@ +Cache "concrete" dists by ``Distribution`` instead of ``InstallRequirement``. diff --git a/news/12871.trivial.rst b/news/12871.trivial.rst new file mode 100644 index 00000000000..186e2bcb3c4 --- /dev/null +++ b/news/12871.trivial.rst @@ -0,0 +1 @@ +Refactor much of ``RequirementPreparer`` to avoid duplicated code paths for metadata-only requirements. diff --git a/src/pip/_internal/cache.py b/src/pip/_internal/cache.py index 6b4512672db..534d0e866b6 100644 --- a/src/pip/_internal/cache.py +++ b/src/pip/_internal/cache.py @@ -1,12 +1,14 @@ """Cache Management """ +import abc import hashlib import json import logging import os +import re from pathlib import Path -from typing import Any, Dict, List, Optional +from typing import Dict, Iterator, List, Optional, Tuple from pip._vendor.packaging.tags import Tag, interpreter_name, interpreter_version from pip._vendor.packaging.utils import canonicalize_name @@ -15,21 +17,71 @@ from pip._internal.models.direct_url import DirectUrl from pip._internal.models.link import Link from pip._internal.models.wheel import Wheel +from pip._internal.req.req_install import InstallRequirement from pip._internal.utils.temp_dir import TempDirectory, tempdir_kinds from pip._internal.utils.urls import path_to_url +from pip._internal.vcs import vcs logger = logging.getLogger(__name__) +_egg_info_re = re.compile(r"([a-z0-9_.]+)-([a-z0-9_.!+-]+)", re.IGNORECASE) + ORIGIN_JSON_NAME = "origin.json" +def _contains_egg_info(s: str) -> bool: + """Determine whether the string looks like an egg_info. + + :param s: The string to parse. E.g. foo-2.1 + """ + return bool(_egg_info_re.search(s)) + + +def should_cache( + req: InstallRequirement, +) -> bool: + """ + Return whether a built InstallRequirement can be stored in the persistent + wheel cache, assuming the wheel cache is available, and _should_build() + has determined a wheel needs to be built. + """ + if not req.link: + return False + + if req.link.is_wheel: + return False + + if req.editable or not req.source_dir: + # never cache editable requirements + return False + + if req.link and req.link.is_vcs: + # VCS checkout. Do not cache + # unless it points to an immutable commit hash. + assert not req.editable + assert req.source_dir + vcs_backend = vcs.get_backend_for_scheme(req.link.scheme) + assert vcs_backend + if vcs_backend.is_immutable_rev_checkout(req.link.url, req.source_dir): + return True + return False + + assert req.link + base, ext = req.link.splitext() + if _contains_egg_info(base): + return True + + # Otherwise, do not cache. + return False + + def _hash_dict(d: Dict[str, str]) -> str: """Return a stable sha224 of a dictionary.""" s = json.dumps(d, sort_keys=True, separators=(",", ":"), ensure_ascii=True) return hashlib.sha224(s.encode("ascii")).hexdigest() -class Cache: +class Cache(abc.ABC): """An abstract class - provides cache directories for data from links :param cache_dir: The root of the cache. @@ -73,20 +125,28 @@ def _get_cache_path_parts(self, link: Link) -> List[str]: return parts - def _get_candidates(self, link: Link, canonical_package_name: str) -> List[Any]: - can_not_cache = not self.cache_dir or not canonical_package_name or not link - if can_not_cache: - return [] + @abc.abstractmethod + def get_path_for_link(self, link: Link) -> str: + """Return a directory to store cached items in for link.""" + ... + + def cache_path(self, link: Link) -> Path: + return Path(self.get_path_for_link(link)) - path = self.get_path_for_link(link) - if os.path.isdir(path): - return [(candidate, path) for candidate in os.listdir(path)] - return [] + +class LinkMetadataCache(Cache): + """Persistently store the metadata of dists found at each link.""" def get_path_for_link(self, link: Link) -> str: - """Return a directory to store cached items in for link.""" - raise NotImplementedError() + parts = self._get_cache_path_parts(link) + assert self.cache_dir + return os.path.join(self.cache_dir, "link-metadata", *parts) + +class WheelCacheBase(Cache): + """Specializations to the cache concept for wheels.""" + + @abc.abstractmethod def get( self, link: Link, @@ -96,10 +156,27 @@ def get( """Returns a link to a cached item if it exists, otherwise returns the passed link. """ - raise NotImplementedError() + ... + + def _can_cache(self, link: Link, canonical_package_name: str) -> bool: + return bool(self.cache_dir and canonical_package_name and link) + def _get_candidates( + self, link: Link, canonical_package_name: str + ) -> Iterator[Tuple[str, str]]: + if not self._can_cache(link, canonical_package_name): + return + + path = self.get_path_for_link(link) + if not os.path.isdir(path): + return -class SimpleWheelCache(Cache): + for candidate in os.scandir(path): + if candidate.is_file(): + yield (candidate.name, path) + + +class SimpleWheelCache(WheelCacheBase): """A cache of wheels for future installs.""" def __init__(self, cache_dir: str) -> None: @@ -131,7 +208,7 @@ def get( package_name: Optional[str], supported_tags: List[Tag], ) -> Link: - candidates = [] + candidates: List[Tuple[int, str, str]] = [] if not package_name: return link @@ -205,7 +282,7 @@ def __init__( ) -class WheelCache(Cache): +class WheelCache(WheelCacheBase): """Wraps EphemWheelCache and SimpleWheelCache into a single Cache This Cache allows for gracefully degradation, using the ephem wheel cache @@ -223,6 +300,15 @@ def get_path_for_link(self, link: Link) -> str: def get_ephem_path_for_link(self, link: Link) -> str: return self._ephem_cache.get_path_for_link(link) + def resolve_cache_dir(self, req: InstallRequirement) -> str: + """Return the persistent or temporary cache directory where the built or + downloaded wheel should be stored.""" + cache_available = bool(self.cache_dir) + assert req.link, req + if cache_available and should_cache(req): + return self.get_path_for_link(req.link) + return self.get_ephem_path_for_link(req.link) + def get( self, link: Link, diff --git a/src/pip/_internal/cli/cmdoptions.py b/src/pip/_internal/cli/cmdoptions.py index 0b7cff77bdd..cca4a92954f 100644 --- a/src/pip/_internal/cli/cmdoptions.py +++ b/src/pip/_internal/cli/cmdoptions.py @@ -1009,6 +1009,8 @@ def check_list_path_option(options: Values) -> None: default=[], choices=[ "fast-deps", + "metadata-cache", + "truststore", ] + ALWAYS_ENABLED_FEATURES, help="Enable new functionality, that may be backward incompatible.", diff --git a/src/pip/_internal/cli/req_command.py b/src/pip/_internal/cli/req_command.py index 92900f94ff4..c4355670546 100644 --- a/src/pip/_internal/cli/req_command.py +++ b/src/pip/_internal/cli/req_command.py @@ -10,7 +10,7 @@ from optparse import Values from typing import Any, List, Optional, Tuple -from pip._internal.cache import WheelCache +from pip._internal.cache import LinkMetadataCache, WheelCache from pip._internal.cli import cmdoptions from pip._internal.cli.index_command import IndexGroupCommand from pip._internal.cli.index_command import SessionCommandMixin as SessionCommandMixin @@ -127,6 +127,16 @@ def make_requirement_preparer( "fast-deps has no effect when used with the legacy resolver." ) + if options.cache_dir and "metadata-cache" in options.features_enabled: + logger.warning( + "pip is using a local cache for metadata information. " + "This experimental feature is enabled through " + "--use-feature=metadata-cache and it is not ready for " + "production." + ) + metadata_cache = LinkMetadataCache(options.cache_dir) + else: + metadata_cache = None return RequirementPreparer( build_dir=temp_build_dir_path, src_dir=options.src_dir, @@ -142,6 +152,7 @@ def make_requirement_preparer( lazy_wheel=lazy_wheel, verbosity=verbosity, legacy_resolver=legacy_resolver, + metadata_cache=metadata_cache, ) @classmethod diff --git a/src/pip/_internal/commands/download.py b/src/pip/_internal/commands/download.py index 917bbb91d83..5528c1866a0 100644 --- a/src/pip/_internal/commands/download.py +++ b/src/pip/_internal/commands/download.py @@ -130,6 +130,9 @@ def run(self, options: Values, args: List[str]) -> int: self.trace_basic_info(finder) requirement_set = resolver.resolve(reqs, check_supported_wheels=True) + preparer.finalize_linked_requirements( + requirement_set.requirements.values(), require_dist_files=True + ) downloaded: List[str] = [] for req in requirement_set.requirements.values(): @@ -138,8 +141,6 @@ def run(self, options: Values, args: List[str]) -> int: preparer.save_linked_requirement(req) downloaded.append(req.name) - preparer.prepare_linked_requirements_more(requirement_set.requirements.values()) - if downloaded: write_output("Successfully downloaded %s", " ".join(downloaded)) diff --git a/src/pip/_internal/commands/install.py b/src/pip/_internal/commands/install.py index ad45a2f2a57..90d73842ffc 100644 --- a/src/pip/_internal/commands/install.py +++ b/src/pip/_internal/commands/install.py @@ -85,7 +85,8 @@ def add_options(self) -> None: help=( "Don't actually install anything, just print what would be. " "Can be used in combination with --ignore-installed " - "to 'resolve' the requirements." + "to 'resolve' the requirements. If package metadata is available " + "or cached, --dry-run also avoids downloading the dependency at all." ), ) self.cmd_opts.add_option( @@ -379,6 +380,10 @@ def run(self, options: Values, args: List[str]) -> int: requirement_set = resolver.resolve( reqs, check_supported_wheels=not options.target_dir ) + preparer.finalize_linked_requirements( + requirement_set.requirements.values(), + require_dist_files=not options.dry_run, + ) if options.json_report_file: report = InstallationReport(requirement_set.requirements_to_install) diff --git a/src/pip/_internal/commands/wheel.py b/src/pip/_internal/commands/wheel.py index 278719f4e0c..f3a81db6602 100644 --- a/src/pip/_internal/commands/wheel.py +++ b/src/pip/_internal/commands/wheel.py @@ -145,6 +145,9 @@ def run(self, options: Values, args: List[str]) -> int: self.trace_basic_info(finder) requirement_set = resolver.resolve(reqs, check_supported_wheels=True) + preparer.finalize_linked_requirements( + requirement_set.requirements.values(), require_dist_files=True + ) reqs_to_build: List[InstallRequirement] = [] for req in requirement_set.requirements.values(): @@ -153,8 +156,6 @@ def run(self, options: Values, args: List[str]) -> int: elif should_build_for_wheel_command(req): reqs_to_build.append(req) - preparer.prepare_linked_requirements_more(requirement_set.requirements.values()) - # build wheels build_successes, build_failures = build( reqs_to_build, diff --git a/src/pip/_internal/distributions/__init__.py b/src/pip/_internal/distributions/__init__.py index 9a89a838b9a..a870b227d1e 100644 --- a/src/pip/_internal/distributions/__init__.py +++ b/src/pip/_internal/distributions/__init__.py @@ -1,4 +1,5 @@ from pip._internal.distributions.base import AbstractDistribution +from pip._internal.distributions.installed import InstalledDistribution from pip._internal.distributions.sdist import SourceDistribution from pip._internal.distributions.wheel import WheelDistribution from pip._internal.req.req_install import InstallRequirement @@ -7,7 +8,18 @@ def make_distribution_for_install_requirement( install_req: InstallRequirement, ) -> AbstractDistribution: - """Returns a Distribution for the given InstallRequirement""" + """Returns an AbstractDistribution for the given InstallRequirement. + + As AbstractDistribution only covers installable artifacts, this method may only be + invoked at the conclusion of a resolve, when the RequirementPreparer has downloaded + the file corresponding to the resolved dist. Commands which intend to consume + metadata-only resolves without downloading should not call this method or + consume AbstractDistribution objects. + """ + # Only pre-installed requirements will have a .satisfied_by dist. + if install_req.satisfied_by: + return InstalledDistribution(install_req) + # Editable requirements will always be source distributions. They use the # legacy logic until we create a modern standard for them. if install_req.editable: diff --git a/src/pip/_internal/distributions/base.py b/src/pip/_internal/distributions/base.py index 6e4d0c91a90..0a132b88f98 100644 --- a/src/pip/_internal/distributions/base.py +++ b/src/pip/_internal/distributions/base.py @@ -37,11 +37,17 @@ def build_tracker_id(self) -> Optional[str]: If None, then this dist has no work to do in the build tracker, and ``.prepare_distribution_metadata()`` will not be called.""" - raise NotImplementedError() + ... @abc.abstractmethod def get_metadata_distribution(self) -> BaseDistribution: - raise NotImplementedError() + """Generate a concrete ``BaseDistribution`` instance for this artifact. + + The implementation should also cache the result with + ``self.req.cache_concrete_dist()`` so the distribution is available to other + users of the ``InstallRequirement``. This method is not called within the build + tracker context, so it should not identify any new setup requirements.""" + ... @abc.abstractmethod def prepare_distribution_metadata( @@ -50,4 +56,11 @@ def prepare_distribution_metadata( build_isolation: bool, check_build_deps: bool, ) -> None: - raise NotImplementedError() + """Generate the information necessary to extract metadata from the artifact. + + This method will be executed within the context of ``BuildTracker#track()``, so + it needs to fully identify any setup requirements so they can be added to the + same active set of tracked builds, while ``.get_metadata_distribution()`` takes + care of generating and caching the ``BaseDistribution`` to expose to the rest of + the resolve.""" + ... diff --git a/src/pip/_internal/distributions/installed.py b/src/pip/_internal/distributions/installed.py index ab8d53be740..83e99fca9ca 100644 --- a/src/pip/_internal/distributions/installed.py +++ b/src/pip/_internal/distributions/installed.py @@ -1,9 +1,11 @@ -from typing import Optional +from typing import TYPE_CHECKING, Optional from pip._internal.distributions.base import AbstractDistribution -from pip._internal.index.package_finder import PackageFinder from pip._internal.metadata import BaseDistribution +if TYPE_CHECKING: + from pip._internal.index.package_finder import PackageFinder + class InstalledDistribution(AbstractDistribution): """Represents an installed package. @@ -17,12 +19,14 @@ def build_tracker_id(self) -> Optional[str]: return None def get_metadata_distribution(self) -> BaseDistribution: - assert self.req.satisfied_by is not None, "not actually installed" - return self.req.satisfied_by + dist = self.req.satisfied_by + assert dist is not None, "not actually installed" + self.req.cache_concrete_dist(dist) + return dist def prepare_distribution_metadata( self, - finder: PackageFinder, + finder: "PackageFinder", build_isolation: bool, check_build_deps: bool, ) -> None: diff --git a/src/pip/_internal/distributions/sdist.py b/src/pip/_internal/distributions/sdist.py index 28ea5cea16c..3e4a3e8d371 100644 --- a/src/pip/_internal/distributions/sdist.py +++ b/src/pip/_internal/distributions/sdist.py @@ -1,10 +1,10 @@ import logging -from typing import TYPE_CHECKING, Iterable, Optional, Set, Tuple +from typing import TYPE_CHECKING, Iterable, Set, Tuple from pip._internal.build_env import BuildEnvironment from pip._internal.distributions.base import AbstractDistribution from pip._internal.exceptions import InstallationError -from pip._internal.metadata import BaseDistribution +from pip._internal.metadata import BaseDistribution, get_directory_distribution from pip._internal.utils.subprocess import runner_with_spinner_message if TYPE_CHECKING: @@ -21,13 +21,19 @@ class SourceDistribution(AbstractDistribution): """ @property - def build_tracker_id(self) -> Optional[str]: + def build_tracker_id(self) -> str: """Identify this requirement uniquely by its link.""" assert self.req.link return self.req.link.url_without_fragment def get_metadata_distribution(self) -> BaseDistribution: - return self.req.get_dist() + assert ( + self.req.metadata_directory + ), "Set as part of .prepare_distribution_metadata()" + dist = get_directory_distribution(self.req.metadata_directory) + self.req.cache_concrete_dist(dist) + self.req.validate_sdist_metadata() + return dist def prepare_distribution_metadata( self, @@ -66,7 +72,11 @@ def prepare_distribution_metadata( self._raise_conflicts("the backend dependencies", conflicting) if missing: self._raise_missing_reqs(missing) - self.req.prepare_metadata() + + # NB: we must still call .cache_concrete_dist() and .validate_sdist_metadata() + # before the InstallRequirement itself has been updated with the metadata from + # this directory! + self.req.prepare_metadata_directory() def _prepare_build_backend(self, finder: "PackageFinder") -> None: # Isolate in a BuildEnvironment and install the build-time diff --git a/src/pip/_internal/distributions/wheel.py b/src/pip/_internal/distributions/wheel.py index bfadd39dcb7..e75c7910379 100644 --- a/src/pip/_internal/distributions/wheel.py +++ b/src/pip/_internal/distributions/wheel.py @@ -31,7 +31,9 @@ def get_metadata_distribution(self) -> BaseDistribution: assert self.req.local_file_path, "Set as part of preparation during download" assert self.req.name, "Wheels are never unnamed" wheel = FilesystemWheel(self.req.local_file_path) - return get_wheel_distribution(wheel, canonicalize_name(self.req.name)) + dist = get_wheel_distribution(wheel, canonicalize_name(self.req.name)) + self.req.cache_concrete_dist(dist) + return dist def prepare_distribution_metadata( self, diff --git a/src/pip/_internal/exceptions.py b/src/pip/_internal/exceptions.py index 2587740f73a..7f6c45adaf0 100644 --- a/src/pip/_internal/exceptions.py +++ b/src/pip/_internal/exceptions.py @@ -246,6 +246,25 @@ def __str__(self) -> str: return f"None {self.metadata_name} metadata found for distribution: {self.dist}" +class CacheMetadataError(PipError): + """Raised when de/serializing a requirement into the metadata cache.""" + + def __init__( + self, + req: "InstallRequirement", + reason: str, + ) -> None: + """ + :param req: The requirement we attempted to cache. + :param reason: Context about the precise error that occurred. + """ + self.req = req + self.reason = reason + + def __str__(self) -> str: + return f"{self.reason} for {self.req} from {self.req.link}" + + class UserInstallationInvalid(InstallationError): """A --user install is requested on an environment without user site.""" diff --git a/src/pip/_internal/metadata/__init__.py b/src/pip/_internal/metadata/__init__.py index aa232b6cabd..2ed1fe8d64a 100644 --- a/src/pip/_internal/metadata/__init__.py +++ b/src/pip/_internal/metadata/__init__.py @@ -6,7 +6,14 @@ from pip._internal.utils.misc import strtobool -from .base import BaseDistribution, BaseEnvironment, FilesystemWheel, MemoryWheel, Wheel +from .base import ( + BaseDistribution, + BaseEnvironment, + FilesystemWheel, + MemoryWheel, + Wheel, + serialize_metadata, +) if TYPE_CHECKING: from typing import Literal, Protocol @@ -23,6 +30,7 @@ "get_environment", "get_wheel_distribution", "select_backend", + "serialize_metadata", ] diff --git a/src/pip/_internal/metadata/base.py b/src/pip/_internal/metadata/base.py index 9eabcdb278b..9c95f9f7a31 100644 --- a/src/pip/_internal/metadata/base.py +++ b/src/pip/_internal/metadata/base.py @@ -1,6 +1,9 @@ import csv +import email.generator import email.message +import email.policy import functools +import io import json import logging import pathlib @@ -90,6 +93,18 @@ def _convert_installed_files_path( return str(pathlib.Path(*info, *entry)) +def serialize_metadata(msg: email.message.Message) -> str: + """Write a dist's metadata to a string. + + Calling ``str(dist.metadata)`` may raise an error by misinterpreting RST directives + as email headers. This method uses the more robust ``email.policy.EmailPolicy`` to + avoid those parsing errors.""" + out = io.StringIO() + g = email.generator.Generator(out, policy=email.policy.EmailPolicy()) + g.flatten(msg) + return out.getvalue() + + class RequiresEntry(NamedTuple): requirement: str extra: str @@ -97,6 +112,15 @@ class RequiresEntry(NamedTuple): class BaseDistribution(Protocol): + @property + def is_concrete(self) -> bool: + """Whether the distribution really exists somewhere on disk. + + If this is false, it has been synthesized from metadata, e.g. via + ``.from_metadata_file_contents()``, or ``.from_wheel()`` against + a ``MemoryWheel``.""" + raise NotImplementedError() + @classmethod def from_directory(cls, directory: str) -> "BaseDistribution": """Load the distribution from a metadata directory. @@ -667,6 +691,10 @@ def iter_installed_distributions( class Wheel(Protocol): location: str + @property + def is_concrete(self) -> bool: + raise NotImplementedError() + def as_zipfile(self) -> zipfile.ZipFile: raise NotImplementedError() @@ -675,6 +703,10 @@ class FilesystemWheel(Wheel): def __init__(self, location: str) -> None: self.location = location + @property + def is_concrete(self) -> bool: + return True + def as_zipfile(self) -> zipfile.ZipFile: return zipfile.ZipFile(self.location, allowZip64=True) @@ -684,5 +716,9 @@ def __init__(self, location: str, stream: IO[bytes]) -> None: self.location = location self.stream = stream + @property + def is_concrete(self) -> bool: + return False + def as_zipfile(self) -> zipfile.ZipFile: return zipfile.ZipFile(self.stream, allowZip64=True) diff --git a/src/pip/_internal/metadata/importlib/_dists.py b/src/pip/_internal/metadata/importlib/_dists.py index 36cd326232e..63b5ff1a012 100644 --- a/src/pip/_internal/metadata/importlib/_dists.py +++ b/src/pip/_internal/metadata/importlib/_dists.py @@ -102,16 +102,22 @@ def __init__( dist: importlib.metadata.Distribution, info_location: Optional[BasePath], installed_location: Optional[BasePath], + concrete: bool, ) -> None: self._dist = dist self._info_location = info_location self._installed_location = installed_location + self._concrete = concrete + + @property + def is_concrete(self) -> bool: + return self._concrete @classmethod def from_directory(cls, directory: str) -> BaseDistribution: info_location = pathlib.Path(directory) dist = importlib.metadata.Distribution.at(info_location) - return cls(dist, info_location, info_location.parent) + return cls(dist, info_location, info_location.parent, concrete=True) @classmethod def from_metadata_file_contents( @@ -128,7 +134,7 @@ def from_metadata_file_contents( metadata_path.write_bytes(metadata_contents) # Construct dist pointing to the newly created directory. dist = importlib.metadata.Distribution.at(metadata_path.parent) - return cls(dist, metadata_path.parent, None) + return cls(dist, metadata_path.parent, None, concrete=False) @classmethod def from_wheel(cls, wheel: Wheel, name: str) -> BaseDistribution: @@ -137,7 +143,14 @@ def from_wheel(cls, wheel: Wheel, name: str) -> BaseDistribution: dist = WheelDistribution.from_zipfile(zf, name, wheel.location) except zipfile.BadZipFile as e: raise InvalidWheel(wheel.location, name) from e - return cls(dist, dist.info_location, pathlib.PurePosixPath(wheel.location)) + except UnsupportedWheel as e: + raise UnsupportedWheel(f"{name} has an invalid wheel, {e}") + return cls( + dist, + dist.info_location, + pathlib.PurePosixPath(wheel.location), + concrete=wheel.is_concrete, + ) @property def location(self) -> Optional[str]: diff --git a/src/pip/_internal/metadata/importlib/_envs.py b/src/pip/_internal/metadata/importlib/_envs.py index 70cb7a6009a..b7355d4935c 100644 --- a/src/pip/_internal/metadata/importlib/_envs.py +++ b/src/pip/_internal/metadata/importlib/_envs.py @@ -80,7 +80,7 @@ def find(self, location: str) -> Iterator[BaseDistribution]: installed_location: Optional[BasePath] = None else: installed_location = info_location.parent - yield Distribution(dist, info_location, installed_location) + yield Distribution(dist, info_location, installed_location, concrete=True) def find_linked(self, location: str) -> Iterator[BaseDistribution]: """Read location in egg-link files and return distributions in there. @@ -104,7 +104,7 @@ def find_linked(self, location: str) -> Iterator[BaseDistribution]: continue target_location = str(path.joinpath(target_rel)) for dist, info_location in self._find_impl(target_location): - yield Distribution(dist, info_location, path) + yield Distribution(dist, info_location, path, concrete=True) def _find_eggs_in_dir(self, location: str) -> Iterator[BaseDistribution]: from pip._vendor.pkg_resources import find_distributions @@ -116,7 +116,7 @@ def _find_eggs_in_dir(self, location: str) -> Iterator[BaseDistribution]: if not entry.name.endswith(".egg"): continue for dist in find_distributions(entry.path): - yield legacy.Distribution(dist) + yield legacy.Distribution(dist, concrete=True) def _find_eggs_in_zip(self, location: str) -> Iterator[BaseDistribution]: from pip._vendor.pkg_resources import find_eggs_in_zip @@ -128,7 +128,7 @@ def _find_eggs_in_zip(self, location: str) -> Iterator[BaseDistribution]: except zipimport.ZipImportError: return for dist in find_eggs_in_zip(importer, location): - yield legacy.Distribution(dist) + yield legacy.Distribution(dist, concrete=True) def find_eggs(self, location: str) -> Iterator[BaseDistribution]: """Find eggs in a location. diff --git a/src/pip/_internal/metadata/pkg_resources.py b/src/pip/_internal/metadata/pkg_resources.py index 4ea84f93a6f..428b1f4a7dd 100644 --- a/src/pip/_internal/metadata/pkg_resources.py +++ b/src/pip/_internal/metadata/pkg_resources.py @@ -81,8 +81,9 @@ def run_script(self, script_name: str, namespace: str) -> None: class Distribution(BaseDistribution): - def __init__(self, dist: pkg_resources.Distribution) -> None: + def __init__(self, dist: pkg_resources.Distribution, concrete: bool) -> None: self._dist = dist + self._concrete = concrete # This is populated lazily, to avoid loading metadata for all possible # distributions eagerly. self.__extra_mapping: Optional[Mapping[NormalizedName, str]] = None @@ -96,6 +97,10 @@ def _extra_mapping(self) -> Mapping[NormalizedName, str]: return self.__extra_mapping + @property + def is_concrete(self) -> bool: + return self._concrete + @classmethod def from_directory(cls, directory: str) -> BaseDistribution: dist_dir = directory.rstrip(os.sep) @@ -114,7 +119,7 @@ def from_directory(cls, directory: str) -> BaseDistribution: dist_name = os.path.splitext(dist_dir_name)[0].split("-")[0] dist = dist_cls(base_dir, project_name=dist_name, metadata=metadata) - return cls(dist) + return cls(dist, concrete=True) @classmethod def from_metadata_file_contents( @@ -131,7 +136,7 @@ def from_metadata_file_contents( metadata=InMemoryMetadata(metadata_dict, filename), project_name=project_name, ) - return cls(dist) + return cls(dist, concrete=False) @classmethod def from_wheel(cls, wheel: Wheel, name: str) -> BaseDistribution: @@ -152,7 +157,7 @@ def from_wheel(cls, wheel: Wheel, name: str) -> BaseDistribution: metadata=InMemoryMetadata(metadata_dict, wheel.location), project_name=name, ) - return cls(dist) + return cls(dist, concrete=wheel.is_concrete) @property def location(self) -> Optional[str]: @@ -264,7 +269,7 @@ def from_paths(cls, paths: Optional[List[str]]) -> BaseEnvironment: def _iter_distributions(self) -> Iterator[BaseDistribution]: for dist in self._ws: - yield Distribution(dist) + yield Distribution(dist, concrete=True) def _search_distribution(self, name: str) -> Optional[BaseDistribution]: """Find a distribution matching the ``name`` in the environment. diff --git a/src/pip/_internal/network/download.py b/src/pip/_internal/network/download.py index 5c3bce3d2fd..9a5d6fb9c5d 100644 --- a/src/pip/_internal/network/download.py +++ b/src/pip/_internal/network/download.py @@ -114,7 +114,7 @@ def _get_http_response_filename(resp: Response, link: Link) -> str: def _http_get_download(session: PipSession, link: Link) -> Response: - target_url = link.url.split("#", 1)[0] + target_url = link.url_without_fragment resp = session.get(target_url, headers=HEADERS, stream=True) raise_for_status(resp) return resp diff --git a/src/pip/_internal/operations/check.py b/src/pip/_internal/operations/check.py index 4b6fbc4c375..4391d3e0121 100644 --- a/src/pip/_internal/operations/check.py +++ b/src/pip/_internal/operations/check.py @@ -23,7 +23,6 @@ from pip._vendor.packaging.utils import NormalizedName, canonicalize_name from pip._vendor.packaging.version import Version -from pip._internal.distributions import make_distribution_for_install_requirement from pip._internal.metadata import get_default_environment from pip._internal.metadata.base import BaseDistribution from pip._internal.req.req_install import InstallRequirement @@ -154,8 +153,8 @@ def _simulate_installation_of( # Modify it as installing requirement_set would (assuming no errors) for inst_req in to_install: - abstract_dist = make_distribution_for_install_requirement(inst_req) - dist = abstract_dist.get_metadata_distribution() + assert inst_req.is_concrete + dist = inst_req.get_dist() name = dist.canonical_name package_set[name] = PackageDetails(dist.version, list(dist.iter_dependencies())) diff --git a/src/pip/_internal/operations/prepare.py b/src/pip/_internal/operations/prepare.py index e6aa3447200..a319a475ee9 100644 --- a/src/pip/_internal/operations/prepare.py +++ b/src/pip/_internal/operations/prepare.py @@ -4,18 +4,22 @@ # The following comment should be removed at some point in the future. # mypy: strict-optional=False +import bz2 +import json import mimetypes import os import shutil from dataclasses import dataclass from pathlib import Path -from typing import Dict, Iterable, List, Optional +from typing import Dict, Iterable, List, Optional, Tuple from pip._vendor.packaging.utils import canonicalize_name +from pip._vendor.requests.exceptions import InvalidSchema +from pip._internal.cache import LinkMetadataCache, should_cache from pip._internal.distributions import make_distribution_for_install_requirement -from pip._internal.distributions.installed import InstalledDistribution from pip._internal.exceptions import ( + CacheMetadataError, DirectoryUrlHashUnsupported, HashMismatch, HashUnpinned, @@ -25,7 +29,11 @@ VcsHashUnsupported, ) from pip._internal.index.package_finder import PackageFinder -from pip._internal.metadata import BaseDistribution, get_metadata_distribution +from pip._internal.metadata import ( + BaseDistribution, + get_metadata_distribution, + serialize_metadata, +) from pip._internal.models.direct_url import ArchiveInfo from pip._internal.models.link import Link from pip._internal.models.wheel import Wheel @@ -63,16 +71,31 @@ def _get_prepared_distribution( finder: PackageFinder, build_isolation: bool, check_build_deps: bool, -) -> BaseDistribution: - """Prepare a distribution for installation.""" +) -> Tuple[bool, BaseDistribution]: + """Prepare a distribution for installation. + + This method will only be called by the preparer at the end of the resolve, and only + for commands which need installable artifacts (not just resolved metadata). If the + dist was previously metadata-only, the preparer must have downloaded the file + corresponding to the dist and set ``req.local_file_path``. + + This method will execute ``req.cache_concrete_dist()``, so that after invocation, + ``req.is_concrete`` will be True, because ``req.get_dist()`` will return a concrete + ``Distribution``. + + :returns: a 2-tuple: + - (bool): whether the metadata had to be constructed (e.g. from an sdist build), + - (BaseDistribution): the concrete distribution which is ready to be installed. + """ abstract_dist = make_distribution_for_install_requirement(req) tracker_id = abstract_dist.build_tracker_id - if tracker_id is not None: + builds_metadata = tracker_id is not None + if builds_metadata: with build_tracker.track(req, tracker_id): abstract_dist.prepare_distribution_metadata( finder, build_isolation, check_build_deps ) - return abstract_dist.get_metadata_distribution() + return (builds_metadata, abstract_dist.get_metadata_distribution()) def unpack_vcs_link(link: Link, location: str, verbosity: int) -> None: @@ -190,6 +213,8 @@ def _check_download_dir( ) -> Optional[str]: """Check download_dir for previously downloaded file with correct hash If a correct file is found return its path else None + + If a file is found at the given path, but with an invalid hash, the file is deleted. """ download_path = os.path.join(download_dir, link.filename) @@ -212,6 +237,45 @@ def _check_download_dir( return download_path +@dataclass(frozen=True) +class CacheableDist: + metadata: str + filename: Path + canonical_name: str + + @classmethod + def from_dist(cls, link: Link, dist: BaseDistribution) -> "CacheableDist": + """Extract the serializable data necessary to generate a metadata-only dist.""" + return cls( + metadata=serialize_metadata(dist.metadata), + filename=Path(link.filename), + canonical_name=dist.canonical_name, + ) + + def to_dist(self) -> BaseDistribution: + """Return a metadata-only dist from the deserialized cache entry.""" + return get_metadata_distribution( + metadata_contents=self.metadata.encode("utf-8"), + filename=str(self.filename), + canonical_name=self.canonical_name, + ) + + def to_json(self) -> Dict[str, str]: + return { + "metadata": self.metadata, + "filename": str(self.filename), + "canonical_name": self.canonical_name, + } + + @classmethod + def from_json(cls, args: Dict[str, str]) -> "CacheableDist": + return cls( + metadata=args["metadata"], + filename=Path(args["filename"]), + canonical_name=args["canonical_name"], + ) + + class RequirementPreparer: """Prepares a Requirement""" @@ -231,6 +295,7 @@ def __init__( lazy_wheel: bool, verbosity: int, legacy_resolver: bool, + metadata_cache: Optional[LinkMetadataCache] = None, ) -> None: super().__init__() @@ -273,6 +338,8 @@ def __init__( # Previous "header" printed for a link-based InstallRequirement self._previous_requirement_header = ("", "") + self._metadata_cache = metadata_cache + def _log_preparing_link(self, req: InstallRequirement) -> None: """Provide context for the requirement being prepared.""" if req.link.is_file and not req.is_wheel_from_cache: @@ -304,11 +371,7 @@ def _ensure_link_req_src_dir( self, req: InstallRequirement, parallel_builds: bool ) -> None: """Ensure source_dir of a linked InstallRequirement.""" - # Since source_dir is only set for editable requirements. - if req.link.is_wheel: - # We don't need to unpack wheels, so no need for a source - # directory. - return + assert not req.link.is_wheel assert req.source_dir is None if req.link.is_existing_dir(): # build local directories in-tree @@ -355,6 +418,47 @@ def _get_linked_req_hashes(self, req: InstallRequirement) -> Hashes: # showing the user what the hash should be. return req.hashes(trust_internet=False) or MissingHashes() + def _rewrite_link_and_hashes_for_cached_wheel( + self, req: InstallRequirement, hashes: Hashes + ) -> Optional[Hashes]: + """Check the hash of the requirement's source eagerly and rewrite its link. + + ``req.link`` is unconditionally rewritten to the cached wheel source so that the + requirement corresponds to where it was actually downloaded from instead of the + local cache entry. + + :returns: None if the source hash validated successfully. + """ + assert hashes + assert req.is_wheel_from_cache + assert req.download_info is not None + assert req.link.is_wheel + assert req.link.is_file + + # TODO: is it possible to avoid providing a "wrong" req.link in the first place + # in the resolver, instead of having to patch it up afterwards? + req.link = req.cached_wheel_source_link + + # We need to verify hashes, and we have found the requirement in the cache + # of locally built wheels. + if ( + isinstance(req.download_info.info, ArchiveInfo) + and req.download_info.info.hashes + and hashes.has_one_of(req.download_info.info.hashes) + ): + # At this point we know the requirement was built from a hashable source + # artifact, and we verified that the cache entry's hash of the original + # artifact matches one of the hashes we expect. We don't verify hashes + # against the cached wheel, because the wheel is not the original. + return None + + logger.warning( + "The hashes of the source archive found in cache entry " + "don't match, ignoring cached built wheel " + "and re-downloading source." + ) + return hashes + def _fetch_metadata_only( self, req: InstallRequirement, @@ -365,14 +469,81 @@ def _fetch_metadata_only( ) return None if self.require_hashes: + # Hash checking also means hashes are provided for all reqs, so no resolve + # is necessary and metadata-only fetching provides no speedup. logger.debug( "Metadata-only fetching is not used as hash checking is required", ) return None - # Try PEP 658 metadata first, then fall back to lazy wheel if unavailable. - return self._fetch_metadata_using_link_data_attr( - req - ) or self._fetch_metadata_using_lazy_wheel(req.link) + + return ( + self._fetch_cached_metadata(req) + or self._fetch_metadata_using_link_data_attr(req) + or self._fetch_metadata_using_lazy_wheel(req) + ) + + def _locate_metadata_cache_entry(self, link: Link) -> Optional[Path]: + """If the metadata cache is active, generate a filesystem path from the hash of + the given Link.""" + if self._metadata_cache is None: + return None + + return self._metadata_cache.cache_path(link) + + def _fetch_cached_metadata( + self, req: InstallRequirement + ) -> Optional[BaseDistribution]: + cached_path = self._locate_metadata_cache_entry(req.link) + if cached_path is None: + return None + + # Quietly continue if the cache entry does not exist. + if not os.path.isfile(cached_path): + logger.debug( + "no cached metadata for link %s at %s", + req.link, + cached_path, + ) + return None + + try: + with bz2.open(cached_path, mode="rt", encoding="utf-8") as f: + logger.debug( + "found cached metadata for link %s at %s", req.link, cached_path + ) + args = json.load(f) + cached_dist = CacheableDist.from_json(args) + return cached_dist.to_dist() + except Exception: + raise CacheMetadataError(req, "error reading cached metadata") + + def _cache_metadata( + self, + req: InstallRequirement, + metadata_dist: BaseDistribution, + ) -> None: + cached_path = self._locate_metadata_cache_entry(req.link) + if cached_path is None: + return + + # The cache file exists already, so we have nothing to do. + if os.path.isfile(cached_path): + logger.debug( + "metadata for link %s is already cached at %s", req.link, cached_path + ) + return + + # The metadata cache is split across several subdirectories, so ensure the + # containing directory for the cache file exists before writing. + os.makedirs(str(cached_path.parent), exist_ok=True) + try: + cacheable_dist = CacheableDist.from_dist(req.link, metadata_dist) + args = cacheable_dist.to_json() + logger.debug("caching metadata for link %s at %s", req.link, cached_path) + with bz2.open(cached_path, mode="wt", encoding="utf-8") as f: + json.dump(args, f) + except Exception: + raise CacheMetadataError(req, "failed to serialize metadata") def _fetch_metadata_using_link_data_attr( self, @@ -390,6 +561,9 @@ def _fetch_metadata_using_link_data_attr( metadata_link, ) # (2) Download the contents of the METADATA file, separate from the dist itself. + # NB: this request will hit the CacheControl HTTP cache, which will be very + # quick since the METADATA file is very small. Therefore, we can rely on + # HTTP caching instead of LinkMetadataCache. metadata_file = get_http_url( metadata_link, self._download, @@ -417,36 +591,48 @@ def _fetch_metadata_using_link_data_attr( def _fetch_metadata_using_lazy_wheel( self, - link: Link, + req: InstallRequirement, ) -> Optional[BaseDistribution]: """Fetch metadata using lazy wheel, if possible.""" # --use-feature=fast-deps must be provided. if not self.use_lazy_wheel: return None - if link.is_file or not link.is_wheel: + if req.link.is_file or not req.link.is_wheel: logger.debug( "Lazy wheel is not used as %r does not point to a remote wheel", - link, + req.link, ) return None - wheel = Wheel(link.filename) + wheel = Wheel(req.link.filename) name = canonicalize_name(wheel.name) logger.info( "Obtaining dependency information from %s %s", name, wheel.version, ) - url = link.url.split("#", 1)[0] + try: - return dist_from_wheel_url(name, url, self._session) + lazy_wheel_dist = dist_from_wheel_url( + name, req.link.url_without_fragment, self._session + ) except HTTPRangeRequestUnsupported: - logger.debug("%s does not support range requests", url) + logger.debug("%s does not support range requests", req.link) return None + # If we've used the lazy wheel approach, then PEP 658 metadata is not available. + # If the wheel is very large (>1GB), then retrieving it from the CacheControl + # HTTP cache may take multiple seconds, even on a fast computer, and the + # preparer will unnecessarily copy the cached response to disk before deleting + # it at the end of the run. Caching the dist metadata in LinkMetadataCache means + # later pip executions can retrieve metadata within milliseconds and avoid + # thrashing the disk. + self._cache_metadata(req, lazy_wheel_dist) + return lazy_wheel_dist + def _complete_partial_requirements( self, - partially_downloaded_reqs: Iterable[InstallRequirement], + metadata_only_reqs: List[InstallRequirement], parallel_builds: bool = False, ) -> None: """Download any requirements which were only fetched by metadata.""" @@ -458,9 +644,23 @@ def _complete_partial_requirements( # `req.local_file_path` on the appropriate requirement after passing # all the links at once into BatchDownloader. links_to_fully_download: Dict[Link, InstallRequirement] = {} - for req in partially_downloaded_reqs: + for req in metadata_only_reqs: assert req.link - links_to_fully_download[req.link] = req + + # (1) File URLs don't need to be downloaded, so skip them. + if req.link.scheme == "file": + continue + # (2) If this is e.g. a git url, we don't know how to handle that in the + # BatchDownloader, so leave it for self._prepare_linked_requirement() at + # the end of this method, which knows how to handle any URL. + can_simply_download = True + try: + # This will raise InvalidSchema if our Session can't download it. + self._session.get_adapter(req.link.url) + except InvalidSchema: + can_simply_download = False + if can_simply_download: + links_to_fully_download[req.link] = req batch_download = self._batch_download( links_to_fully_download.keys(), @@ -485,9 +685,33 @@ def _complete_partial_requirements( # This step is necessary to ensure all lazy wheels are processed # successfully by the 'download', 'wheel', and 'install' commands. - for req in partially_downloaded_reqs: + for req in metadata_only_reqs: self._prepare_linked_requirement(req, parallel_builds) + def _check_download_path(self, req: InstallRequirement) -> None: + """Check if the relevant file is already available in the download directory. + + If so, check its hash, and delete the file if the hash doesn't match.""" + if self.download_dir is None: + return + if not req.link.is_wheel: + return + + hashes = self._get_linked_req_hashes(req) + if file_path := _check_download_dir( + req.link, + self.download_dir, + hashes, + # When a locally built wheel has been found in cache, we don't warn + # about re-downloading when the already downloaded wheel hash does + # not match. This is because the hash must be checked against the + # original link, not the cached link. It that case the already + # downloaded file will be removed and re-fetched from cache (which + # implies a hash check against the cache entry's origin.json). + warn_on_hash_mismatch=not req.is_wheel_from_cache, + ): + self._downloaded[req.link.url] = file_path + def prepare_linked_requirement( self, req: InstallRequirement, parallel_builds: bool = False ) -> BaseDistribution: @@ -495,108 +719,109 @@ def prepare_linked_requirement( assert req.link self._log_preparing_link(req) with indent_log(): - # Check if the relevant file is already available - # in the download directory - file_path = None - if self.download_dir is not None and req.link.is_wheel: - hashes = self._get_linked_req_hashes(req) - file_path = _check_download_dir( - req.link, - self.download_dir, - hashes, - # When a locally built wheel has been found in cache, we don't warn - # about re-downloading when the already downloaded wheel hash does - # not match. This is because the hash must be checked against the - # original link, not the cached link. It that case the already - # downloaded file will be removed and re-fetched from cache (which - # implies a hash check against the cache entry's origin.json). - warn_on_hash_mismatch=not req.is_wheel_from_cache, - ) + # See if the file is already downloaded, and check its hash if so. + self._check_download_path(req) - if file_path is not None: - # The file is already available, so mark it as downloaded - self._downloaded[req.link.url] = file_path - else: - # The file is not available, attempt to fetch only metadata - metadata_dist = self._fetch_metadata_only(req) - if metadata_dist is not None: - req.needs_more_preparation = True - return metadata_dist + # First try to fetch only metadata. + if metadata_dist := self._fetch_metadata_only(req): + # These reqs now have the dependency information from the downloaded + # metadata, without having downloaded the actual dist at all. + req.cache_virtual_metadata_only_dist(metadata_dist) + return metadata_dist # None of the optimizations worked, fully prepare the requirement return self._prepare_linked_requirement(req, parallel_builds) - def prepare_linked_requirements_more( - self, reqs: Iterable[InstallRequirement], parallel_builds: bool = False - ) -> None: - """Prepare linked requirements more, if needed.""" - reqs = [req for req in reqs if req.needs_more_preparation] + def _extract_download_info(self, reqs: Iterable[InstallRequirement]) -> None: + """ + `pip install --report` extracts the download info from each requirement for its + JSON output, so we need to make sure every requirement has this before finishing + the resolve. But .download_info will only be populated by the point this method + is called for requirements already found in the wheel cache, so we need to + synthesize it for uncached results. Luckily, a DirectUrl can be parsed directly + from a url without any other context. However, this also means the download info + will only contain a hash if the link itself declares the hash. + """ for req in reqs: - # Determine if any of these requirements were already downloaded. - if self.download_dir is not None and req.link.is_wheel: - hashes = self._get_linked_req_hashes(req) - file_path = _check_download_dir(req.link, self.download_dir, hashes) - if file_path is not None: - self._downloaded[req.link.url] = file_path - req.needs_more_preparation = False - - # Prepare requirements we found were already downloaded for some - # reason. The other downloads will be completed separately. - partially_downloaded_reqs: List[InstallRequirement] = [] + if req.download_info is None: + self._ensure_download_info(req) + + def _force_fully_prepared( + self, reqs: Iterable[InstallRequirement], assert_has_dist_files: bool + ) -> None: + """ + The legacy resolver seems to prepare requirements differently that can leave + them half-done in certain code paths. I'm not quite sure how it's doing things, + but at least we can do this to make sure they do things right. + """ for req in reqs: - if req.needs_more_preparation: - partially_downloaded_reqs.append(req) - else: - self._prepare_linked_requirement(req, parallel_builds) + req.prepared = True + if assert_has_dist_files: + assert req.is_concrete - # TODO: separate this part out from RequirementPreparer when the v1 - # resolver can be removed! + def _ensure_dist_files( + self, reqs: Iterable[InstallRequirement], parallel_builds: bool = False + ) -> None: + """Download any metadata-only linked requirements.""" + metadata_only_reqs = [req for req in reqs if not req.is_concrete] self._complete_partial_requirements( - partially_downloaded_reqs, + metadata_only_reqs, parallel_builds=parallel_builds, ) - def _prepare_linked_requirement( - self, req: InstallRequirement, parallel_builds: bool - ) -> BaseDistribution: - assert req.link - link = req.link - - hashes = self._get_linked_req_hashes(req) + def finalize_linked_requirements( + self, + reqs: Iterable[InstallRequirement], + require_dist_files: bool, + parallel_builds: bool = False, + ) -> None: + """Prepare linked requirements more, if needed. + + Neighboring .metadata files as per PEP 658 or lazy wheels via fast-deps will be + preferred to extract metadata from any concrete requirement (one that has been + mapped to a Link) without downloading the underlying wheel or sdist. When ``pip + install --dry-run`` is called, we want to avoid ever downloading the underlying + dist, but we still need to provide all of the results that pip commands expect + from the typical resolve process. + + Those expectations vary, but one distinction lies in whether the command needs + an actual physical dist somewhere on the filesystem, or just the metadata about + it from the resolver (as in ``pip install --report``). If the command requires + actual physical filesystem locations for the resolved dists, it must call this + method with ``require_dist_files=True`` to fully download anything + that remains. + """ + if require_dist_files: + self._ensure_dist_files(reqs, parallel_builds=parallel_builds) + else: + self._extract_download_info(reqs) + self._force_fully_prepared(reqs, assert_has_dist_files=require_dist_files) - if hashes and req.is_wheel_from_cache: - assert req.download_info is not None - assert link.is_wheel - assert link.is_file - # We need to verify hashes, and we have found the requirement in the cache - # of locally built wheels. - if ( - isinstance(req.download_info.info, ArchiveInfo) - and req.download_info.info.hashes - and hashes.has_one_of(req.download_info.info.hashes) - ): - # At this point we know the requirement was built from a hashable source - # artifact, and we verified that the cache entry's hash of the original - # artifact matches one of the hashes we expect. We don't verify hashes - # against the cached wheel, because the wheel is not the original. - hashes = None - else: - logger.warning( - "The hashes of the source archive found in cache entry " - "don't match, ignoring cached built wheel " - "and re-downloading source." - ) - req.link = req.cached_wheel_source_link - link = req.link + def _ensure_local_file_path( + self, req: InstallRequirement, hashes: Optional[Hashes] + ) -> None: + """Ensure that ``req.link`` is downloaded locally, matches the expected hash, + and that ``req.local_file_path`` is set to the download location.""" + if req.link.is_existing_dir(): + return - self._ensure_link_req_src_dir(req, parallel_builds) + # NB: req.local_file_path may be set already, if it was: + # (1) sourced from a local file (such as a local .tar.gz path), + # (2) also in the wheel cache (e.g. built from an sdist). + # We will overwrite it if so, since the local file path will still point to the + # .tar.gz source instead of the wheel cache entry. - if link.is_existing_dir(): - local_file = None - elif link.url not in self._downloaded: + local_file: Optional[File] = None + # The file may have already been downloaded in batch if it was + # a metadata-only requirement, or if it was already in the download directory. + if file_path := self._downloaded.get(req.link.url, None): + if hashes: + hashes.check_against_path(file_path) + local_file = File(file_path, content_type=None) + else: try: local_file = unpack_url( - link, + req.link, req.source_dir, self._download, self.verbosity, @@ -606,51 +831,82 @@ def _prepare_linked_requirement( except NetworkConnectionError as exc: raise InstallationError( f"Could not install requirement {req} because of HTTP " - f"error {exc} for URL {link}" - ) - else: - file_path = self._downloaded[link.url] - if hashes: - hashes.check_against_path(file_path) - local_file = File(file_path, content_type=None) + f"error {exc} for URL {req.link}" + ) from exc - # If download_info is set, we got it from the wheel cache. - if req.download_info is None: - # Editables don't go through this function (see - # prepare_editable_requirement). - assert not req.editable - req.download_info = direct_url_from_link(link, req.source_dir) - # Make sure we have a hash in download_info. If we got it as part of the - # URL, it will have been verified and we can rely on it. Otherwise we - # compute it from the downloaded file. - # FIXME: https://github.com/pypa/pip/issues/11943 - if ( - isinstance(req.download_info.info, ArchiveInfo) - and not req.download_info.info.hashes - and local_file - ): - hash = hash_file(local_file.path)[0].hexdigest() - # We populate info.hash for backward compatibility. - # This will automatically populate info.hashes. - req.download_info.info.hash = f"sha256={hash}" - - # For use in later processing, - # preserve the file path on the requirement. - if local_file: + if local_file is not None: req.local_file_path = local_file.path - dist = _get_prepared_distribution( + def _prepare_and_finalize_dist(self, req: InstallRequirement) -> BaseDistribution: + (builds_metadata, dist) = _get_prepared_distribution( req, self.build_tracker, self.finder, self.build_isolation, self.check_build_deps, ) + assert dist.is_concrete + assert req.is_concrete + assert req.get_dist() is dist + + if builds_metadata and should_cache(req): + self._cache_metadata(req, dist) + return dist + def _prepare_linked_requirement( + self, req: InstallRequirement, parallel_builds: bool + ) -> BaseDistribution: + """Ensure the dist pointing to the fully-resolved requirement is downloaded and + installable.""" + assert req.link, "this requirement must have a download link to fully prepare" + + hashes: Optional[Hashes] = self._get_linked_req_hashes(req) + + if hashes and req.is_wheel_from_cache: + hashes = self._rewrite_link_and_hashes_for_cached_wheel(req, hashes) + + # req.source_dir is only set for editable requirements. We don't need to unpack + # wheels, so no need for a source directory. + if not req.link.is_wheel: + self._ensure_link_req_src_dir(req, parallel_builds) + + # Ensure the dist is downloaded, check its hash, and unpack it into the source + # directory (if applicable). + self._ensure_local_file_path(req, hashes) + + # Set req.download_info for --report output. + if req.download_info is None: + # If download_info is set, we got it from the wheel cache. + self._ensure_download_info(req) + + # Build (if necessary) and prepare the distribution for installation. + return self._prepare_and_finalize_dist(req) + + def _ensure_download_info(self, req: InstallRequirement) -> None: + """Ensure that ``req.download_info`` is set, for uses such as --report.""" + assert req.download_info is None + # Editables don't go through this function (see prepare_editable_requirement). + assert not req.editable + req.download_info = direct_url_from_link(req.link, req.source_dir) + # Make sure we have a hash in download_info. If we got it as part of the + # URL, it will have been verified and we can rely on it. Otherwise we + # compute it from the downloaded file. + # FIXME: https://github.com/pypa/pip/issues/11943 + if ( + isinstance(req.download_info.info, ArchiveInfo) + and not req.download_info.info.hashes + and req.local_file_path + ): + hash = hash_file(req.local_file_path)[0].hexdigest() + # We populate info.hash for backward compatibility. + # This will automatically populate info.hashes. + req.download_info.info.hash = f"sha256={hash}" + def save_linked_requirement(self, req: InstallRequirement) -> None: assert self.download_dir is not None assert req.link is not None + assert req.is_concrete link = req.link if link.is_vcs or (link.is_existing_dir() and req.editable): # Make a .zip of the source_dir we already created. @@ -694,18 +950,9 @@ def prepare_editable_requirement( req.update_editable() assert req.source_dir req.download_info = direct_url_for_editable(req.unpacked_source_directory) - - dist = _get_prepared_distribution( - req, - self.build_tracker, - self.finder, - self.build_isolation, - self.check_build_deps, - ) - req.check_if_exists(self.use_user_site) - return dist + return self._prepare_and_finalize_dist(req) def prepare_installed_requirement( self, @@ -729,4 +976,4 @@ def prepare_installed_requirement( "completely repeatable environment, install into an " "empty virtualenv." ) - return InstalledDistribution(req).get_metadata_distribution() + return self._prepare_and_finalize_dist(req) diff --git a/src/pip/_internal/req/constructors.py b/src/pip/_internal/req/constructors.py index d73236e05c6..6055fd577f9 100644 --- a/src/pip/_internal/req/constructors.py +++ b/src/pip/_internal/req/constructors.py @@ -552,7 +552,7 @@ def install_req_extend_extras( """ result = copy.copy(ireq) result.extras = {*ireq.extras, *extras} - result.req = ( + result._req = ( _set_requirement_extras(ireq.req, result.extras) if ireq.req is not None else None diff --git a/src/pip/_internal/req/req_install.py b/src/pip/_internal/req/req_install.py index 834bc513356..69315a9088e 100644 --- a/src/pip/_internal/req/req_install.py +++ b/src/pip/_internal/req/req_install.py @@ -7,7 +7,16 @@ import zipfile from optparse import Values from pathlib import Path -from typing import Any, Collection, Dict, Iterable, List, Optional, Sequence, Union +from typing import ( + TYPE_CHECKING, + Collection, + Dict, + Iterable, + List, + Optional, + Sequence, + Union, +) from pip._vendor.packaging.markers import Marker from pip._vendor.packaging.requirements import Requirement @@ -23,10 +32,7 @@ from pip._internal.metadata import ( BaseDistribution, get_default_environment, - get_directory_distribution, - get_wheel_distribution, ) -from pip._internal.metadata.base import FilesystemWheel from pip._internal.models.direct_url import DirectUrl from pip._internal.models.link import Link from pip._internal.operations.build.metadata import generate_metadata @@ -59,6 +65,9 @@ from pip._internal.utils.virtualenv import running_under_virtualenv from pip._internal.vcs import vcs +if TYPE_CHECKING: + import email.message + logger = logging.getLogger(__name__) @@ -88,7 +97,7 @@ def __init__( permit_editable_wheels: bool = False, ) -> None: assert req is None or isinstance(req, Requirement), req - self.req = req + self._req = req self.comes_from = comes_from self.constraint = constraint self.editable = editable @@ -150,6 +159,7 @@ def __init__( self.hash_options = hash_options if hash_options else {} self.config_settings = config_settings # Set to True after successful preparation of this requirement + # TODO: this is only used in the legacy resolver: remove this! self.prepared = False # User supplied requirement are explicitly requested for installation # by the user via CLI arguments or requirements files, as opposed to, @@ -191,12 +201,32 @@ def __init__( ) self.use_pep517 = True - # This requirement needs more preparation before it can be built - self.needs_more_preparation = False + # When a dist is computed for this requirement, cache it here so it's visible + # everywhere within pip and isn't computed more than once. This may be + # a "virtual" dist without a physical location on the filesystem, or + # a "concrete" dist which has been fully downloaded. + self._dist: Optional[BaseDistribution] = None # This requirement needs to be unpacked before it can be installed. self._archive_source: Optional[Path] = None + @property + def req(self) -> Optional[Requirement]: + """Calculate a requirement from the cached dist, if populated. + + The cached dist can be populated by either + ``self.cache_virtual_metadata_only_dist()`` or + ``self.cache_concrete_dist()`` and can also be retrieved with + ``self.get_dist()``.""" + if self._req is not None: + return self._req + if self._dist is not None: + name = self._dist.canonical_name + version = str(self._dist.version) + self._req = Requirement(f"{name}=={version}") + return self._req + return None + def __str__(self) -> str: if self.req: s = redact_auth_from_requirement(self.req) @@ -386,7 +416,7 @@ def ensure_build_location( def _set_requirement(self) -> None: """Set requirement after generating metadata.""" - assert self.req is None + assert self._req is None assert self.metadata is not None assert self.source_dir is not None @@ -396,7 +426,7 @@ def _set_requirement(self) -> None: else: op = "===" - self.req = get_requirement( + self._req = get_requirement( "".join( [ self.metadata["Name"], @@ -422,7 +452,7 @@ def warn_on_mismatching_name(self) -> None: metadata_name, self.name, ) - self.req = get_requirement(metadata_name) + self._req = get_requirement(metadata_name) def check_if_exists(self, use_user_site: bool) -> None: """Find an installed distribution that satisfies or conflicts @@ -550,11 +580,11 @@ def isolated_editable_sanity_check(self) -> None: f"Consider using a build backend that supports PEP 660." ) - def prepare_metadata(self) -> None: + def prepare_metadata_directory(self) -> None: """Ensure that project metadata is available. - Under PEP 517 and PEP 660, call the backend hook to prepare the metadata. - Under legacy processing, call setup.py egg-info. + Under PEP 517 and PEP 660, call the backend hook to prepare the metadata + directory. Under legacy processing, call setup.py egg-info. """ assert self.source_dir, f"No source dir for {self}" details = self.name or f"from {self.link}" @@ -586,6 +616,8 @@ def prepare_metadata(self) -> None: details=details, ) + def validate_sdist_metadata(self) -> None: + """Ensure that we have a dist, and ensure it corresponds to expectations.""" # Act on the newly generated metadata, based on the name and version. if not self.name: self._set_requirement() @@ -595,25 +627,51 @@ def prepare_metadata(self) -> None: self.assert_source_matches_version() @property - def metadata(self) -> Any: - if not hasattr(self, "_metadata"): - self._metadata = self.get_dist().metadata - - return self._metadata + def metadata(self) -> "email.message.Message": + return self.get_dist().metadata def get_dist(self) -> BaseDistribution: - if self.metadata_directory: - return get_directory_distribution(self.metadata_directory) - elif self.local_file_path and self.is_wheel: - assert self.req is not None - return get_wheel_distribution( - FilesystemWheel(self.local_file_path), - canonicalize_name(self.req.name), - ) - raise AssertionError( - f"InstallRequirement {self} has no metadata directory and no wheel: " - f"can't make a distribution." - ) + """Retrieve the dist resolved from this requirement. + + :raises AssertionError: if the resolver has not yet been executed. + """ + if self._dist is None: + raise AssertionError(f"{self!r} has no dist associated.") + return self._dist + + def cache_virtual_metadata_only_dist(self, dist: BaseDistribution) -> None: + """Associate a "virtual" metadata-only dist to this requirement. + + This dist cannot be installed, but it can be used to complete the resolve + process. + + :raises AssertionError: if a dist has already been associated. + :raises AssertionError: if the provided dist is "concrete", i.e. exists + somewhere on the filesystem. + """ + assert self._dist is None, self + assert not dist.is_concrete, dist + self._dist = dist + + def cache_concrete_dist(self, dist: BaseDistribution) -> None: + """Associate a "concrete" dist to this requirement. + + A concrete dist exists somewhere on the filesystem and can be installed. + + :raises AssertionError: if a concrete dist has already been associated. + :raises AssertionError: if the provided dist is not concrete. + """ + if self._dist is not None: + # If we set a dist twice for the same requirement, we must be hydrating + # a concrete dist for what was previously virtual. This will occur in the + # case of `install --dry-run` when PEP 658 metadata is available. + assert not self._dist.is_concrete + assert dist.is_concrete + self._dist = dist + + @property + def is_concrete(self) -> bool: + return self._dist is not None and self._dist.is_concrete def assert_source_matches_version(self) -> None: assert self.source_dir, f"No source dir for {self}" diff --git a/src/pip/_internal/req/req_set.py b/src/pip/_internal/req/req_set.py index ec7a6e07a25..1bec85cc409 100644 --- a/src/pip/_internal/req/req_set.py +++ b/src/pip/_internal/req/req_set.py @@ -43,7 +43,7 @@ def add_unnamed_requirement(self, install_req: InstallRequirement) -> None: self.unnamed_requirements.append(install_req) def add_named_requirement(self, install_req: InstallRequirement) -> None: - assert install_req.name + assert install_req.name, install_req project_name = canonicalize_name(install_req.name) self.requirements[project_name] = install_req diff --git a/src/pip/_internal/resolution/resolvelib/resolver.py b/src/pip/_internal/resolution/resolvelib/resolver.py index c12beef0b2a..4880a5b9802 100644 --- a/src/pip/_internal/resolution/resolvelib/resolver.py +++ b/src/pip/_internal/resolution/resolvelib/resolver.py @@ -175,11 +175,6 @@ def resolve( req_set.add_named_requirement(ireq) - reqs = req_set.all_requirements - self.factory.preparer.prepare_linked_requirements_more(reqs) - for req in reqs: - req.prepared = True - req.needs_more_preparation = False return req_set def get_installation_order( diff --git a/src/pip/_internal/wheel_builder.py b/src/pip/_internal/wheel_builder.py index 93f8e1f5b2f..efe7a54bd1e 100644 --- a/src/pip/_internal/wheel_builder.py +++ b/src/pip/_internal/wheel_builder.py @@ -3,7 +3,6 @@ import logging import os.path -import re import shutil from typing import Iterable, List, Optional, Tuple @@ -25,23 +24,12 @@ from pip._internal.utils.subprocess import call_subprocess from pip._internal.utils.temp_dir import TempDirectory from pip._internal.utils.urls import path_to_url -from pip._internal.vcs import vcs logger = logging.getLogger(__name__) -_egg_info_re = re.compile(r"([a-z0-9_.]+)-([a-z0-9_.!+-]+)", re.IGNORECASE) - BuildResult = Tuple[List[InstallRequirement], List[InstallRequirement]] -def _contains_egg_info(s: str) -> bool: - """Determine whether the string looks like an egg_info. - - :param s: The string to parse. E.g. foo-2.1 - """ - return bool(_egg_info_re.search(s)) - - def _should_build( req: InstallRequirement, need_wheel: bool, @@ -87,54 +75,6 @@ def should_build_for_install_command( return _should_build(req, need_wheel=False) -def _should_cache( - req: InstallRequirement, -) -> Optional[bool]: - """ - Return whether a built InstallRequirement can be stored in the persistent - wheel cache, assuming the wheel cache is available, and _should_build() - has determined a wheel needs to be built. - """ - if req.editable or not req.source_dir: - # never cache editable requirements - return False - - if req.link and req.link.is_vcs: - # VCS checkout. Do not cache - # unless it points to an immutable commit hash. - assert not req.editable - assert req.source_dir - vcs_backend = vcs.get_backend_for_scheme(req.link.scheme) - assert vcs_backend - if vcs_backend.is_immutable_rev_checkout(req.link.url, req.source_dir): - return True - return False - - assert req.link - base, ext = req.link.splitext() - if _contains_egg_info(base): - return True - - # Otherwise, do not cache. - return False - - -def _get_cache_dir( - req: InstallRequirement, - wheel_cache: WheelCache, -) -> str: - """Return the persistent or temporary cache directory where the built - wheel need to be stored. - """ - cache_available = bool(wheel_cache.cache_dir) - assert req.link - if cache_available and _should_cache(req): - cache_dir = wheel_cache.get_path_for_link(req.link) - else: - cache_dir = wheel_cache.get_ephem_path_for_link(req.link) - return cache_dir - - def _verify_one(req: InstallRequirement, wheel_path: str) -> None: canonical_name = canonicalize_name(req.name or "") w = Wheel(os.path.basename(wheel_path)) @@ -315,7 +255,7 @@ def build( build_successes, build_failures = [], [] for req in requirements: assert req.name - cache_dir = _get_cache_dir(req, wheel_cache) + cache_dir = wheel_cache.resolve_cache_dir(req) wheel_file = _build_one( req, cache_dir, diff --git a/tests/conftest.py b/tests/conftest.py index da4ab5b9dfb..3a168c484a3 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -748,6 +748,9 @@ class FakePackage: requires_dist: Tuple[str, ...] = () # This will override the Name specified in the actual dist's METADATA. metadata_name: Optional[str] = None + # Whether to delete the file this points to, which causes any attempt to fetch this + # package to fail unless it is processed as a metadata-only dist. + delete_linked_file: bool = False def metadata_filename(self) -> str: """This is specified by PEP 658.""" @@ -837,6 +840,27 @@ def fake_packages() -> Dict[str, List[FakePackage]]: ("simple==1.0",), ), ], + "complex-dist": [ + FakePackage( + "complex-dist", + "0.1", + "complex_dist-0.1-py2.py3-none-any.whl", + MetadataKind.Unhashed, + # Validate that the wheel isn't fetched if metadata is available and + # --dry-run is on, when the metadata presents no hash itself. + delete_linked_file=True, + ), + ], + "corruptwheel": [ + FakePackage( + "corruptwheel", + "1.0", + "corruptwheel-1.0-py2.py3-none-any.whl", + # Validate that the wheel isn't fetched if metadata is available and + # --dry-run is on, when the metadata *does* present a hash. + MetadataKind.Sha256, + ), + ], "has-script": [ # Ensure we check PEP 658 metadata hashing errors for wheel files. FakePackage( @@ -922,10 +946,10 @@ def html_index_for_packages( f' {package_link.filename}
' # noqa: E501 ) # (3.2) Copy over the corresponding file in `shared_data.packages`. - shutil.copy( - shared_data.packages / package_link.filename, - pkg_subdir / package_link.filename, - ) + cached_file = shared_data.packages / package_link.filename + new_file = pkg_subdir / package_link.filename + if not package_link.delete_linked_file: + shutil.copy(cached_file, new_file) # (3.3) Write a metadata file, if applicable. if package_link.metadata != MetadataKind.NoFile: with open(pkg_subdir / package_link.metadata_filename(), "wb") as f: @@ -980,7 +1004,8 @@ def html_index_with_onetime_server( """Serve files from a generated pypi index, erroring if a file is downloaded more than once. - Provide `-i http://localhost:8000` to pip invocations to point them at this server. + Provide `-i http://localhost:` to pip invocations to point them at + this server. """ class InDirectoryServer(http.server.ThreadingHTTPServer): @@ -995,7 +1020,7 @@ def finish_request(self: "Self", request: Any, client_address: Any) -> None: class Handler(OneTimeDownloadHandler): _seen_paths: ClassVar[Set[str]] = set() - with InDirectoryServer(("", 8000), Handler) as httpd: + with InDirectoryServer(("", 0), Handler) as httpd: server_thread = threading.Thread(target=httpd.serve_forever) server_thread.start() diff --git a/tests/functional/test_check.py b/tests/functional/test_check.py index f50f5593e5c..9e59a4af3d0 100644 --- a/tests/functional/test_check.py +++ b/tests/functional/test_check.py @@ -123,10 +123,7 @@ def test_check_complicated_name_missing(script: PipTestEnvironment) -> None: # Without dependency result = script.pip("install", "--no-index", package_a_path, "--no-deps") - assert ( - "Successfully installed package_A-1.0" in result.stdout - or "Successfully installed package-A-1.0" in result.stdout - ), str(result) + assert "Successfully installed package-a-1.0" in result.stdout, str(result) result = script.pip("check", expect_error=True) expected_lines = ("package-a 1.0 requires dependency-b, which is not installed.",) @@ -149,10 +146,7 @@ def test_check_complicated_name_broken(script: PipTestEnvironment) -> None: # With broken dependency result = script.pip("install", "--no-index", package_a_path, "--no-deps") - assert ( - "Successfully installed package_A-1.0" in result.stdout - or "Successfully installed package-A-1.0" in result.stdout - ), str(result) + assert "Successfully installed package-a-1.0" in result.stdout, str(result) result = script.pip( "install", @@ -185,10 +179,7 @@ def test_check_complicated_name_clean(script: PipTestEnvironment) -> None: ) result = script.pip("install", "--no-index", package_a_path, "--no-deps") - assert ( - "Successfully installed package_A-1.0" in result.stdout - or "Successfully installed package-A-1.0" in result.stdout - ), str(result) + assert "Successfully installed package-a-1.0" in result.stdout, str(result) result = script.pip( "install", @@ -216,10 +207,7 @@ def test_check_considers_conditional_reqs(script: PipTestEnvironment) -> None: ) result = script.pip("install", "--no-index", package_a_path, "--no-deps") - assert ( - "Successfully installed package_A-1.0" in result.stdout - or "Successfully installed package-A-1.0" in result.stdout - ), str(result) + assert "Successfully installed package-a-1.0" in result.stdout, str(result) result = script.pip("check", expect_error=True) expected_lines = ("package-a 1.0 requires dependency-b, which is not installed.",) diff --git a/tests/functional/test_download.py b/tests/functional/test_download.py index d469e71c360..53c815ae98b 100644 --- a/tests/functional/test_download.py +++ b/tests/functional/test_download.py @@ -1273,6 +1273,7 @@ def download_server_html_index( ) -> Callable[..., Tuple[TestPipResult, Path]]: """Execute `pip download` against a generated PyPI index.""" download_dir = tmpdir / "download_dir" + _, server_port = html_index_with_onetime_server.server_address def run_for_generated_index( args: List[str], @@ -1287,7 +1288,7 @@ def run_for_generated_index( "-d", str(download_dir), "-i", - "http://localhost:8000", + f"http://localhost:{server_port}", *args, ] result = script.pip(*pip_args, allow_error=allow_error) diff --git a/tests/functional/test_install.py b/tests/functional/test_install.py index 8c30a69b515..c4fc5685607 100644 --- a/tests/functional/test_install.py +++ b/tests/functional/test_install.py @@ -2094,7 +2094,7 @@ def test_install_conflict_results_in_warning( # Install pkgA without its dependency result1 = script.pip("install", "--no-index", pkgA_path, "--no-deps") - assert "Successfully installed pkgA-1.0" in result1.stdout, str(result1) + assert "Successfully installed pkga-1.0" in result1.stdout, str(result1) # Then install an incorrect version of the dependency result2 = script.pip( @@ -2104,7 +2104,7 @@ def test_install_conflict_results_in_warning( allow_stderr_error=True, ) assert "pkga 1.0 requires pkgb==1.0" in result2.stderr, str(result2) - assert "Successfully installed pkgB-2.0" in result2.stdout, str(result2) + assert "Successfully installed pkgb-2.0" in result2.stdout, str(result2) def test_install_conflict_warning_can_be_suppressed( @@ -2124,11 +2124,11 @@ def test_install_conflict_warning_can_be_suppressed( # Install pkgA without its dependency result1 = script.pip("install", "--no-index", pkgA_path, "--no-deps") - assert "Successfully installed pkgA-1.0" in result1.stdout, str(result1) + assert "Successfully installed pkga-1.0" in result1.stdout, str(result1) # Then install an incorrect version of the dependency; suppressing warning result2 = script.pip("install", "--no-index", pkgB_path, "--no-warn-conflicts") - assert "Successfully installed pkgB-2.0" in result2.stdout, str(result2) + assert "Successfully installed pkgb-2.0" in result2.stdout, str(result2) def test_target_install_ignores_distutils_config_install_prefix( diff --git a/tests/functional/test_install_check.py b/tests/functional/test_install_check.py index 8a8a7c93a80..d16165e03f4 100644 --- a/tests/functional/test_install_check.py +++ b/tests/functional/test_install_check.py @@ -28,7 +28,7 @@ def test_check_install_canonicalization(script: PipTestEnvironment) -> None: # Let's install pkgA without its dependency result = script.pip("install", "--no-index", pkga_path, "--no-deps") - assert "Successfully installed pkgA-1.0" in result.stdout, str(result) + assert "Successfully installed pkga-1.0" in result.stdout, str(result) # Install the first missing dependency. Only an error for the # second dependency should remain. diff --git a/tests/functional/test_install_metadata.py b/tests/functional/test_install_metadata.py new file mode 100644 index 00000000000..fd04adf4e8d --- /dev/null +++ b/tests/functional/test_install_metadata.py @@ -0,0 +1,239 @@ +import json +import re +from pathlib import Path +from typing import Any, Callable, Dict, Iterator, List, Tuple + +import pytest +from pip._vendor.packaging.requirements import Requirement + +from pip._internal.models.direct_url import DirectUrl +from pip._internal.utils.urls import path_to_url +from tests.lib import ( + PipTestEnvironment, + TestPipResult, +) + + +@pytest.fixture +def install_with_generated_html_index( + script: PipTestEnvironment, + html_index_for_packages: Path, + tmpdir: Path, +) -> Callable[..., Tuple[TestPipResult, Dict[str, Any]]]: + """Execute `pip download` against a generated PyPI index.""" + output_file = tmpdir / "output_file.json" + + def run_for_generated_index( + args: List[str], + *, + dry_run: bool = True, + allow_error: bool = False, + ) -> Tuple[TestPipResult, Dict[str, Any]]: + """ + Produce a PyPI directory structure pointing to the specified packages, then + execute `pip install --report ... -i ...` pointing to our generated index. + """ + pip_args = [ + "install", + *(("--dry-run",) if dry_run else ()), + "--ignore-installed", + "--report", + str(output_file), + "-i", + path_to_url(str(html_index_for_packages)), + *args, + ] + result = script.pip(*pip_args, allow_error=allow_error) + try: + with open(output_file, "rb") as f: + report = json.load(f) + except FileNotFoundError: + if allow_error: + report = {} + else: + raise + return (result, report) + + return run_for_generated_index + + +def iter_dists(report: Dict[str, Any]) -> Iterator[Tuple[Requirement, DirectUrl]]: + """Parse a (req,url) tuple from each installed dist in the --report json.""" + for inst in report["install"]: + metadata = inst["metadata"] + name = metadata["name"] + version = metadata["version"] + req = Requirement(f"{name}=={version}") + direct_url = DirectUrl.from_dict(inst["download_info"]) + yield (req, direct_url) + + +@pytest.mark.parametrize( + "requirement_to_install, expected_outputs", + [ + ("simple2==1.0", ["simple2==1.0", "simple==1.0"]), + ("simple==2.0", ["simple==2.0"]), + ( + "colander", + ["colander==0.9.9", "translationstring==1.1"], + ), + ( + "compilewheel", + ["compilewheel==1.0", "simple==1.0"], + ), + ], +) +def test_install_with_metadata( + install_with_generated_html_index: Callable[ + ..., Tuple[TestPipResult, Dict[str, Any]] + ], + requirement_to_install: str, + expected_outputs: List[str], +) -> None: + """Verify that if a data-dist-info-metadata attribute is present, then it is used + instead of the actual dist's METADATA.""" + _, report = install_with_generated_html_index( + [requirement_to_install], + ) + installed = sorted(str(r) for r, _ in iter_dists(report)) + assert installed == expected_outputs + + +@pytest.mark.parametrize( + "requirement_to_install, real_hash", + [ + ( + "simple==3.0", + "95e0f200b6302989bcf2cead9465cf229168295ea330ca30d1ffeab5c0fed996", + ), + ( + "has-script", + "16ba92d7f6f992f6de5ecb7d58c914675cf21f57f8e674fb29dcb4f4c9507e5b", + ), + ], +) +def test_incorrect_metadata_hash( + install_with_generated_html_index: Callable[ + ..., Tuple[TestPipResult, Dict[str, Any]] + ], + requirement_to_install: str, + real_hash: str, +) -> None: + """Verify that if a hash for data-dist-info-metadata is provided, it must match the + actual hash of the metadata file.""" + result, _ = install_with_generated_html_index( + [requirement_to_install], + allow_error=True, + ) + assert result.returncode != 0 + expected_msg = f"""\ + Expected sha256 wrong-hash + Got {real_hash}""" + assert expected_msg in result.stderr + + +@pytest.mark.parametrize( + "requirement_to_install, expected_url", + [ + ("simple2==2.0", "simple2-2.0.tar.gz.metadata"), + ("priority", "priority-1.0-py2.py3-none-any.whl.metadata"), + ], +) +def test_metadata_not_found( + install_with_generated_html_index: Callable[ + ..., Tuple[TestPipResult, Dict[str, Any]] + ], + requirement_to_install: str, + expected_url: str, +) -> None: + """Verify that if a data-dist-info-metadata attribute is provided, that pip will + fetch the .metadata file at the location specified by PEP 658, and error + if unavailable.""" + result, _ = install_with_generated_html_index( + [requirement_to_install], + allow_error=True, + ) + assert result.returncode != 0 + expected_re = re.escape(expected_url) + pattern = re.compile( + f"ERROR: 404 Client Error: FileNotFoundError for url:.*{expected_re}" + ) + assert pattern.search(result.stderr), (pattern, result.stderr) + + +def test_produces_error_for_mismatched_package_name_in_metadata( + install_with_generated_html_index: Callable[ + ..., Tuple[TestPipResult, Dict[str, Any]] + ], +) -> None: + """Verify that the package name from the metadata matches the requested package.""" + result, _ = install_with_generated_html_index( + ["simple2==3.0"], + allow_error=True, + ) + assert result.returncode != 0 + assert ( + "simple2-3.0.tar.gz has inconsistent Name: expected 'simple2', but metadata " + "has 'not-simple2'" + ) in result.stdout + + +@pytest.mark.parametrize( + "requirement", + [ + "requires-simple-extra==0.1", + "REQUIRES_SIMPLE-EXTRA==0.1", + "REQUIRES....simple-_-EXTRA==0.1", + ], +) +def test_canonicalizes_package_name_before_verifying_metadata( + install_with_generated_html_index: Callable[ + ..., Tuple[TestPipResult, Dict[str, Any]] + ], + requirement: str, +) -> None: + """Verify that the package name from the command line and the package's + METADATA are both canonicalized before comparison, while the name from the METADATA + is always used verbatim to represent the installed candidate in --report. + + Regression test for https://github.com/pypa/pip/issues/12038 + """ + _, report = install_with_generated_html_index( + [requirement], + ) + reqs = [str(r) for r, _ in iter_dists(report)] + assert reqs == ["Requires_Simple.Extra==0.1"] + + +@pytest.mark.parametrize( + "requirement,err_string", + [ + # It's important that we verify pip won't even attempt to fetch the file, so we + # construct an input that will cause it to error if it tries at all. + ( + "complex-dist==0.1", + "Could not install packages due to an OSError: [Errno 2] No such file or directory", # noqa: E501 + ), + ("corruptwheel==1.0", ".whl is invalid."), + ], +) +def test_dry_run_avoids_downloading_metadata_only_dists( + install_with_generated_html_index: Callable[ + ..., Tuple[TestPipResult, Dict[str, Any]] + ], + requirement: str, + err_string: str, +) -> None: + """Verify that the underlying dist files are not downloaded at all when + `install --dry-run` is used to resolve dists with PEP 658 metadata.""" + _, report = install_with_generated_html_index( + [requirement], + ) + assert [requirement] == [str(r) for r, _ in iter_dists(report)] + result, _ = install_with_generated_html_index( + [requirement], + dry_run=False, + allow_error=True, + ) + assert result.returncode != 0 + assert err_string in result.stderr diff --git a/tests/functional/test_install_reqs.py b/tests/functional/test_install_reqs.py index b1aed6ad3f4..81668d9e9da 100644 --- a/tests/functional/test_install_reqs.py +++ b/tests/functional/test_install_reqs.py @@ -614,7 +614,7 @@ def test_install_distribution_full_union( result = script.pip_install_local( to_install, f"{to_install}[bar]", f"{to_install}[baz]" ) - assert "Building wheel for LocalExtras" in result.stdout + assert "Building wheel for localextras" in result.stdout result.did_create(script.site_packages / "simple") result.did_create(script.site_packages / "singlemodule.py") diff --git a/tests/functional/test_wheel.py b/tests/functional/test_wheel.py index 1bddd40dc41..69825899df7 100644 --- a/tests/functional/test_wheel.py +++ b/tests/functional/test_wheel.py @@ -285,7 +285,7 @@ def test_wheel_package_with_latin1_setup( pkg_to_wheel = data.packages.joinpath("SetupPyLatin1") result = script.pip("wheel", pkg_to_wheel) - assert "Successfully built SetupPyUTF8" in result.stdout + assert "Successfully built setuppyutf8" in result.stdout def test_pip_wheel_with_pep518_build_reqs( diff --git a/tests/unit/metadata/test_metadata_pkg_resources.py b/tests/unit/metadata/test_metadata_pkg_resources.py index 6044c14e4ca..195442cbe1e 100644 --- a/tests/unit/metadata/test_metadata_pkg_resources.py +++ b/tests/unit/metadata/test_metadata_pkg_resources.py @@ -104,6 +104,7 @@ def test_wheel_metadata_works() -> None: metadata=InMemoryMetadata({"METADATA": metadata.as_bytes()}, ""), project_name=name, ), + concrete=False, ) assert name == dist.canonical_name == dist.raw_name diff --git a/tests/unit/test_cache.py b/tests/unit/test_cache.py index 30cdb6ebece..8eb51d5f889 100644 --- a/tests/unit/test_cache.py +++ b/tests/unit/test_cache.py @@ -1,13 +1,32 @@ import os from pathlib import Path +import pytest from pip._vendor.packaging.tags import Tag, interpreter_name, interpreter_version -from pip._internal.cache import WheelCache, _hash_dict +from pip._internal.cache import WheelCache, _contains_egg_info, _hash_dict from pip._internal.models.link import Link from pip._internal.utils.misc import ensure_dir +@pytest.mark.parametrize( + "s, expected", + [ + # Trivial. + ("pip-18.0", True), + # Ambiguous. + ("foo-2-2", True), + ("im-valid", True), + # Invalid. + ("invalid", False), + ("im_invalid", False), + ], +) +def test_contains_egg_info(s: str, expected: bool) -> None: + result = _contains_egg_info(s) + assert result == expected + + def test_falsey_path_none() -> None: wc = WheelCache("") assert wc.cache_dir is None diff --git a/tests/unit/test_req.py b/tests/unit/test_req.py index 8a95c058706..5757850c6b3 100644 --- a/tests/unit/test_req.py +++ b/tests/unit/test_req.py @@ -23,6 +23,7 @@ PreviousBuildDirError, ) from pip._internal.index.package_finder import PackageFinder +from pip._internal.metadata import get_metadata_distribution from pip._internal.models.direct_url import ArchiveInfo, DirectUrl, DirInfo, VcsInfo from pip._internal.models.link import Link from pip._internal.network.session import PipSession @@ -144,7 +145,11 @@ def test_no_reuse_existing_build_dir(self, data: TestData) -> None: ): resolver.resolve(reqset.all_requirements, True) - def test_environment_marker_extras(self, data: TestData) -> None: + def test_environment_marker_extras( + self, + data: TestData, + monkeypatch: pytest.MonkeyPatch, + ) -> None: """ Test that the environment marker extras are used with non-wheel installs. @@ -154,6 +159,13 @@ def test_environment_marker_extras(self, data: TestData) -> None: os.fspath(data.packages.joinpath("LocalEnvironMarker")), ) req.user_supplied = True + + def cache_concrete_dist(self, dist): # type: ignore[no-untyped-def] + self._dist = dist + + monkeypatch.setattr( + req, "cache_concrete_dist", partial(cache_concrete_dist, req) + ) reqset.add_unnamed_requirement(req) finder = make_test_finder(find_links=[data.find_links]) with self._basic_resolver(finder) as resolver: @@ -499,12 +511,23 @@ def test_download_info_local_dir(self, data: TestData) -> None: assert req.download_info.url.startswith("file://") assert isinstance(req.download_info.info, DirInfo) - def test_download_info_local_editable_dir(self, data: TestData) -> None: + def test_download_info_local_editable_dir( + self, + data: TestData, + monkeypatch: pytest.MonkeyPatch, + ) -> None: """Test that download_info is set for requirements from a local editable dir.""" finder = make_test_finder() with self._basic_resolver(finder) as resolver: ireq_url = data.packages.joinpath("FSPkg").as_uri() ireq = get_processed_req_from_line(f"-e {ireq_url}#egg=FSPkg") + + def cache_concrete_dist(self, dist): # type: ignore[no-untyped-def] + self._dist = dist + + monkeypatch.setattr( + ireq, "cache_concrete_dist", partial(cache_concrete_dist, ireq) + ) reqset = resolver.resolve([ireq], True) assert len(reqset.all_requirements) == 1 req = reqset.all_requirements[0] @@ -909,7 +932,9 @@ def test_mismatched_versions(caplog: pytest.LogCaptureFixture) -> None: metadata = email.message.Message() metadata["name"] = "simplewheel" metadata["version"] = "1.0" - req._metadata = metadata + req._dist = get_metadata_distribution( + bytes(metadata), "simplewheel-1.0.whl", "simplewheel" + ) req.assert_source_matches_version() assert caplog.records[-1].message == ( diff --git a/tests/unit/test_wheel_builder.py b/tests/unit/test_wheel_builder.py index d5f372dd5cd..eb23d74ea94 100644 --- a/tests/unit/test_wheel_builder.py +++ b/tests/unit/test_wheel_builder.py @@ -6,7 +6,8 @@ import pytest -from pip._internal import wheel_builder +from pip._internal import cache, wheel_builder +from pip._internal.cache import _contains_egg_info from pip._internal.models.link import Link from pip._internal.operations.build.wheel_legacy import format_command_result from pip._internal.req.req_install import InstallRequirement @@ -28,7 +29,7 @@ ], ) def test_contains_egg_info(s: str, expected: bool) -> None: - result = wheel_builder._contains_egg_info(s) + result = _contains_egg_info(s) assert result == expected @@ -116,7 +117,7 @@ def test_should_build_for_wheel_command(req: ReqMock, expected: bool) -> None: ], ) def test_should_cache(req: ReqMock, expected: bool) -> None: - assert wheel_builder._should_cache(cast(InstallRequirement, req)) is expected + assert cache.should_cache(cast(InstallRequirement, req)) is expected def test_should_cache_git_sha(tmpdir: Path) -> None: @@ -126,12 +127,12 @@ def test_should_cache_git_sha(tmpdir: Path) -> None: # a link referencing a sha should be cached url = "git+https://g.c/o/r@" + commit + "#egg=mypkg" req = ReqMock(link=Link(url), source_dir=repo_path) - assert wheel_builder._should_cache(cast(InstallRequirement, req)) + assert cache.should_cache(cast(InstallRequirement, req)) # a link not referencing a sha should not be cached url = "git+https://g.c/o/r@master#egg=mypkg" req = ReqMock(link=Link(url), source_dir=repo_path) - assert not wheel_builder._should_cache(cast(InstallRequirement, req)) + assert not cache.should_cache(cast(InstallRequirement, req)) def test_format_command_result__INFO(caplog: pytest.LogCaptureFixture) -> None: