diff --git a/news/12186.bugfix.rst b/news/12186.bugfix.rst
new file mode 100644
index 00000000000..b51d84a0cb2
--- /dev/null
+++ b/news/12186.bugfix.rst
@@ -0,0 +1 @@
+Avoid downloading any dists in ``install --dry-run`` if PEP 658 ``.metadata`` files or lazy wheels are available.
diff --git a/news/12256.feature.rst b/news/12256.feature.rst
new file mode 100644
index 00000000000..0b5bb356188
--- /dev/null
+++ b/news/12256.feature.rst
@@ -0,0 +1 @@
+Cache computed metadata from sdists and lazy wheels in ``~/.cache/pip/link-metadata`` when ``--use-feature=metadata-cache`` is enabled.
diff --git a/news/12863.trivial.rst b/news/12863.trivial.rst
new file mode 100644
index 00000000000..dc36a82a0df
--- /dev/null
+++ b/news/12863.trivial.rst
@@ -0,0 +1 @@
+Cache "concrete" dists by ``Distribution`` instead of ``InstallRequirement``.
diff --git a/news/12871.trivial.rst b/news/12871.trivial.rst
new file mode 100644
index 00000000000..186e2bcb3c4
--- /dev/null
+++ b/news/12871.trivial.rst
@@ -0,0 +1 @@
+Refactor much of ``RequirementPreparer`` to avoid duplicated code paths for metadata-only requirements.
diff --git a/src/pip/_internal/cache.py b/src/pip/_internal/cache.py
index 6b4512672db..534d0e866b6 100644
--- a/src/pip/_internal/cache.py
+++ b/src/pip/_internal/cache.py
@@ -1,12 +1,14 @@
"""Cache Management
"""
+import abc
import hashlib
import json
import logging
import os
+import re
from pathlib import Path
-from typing import Any, Dict, List, Optional
+from typing import Dict, Iterator, List, Optional, Tuple
from pip._vendor.packaging.tags import Tag, interpreter_name, interpreter_version
from pip._vendor.packaging.utils import canonicalize_name
@@ -15,21 +17,71 @@
from pip._internal.models.direct_url import DirectUrl
from pip._internal.models.link import Link
from pip._internal.models.wheel import Wheel
+from pip._internal.req.req_install import InstallRequirement
from pip._internal.utils.temp_dir import TempDirectory, tempdir_kinds
from pip._internal.utils.urls import path_to_url
+from pip._internal.vcs import vcs
logger = logging.getLogger(__name__)
+_egg_info_re = re.compile(r"([a-z0-9_.]+)-([a-z0-9_.!+-]+)", re.IGNORECASE)
+
ORIGIN_JSON_NAME = "origin.json"
+def _contains_egg_info(s: str) -> bool:
+ """Determine whether the string looks like an egg_info.
+
+ :param s: The string to parse. E.g. foo-2.1
+ """
+ return bool(_egg_info_re.search(s))
+
+
+def should_cache(
+ req: InstallRequirement,
+) -> bool:
+ """
+ Return whether a built InstallRequirement can be stored in the persistent
+ wheel cache, assuming the wheel cache is available, and _should_build()
+ has determined a wheel needs to be built.
+ """
+ if not req.link:
+ return False
+
+ if req.link.is_wheel:
+ return False
+
+ if req.editable or not req.source_dir:
+ # never cache editable requirements
+ return False
+
+ if req.link and req.link.is_vcs:
+ # VCS checkout. Do not cache
+ # unless it points to an immutable commit hash.
+ assert not req.editable
+ assert req.source_dir
+ vcs_backend = vcs.get_backend_for_scheme(req.link.scheme)
+ assert vcs_backend
+ if vcs_backend.is_immutable_rev_checkout(req.link.url, req.source_dir):
+ return True
+ return False
+
+ assert req.link
+ base, ext = req.link.splitext()
+ if _contains_egg_info(base):
+ return True
+
+ # Otherwise, do not cache.
+ return False
+
+
def _hash_dict(d: Dict[str, str]) -> str:
"""Return a stable sha224 of a dictionary."""
s = json.dumps(d, sort_keys=True, separators=(",", ":"), ensure_ascii=True)
return hashlib.sha224(s.encode("ascii")).hexdigest()
-class Cache:
+class Cache(abc.ABC):
"""An abstract class - provides cache directories for data from links
:param cache_dir: The root of the cache.
@@ -73,20 +125,28 @@ def _get_cache_path_parts(self, link: Link) -> List[str]:
return parts
- def _get_candidates(self, link: Link, canonical_package_name: str) -> List[Any]:
- can_not_cache = not self.cache_dir or not canonical_package_name or not link
- if can_not_cache:
- return []
+ @abc.abstractmethod
+ def get_path_for_link(self, link: Link) -> str:
+ """Return a directory to store cached items in for link."""
+ ...
+
+ def cache_path(self, link: Link) -> Path:
+ return Path(self.get_path_for_link(link))
- path = self.get_path_for_link(link)
- if os.path.isdir(path):
- return [(candidate, path) for candidate in os.listdir(path)]
- return []
+
+class LinkMetadataCache(Cache):
+ """Persistently store the metadata of dists found at each link."""
def get_path_for_link(self, link: Link) -> str:
- """Return a directory to store cached items in for link."""
- raise NotImplementedError()
+ parts = self._get_cache_path_parts(link)
+ assert self.cache_dir
+ return os.path.join(self.cache_dir, "link-metadata", *parts)
+
+class WheelCacheBase(Cache):
+ """Specializations to the cache concept for wheels."""
+
+ @abc.abstractmethod
def get(
self,
link: Link,
@@ -96,10 +156,27 @@ def get(
"""Returns a link to a cached item if it exists, otherwise returns the
passed link.
"""
- raise NotImplementedError()
+ ...
+
+ def _can_cache(self, link: Link, canonical_package_name: str) -> bool:
+ return bool(self.cache_dir and canonical_package_name and link)
+ def _get_candidates(
+ self, link: Link, canonical_package_name: str
+ ) -> Iterator[Tuple[str, str]]:
+ if not self._can_cache(link, canonical_package_name):
+ return
+
+ path = self.get_path_for_link(link)
+ if not os.path.isdir(path):
+ return
-class SimpleWheelCache(Cache):
+ for candidate in os.scandir(path):
+ if candidate.is_file():
+ yield (candidate.name, path)
+
+
+class SimpleWheelCache(WheelCacheBase):
"""A cache of wheels for future installs."""
def __init__(self, cache_dir: str) -> None:
@@ -131,7 +208,7 @@ def get(
package_name: Optional[str],
supported_tags: List[Tag],
) -> Link:
- candidates = []
+ candidates: List[Tuple[int, str, str]] = []
if not package_name:
return link
@@ -205,7 +282,7 @@ def __init__(
)
-class WheelCache(Cache):
+class WheelCache(WheelCacheBase):
"""Wraps EphemWheelCache and SimpleWheelCache into a single Cache
This Cache allows for gracefully degradation, using the ephem wheel cache
@@ -223,6 +300,15 @@ def get_path_for_link(self, link: Link) -> str:
def get_ephem_path_for_link(self, link: Link) -> str:
return self._ephem_cache.get_path_for_link(link)
+ def resolve_cache_dir(self, req: InstallRequirement) -> str:
+ """Return the persistent or temporary cache directory where the built or
+ downloaded wheel should be stored."""
+ cache_available = bool(self.cache_dir)
+ assert req.link, req
+ if cache_available and should_cache(req):
+ return self.get_path_for_link(req.link)
+ return self.get_ephem_path_for_link(req.link)
+
def get(
self,
link: Link,
diff --git a/src/pip/_internal/cli/cmdoptions.py b/src/pip/_internal/cli/cmdoptions.py
index 0b7cff77bdd..cca4a92954f 100644
--- a/src/pip/_internal/cli/cmdoptions.py
+++ b/src/pip/_internal/cli/cmdoptions.py
@@ -1009,6 +1009,8 @@ def check_list_path_option(options: Values) -> None:
default=[],
choices=[
"fast-deps",
+ "metadata-cache",
+ "truststore",
]
+ ALWAYS_ENABLED_FEATURES,
help="Enable new functionality, that may be backward incompatible.",
diff --git a/src/pip/_internal/cli/req_command.py b/src/pip/_internal/cli/req_command.py
index 92900f94ff4..c4355670546 100644
--- a/src/pip/_internal/cli/req_command.py
+++ b/src/pip/_internal/cli/req_command.py
@@ -10,7 +10,7 @@
from optparse import Values
from typing import Any, List, Optional, Tuple
-from pip._internal.cache import WheelCache
+from pip._internal.cache import LinkMetadataCache, WheelCache
from pip._internal.cli import cmdoptions
from pip._internal.cli.index_command import IndexGroupCommand
from pip._internal.cli.index_command import SessionCommandMixin as SessionCommandMixin
@@ -127,6 +127,16 @@ def make_requirement_preparer(
"fast-deps has no effect when used with the legacy resolver."
)
+ if options.cache_dir and "metadata-cache" in options.features_enabled:
+ logger.warning(
+ "pip is using a local cache for metadata information. "
+ "This experimental feature is enabled through "
+ "--use-feature=metadata-cache and it is not ready for "
+ "production."
+ )
+ metadata_cache = LinkMetadataCache(options.cache_dir)
+ else:
+ metadata_cache = None
return RequirementPreparer(
build_dir=temp_build_dir_path,
src_dir=options.src_dir,
@@ -142,6 +152,7 @@ def make_requirement_preparer(
lazy_wheel=lazy_wheel,
verbosity=verbosity,
legacy_resolver=legacy_resolver,
+ metadata_cache=metadata_cache,
)
@classmethod
diff --git a/src/pip/_internal/commands/download.py b/src/pip/_internal/commands/download.py
index 917bbb91d83..5528c1866a0 100644
--- a/src/pip/_internal/commands/download.py
+++ b/src/pip/_internal/commands/download.py
@@ -130,6 +130,9 @@ def run(self, options: Values, args: List[str]) -> int:
self.trace_basic_info(finder)
requirement_set = resolver.resolve(reqs, check_supported_wheels=True)
+ preparer.finalize_linked_requirements(
+ requirement_set.requirements.values(), require_dist_files=True
+ )
downloaded: List[str] = []
for req in requirement_set.requirements.values():
@@ -138,8 +141,6 @@ def run(self, options: Values, args: List[str]) -> int:
preparer.save_linked_requirement(req)
downloaded.append(req.name)
- preparer.prepare_linked_requirements_more(requirement_set.requirements.values())
-
if downloaded:
write_output("Successfully downloaded %s", " ".join(downloaded))
diff --git a/src/pip/_internal/commands/install.py b/src/pip/_internal/commands/install.py
index ad45a2f2a57..90d73842ffc 100644
--- a/src/pip/_internal/commands/install.py
+++ b/src/pip/_internal/commands/install.py
@@ -85,7 +85,8 @@ def add_options(self) -> None:
help=(
"Don't actually install anything, just print what would be. "
"Can be used in combination with --ignore-installed "
- "to 'resolve' the requirements."
+ "to 'resolve' the requirements. If package metadata is available "
+ "or cached, --dry-run also avoids downloading the dependency at all."
),
)
self.cmd_opts.add_option(
@@ -379,6 +380,10 @@ def run(self, options: Values, args: List[str]) -> int:
requirement_set = resolver.resolve(
reqs, check_supported_wheels=not options.target_dir
)
+ preparer.finalize_linked_requirements(
+ requirement_set.requirements.values(),
+ require_dist_files=not options.dry_run,
+ )
if options.json_report_file:
report = InstallationReport(requirement_set.requirements_to_install)
diff --git a/src/pip/_internal/commands/wheel.py b/src/pip/_internal/commands/wheel.py
index 278719f4e0c..f3a81db6602 100644
--- a/src/pip/_internal/commands/wheel.py
+++ b/src/pip/_internal/commands/wheel.py
@@ -145,6 +145,9 @@ def run(self, options: Values, args: List[str]) -> int:
self.trace_basic_info(finder)
requirement_set = resolver.resolve(reqs, check_supported_wheels=True)
+ preparer.finalize_linked_requirements(
+ requirement_set.requirements.values(), require_dist_files=True
+ )
reqs_to_build: List[InstallRequirement] = []
for req in requirement_set.requirements.values():
@@ -153,8 +156,6 @@ def run(self, options: Values, args: List[str]) -> int:
elif should_build_for_wheel_command(req):
reqs_to_build.append(req)
- preparer.prepare_linked_requirements_more(requirement_set.requirements.values())
-
# build wheels
build_successes, build_failures = build(
reqs_to_build,
diff --git a/src/pip/_internal/distributions/__init__.py b/src/pip/_internal/distributions/__init__.py
index 9a89a838b9a..a870b227d1e 100644
--- a/src/pip/_internal/distributions/__init__.py
+++ b/src/pip/_internal/distributions/__init__.py
@@ -1,4 +1,5 @@
from pip._internal.distributions.base import AbstractDistribution
+from pip._internal.distributions.installed import InstalledDistribution
from pip._internal.distributions.sdist import SourceDistribution
from pip._internal.distributions.wheel import WheelDistribution
from pip._internal.req.req_install import InstallRequirement
@@ -7,7 +8,18 @@
def make_distribution_for_install_requirement(
install_req: InstallRequirement,
) -> AbstractDistribution:
- """Returns a Distribution for the given InstallRequirement"""
+ """Returns an AbstractDistribution for the given InstallRequirement.
+
+ As AbstractDistribution only covers installable artifacts, this method may only be
+ invoked at the conclusion of a resolve, when the RequirementPreparer has downloaded
+ the file corresponding to the resolved dist. Commands which intend to consume
+ metadata-only resolves without downloading should not call this method or
+ consume AbstractDistribution objects.
+ """
+ # Only pre-installed requirements will have a .satisfied_by dist.
+ if install_req.satisfied_by:
+ return InstalledDistribution(install_req)
+
# Editable requirements will always be source distributions. They use the
# legacy logic until we create a modern standard for them.
if install_req.editable:
diff --git a/src/pip/_internal/distributions/base.py b/src/pip/_internal/distributions/base.py
index 6e4d0c91a90..0a132b88f98 100644
--- a/src/pip/_internal/distributions/base.py
+++ b/src/pip/_internal/distributions/base.py
@@ -37,11 +37,17 @@ def build_tracker_id(self) -> Optional[str]:
If None, then this dist has no work to do in the build tracker, and
``.prepare_distribution_metadata()`` will not be called."""
- raise NotImplementedError()
+ ...
@abc.abstractmethod
def get_metadata_distribution(self) -> BaseDistribution:
- raise NotImplementedError()
+ """Generate a concrete ``BaseDistribution`` instance for this artifact.
+
+ The implementation should also cache the result with
+ ``self.req.cache_concrete_dist()`` so the distribution is available to other
+ users of the ``InstallRequirement``. This method is not called within the build
+ tracker context, so it should not identify any new setup requirements."""
+ ...
@abc.abstractmethod
def prepare_distribution_metadata(
@@ -50,4 +56,11 @@ def prepare_distribution_metadata(
build_isolation: bool,
check_build_deps: bool,
) -> None:
- raise NotImplementedError()
+ """Generate the information necessary to extract metadata from the artifact.
+
+ This method will be executed within the context of ``BuildTracker#track()``, so
+ it needs to fully identify any setup requirements so they can be added to the
+ same active set of tracked builds, while ``.get_metadata_distribution()`` takes
+ care of generating and caching the ``BaseDistribution`` to expose to the rest of
+ the resolve."""
+ ...
diff --git a/src/pip/_internal/distributions/installed.py b/src/pip/_internal/distributions/installed.py
index ab8d53be740..83e99fca9ca 100644
--- a/src/pip/_internal/distributions/installed.py
+++ b/src/pip/_internal/distributions/installed.py
@@ -1,9 +1,11 @@
-from typing import Optional
+from typing import TYPE_CHECKING, Optional
from pip._internal.distributions.base import AbstractDistribution
-from pip._internal.index.package_finder import PackageFinder
from pip._internal.metadata import BaseDistribution
+if TYPE_CHECKING:
+ from pip._internal.index.package_finder import PackageFinder
+
class InstalledDistribution(AbstractDistribution):
"""Represents an installed package.
@@ -17,12 +19,14 @@ def build_tracker_id(self) -> Optional[str]:
return None
def get_metadata_distribution(self) -> BaseDistribution:
- assert self.req.satisfied_by is not None, "not actually installed"
- return self.req.satisfied_by
+ dist = self.req.satisfied_by
+ assert dist is not None, "not actually installed"
+ self.req.cache_concrete_dist(dist)
+ return dist
def prepare_distribution_metadata(
self,
- finder: PackageFinder,
+ finder: "PackageFinder",
build_isolation: bool,
check_build_deps: bool,
) -> None:
diff --git a/src/pip/_internal/distributions/sdist.py b/src/pip/_internal/distributions/sdist.py
index 28ea5cea16c..3e4a3e8d371 100644
--- a/src/pip/_internal/distributions/sdist.py
+++ b/src/pip/_internal/distributions/sdist.py
@@ -1,10 +1,10 @@
import logging
-from typing import TYPE_CHECKING, Iterable, Optional, Set, Tuple
+from typing import TYPE_CHECKING, Iterable, Set, Tuple
from pip._internal.build_env import BuildEnvironment
from pip._internal.distributions.base import AbstractDistribution
from pip._internal.exceptions import InstallationError
-from pip._internal.metadata import BaseDistribution
+from pip._internal.metadata import BaseDistribution, get_directory_distribution
from pip._internal.utils.subprocess import runner_with_spinner_message
if TYPE_CHECKING:
@@ -21,13 +21,19 @@ class SourceDistribution(AbstractDistribution):
"""
@property
- def build_tracker_id(self) -> Optional[str]:
+ def build_tracker_id(self) -> str:
"""Identify this requirement uniquely by its link."""
assert self.req.link
return self.req.link.url_without_fragment
def get_metadata_distribution(self) -> BaseDistribution:
- return self.req.get_dist()
+ assert (
+ self.req.metadata_directory
+ ), "Set as part of .prepare_distribution_metadata()"
+ dist = get_directory_distribution(self.req.metadata_directory)
+ self.req.cache_concrete_dist(dist)
+ self.req.validate_sdist_metadata()
+ return dist
def prepare_distribution_metadata(
self,
@@ -66,7 +72,11 @@ def prepare_distribution_metadata(
self._raise_conflicts("the backend dependencies", conflicting)
if missing:
self._raise_missing_reqs(missing)
- self.req.prepare_metadata()
+
+ # NB: we must still call .cache_concrete_dist() and .validate_sdist_metadata()
+ # before the InstallRequirement itself has been updated with the metadata from
+ # this directory!
+ self.req.prepare_metadata_directory()
def _prepare_build_backend(self, finder: "PackageFinder") -> None:
# Isolate in a BuildEnvironment and install the build-time
diff --git a/src/pip/_internal/distributions/wheel.py b/src/pip/_internal/distributions/wheel.py
index bfadd39dcb7..e75c7910379 100644
--- a/src/pip/_internal/distributions/wheel.py
+++ b/src/pip/_internal/distributions/wheel.py
@@ -31,7 +31,9 @@ def get_metadata_distribution(self) -> BaseDistribution:
assert self.req.local_file_path, "Set as part of preparation during download"
assert self.req.name, "Wheels are never unnamed"
wheel = FilesystemWheel(self.req.local_file_path)
- return get_wheel_distribution(wheel, canonicalize_name(self.req.name))
+ dist = get_wheel_distribution(wheel, canonicalize_name(self.req.name))
+ self.req.cache_concrete_dist(dist)
+ return dist
def prepare_distribution_metadata(
self,
diff --git a/src/pip/_internal/exceptions.py b/src/pip/_internal/exceptions.py
index 2587740f73a..7f6c45adaf0 100644
--- a/src/pip/_internal/exceptions.py
+++ b/src/pip/_internal/exceptions.py
@@ -246,6 +246,25 @@ def __str__(self) -> str:
return f"None {self.metadata_name} metadata found for distribution: {self.dist}"
+class CacheMetadataError(PipError):
+ """Raised when de/serializing a requirement into the metadata cache."""
+
+ def __init__(
+ self,
+ req: "InstallRequirement",
+ reason: str,
+ ) -> None:
+ """
+ :param req: The requirement we attempted to cache.
+ :param reason: Context about the precise error that occurred.
+ """
+ self.req = req
+ self.reason = reason
+
+ def __str__(self) -> str:
+ return f"{self.reason} for {self.req} from {self.req.link}"
+
+
class UserInstallationInvalid(InstallationError):
"""A --user install is requested on an environment without user site."""
diff --git a/src/pip/_internal/metadata/__init__.py b/src/pip/_internal/metadata/__init__.py
index aa232b6cabd..2ed1fe8d64a 100644
--- a/src/pip/_internal/metadata/__init__.py
+++ b/src/pip/_internal/metadata/__init__.py
@@ -6,7 +6,14 @@
from pip._internal.utils.misc import strtobool
-from .base import BaseDistribution, BaseEnvironment, FilesystemWheel, MemoryWheel, Wheel
+from .base import (
+ BaseDistribution,
+ BaseEnvironment,
+ FilesystemWheel,
+ MemoryWheel,
+ Wheel,
+ serialize_metadata,
+)
if TYPE_CHECKING:
from typing import Literal, Protocol
@@ -23,6 +30,7 @@
"get_environment",
"get_wheel_distribution",
"select_backend",
+ "serialize_metadata",
]
diff --git a/src/pip/_internal/metadata/base.py b/src/pip/_internal/metadata/base.py
index 9eabcdb278b..9c95f9f7a31 100644
--- a/src/pip/_internal/metadata/base.py
+++ b/src/pip/_internal/metadata/base.py
@@ -1,6 +1,9 @@
import csv
+import email.generator
import email.message
+import email.policy
import functools
+import io
import json
import logging
import pathlib
@@ -90,6 +93,18 @@ def _convert_installed_files_path(
return str(pathlib.Path(*info, *entry))
+def serialize_metadata(msg: email.message.Message) -> str:
+ """Write a dist's metadata to a string.
+
+ Calling ``str(dist.metadata)`` may raise an error by misinterpreting RST directives
+ as email headers. This function uses the more robust ``email.policy.EmailPolicy`` to
+ avoid those parsing errors."""
+ out = io.StringIO()
+ g = email.generator.Generator(out, policy=email.policy.EmailPolicy())
+ g.flatten(msg)
+ return out.getvalue()
+
+
class RequiresEntry(NamedTuple):
requirement: str
extra: str
@@ -97,6 +112,15 @@ class RequiresEntry(NamedTuple):
class BaseDistribution(Protocol):
+ @property
+ def is_concrete(self) -> bool:
+ """Whether the distribution really exists somewhere on disk.
+
+ If this is false, it has been synthesized from metadata, e.g. via
+ ``.from_metadata_file_contents()``, or ``.from_wheel()`` against
+ a ``MemoryWheel``."""
+ raise NotImplementedError()
+
@classmethod
def from_directory(cls, directory: str) -> "BaseDistribution":
"""Load the distribution from a metadata directory.
@@ -667,6 +691,10 @@ def iter_installed_distributions(
class Wheel(Protocol):
location: str
+ @property
+ def is_concrete(self) -> bool:
+ raise NotImplementedError()
+
def as_zipfile(self) -> zipfile.ZipFile:
raise NotImplementedError()
@@ -675,6 +703,10 @@ class FilesystemWheel(Wheel):
def __init__(self, location: str) -> None:
self.location = location
+ @property
+ def is_concrete(self) -> bool:
+ return True
+
def as_zipfile(self) -> zipfile.ZipFile:
return zipfile.ZipFile(self.location, allowZip64=True)
@@ -684,5 +716,9 @@ def __init__(self, location: str, stream: IO[bytes]) -> None:
self.location = location
self.stream = stream
+ @property
+ def is_concrete(self) -> bool:
+ return False
+
def as_zipfile(self) -> zipfile.ZipFile:
return zipfile.ZipFile(self.stream, allowZip64=True)
diff --git a/src/pip/_internal/metadata/importlib/_dists.py b/src/pip/_internal/metadata/importlib/_dists.py
index 36cd326232e..63b5ff1a012 100644
--- a/src/pip/_internal/metadata/importlib/_dists.py
+++ b/src/pip/_internal/metadata/importlib/_dists.py
@@ -102,16 +102,22 @@ def __init__(
dist: importlib.metadata.Distribution,
info_location: Optional[BasePath],
installed_location: Optional[BasePath],
+ concrete: bool,
) -> None:
self._dist = dist
self._info_location = info_location
self._installed_location = installed_location
+ self._concrete = concrete
+
+ @property
+ def is_concrete(self) -> bool:
+ return self._concrete
@classmethod
def from_directory(cls, directory: str) -> BaseDistribution:
info_location = pathlib.Path(directory)
dist = importlib.metadata.Distribution.at(info_location)
- return cls(dist, info_location, info_location.parent)
+ return cls(dist, info_location, info_location.parent, concrete=True)
@classmethod
def from_metadata_file_contents(
@@ -128,7 +134,7 @@ def from_metadata_file_contents(
metadata_path.write_bytes(metadata_contents)
# Construct dist pointing to the newly created directory.
dist = importlib.metadata.Distribution.at(metadata_path.parent)
- return cls(dist, metadata_path.parent, None)
+ return cls(dist, metadata_path.parent, None, concrete=False)
@classmethod
def from_wheel(cls, wheel: Wheel, name: str) -> BaseDistribution:
@@ -137,7 +143,14 @@ def from_wheel(cls, wheel: Wheel, name: str) -> BaseDistribution:
dist = WheelDistribution.from_zipfile(zf, name, wheel.location)
except zipfile.BadZipFile as e:
raise InvalidWheel(wheel.location, name) from e
- return cls(dist, dist.info_location, pathlib.PurePosixPath(wheel.location))
+ except UnsupportedWheel as e:
+ raise UnsupportedWheel(f"{name} has an invalid wheel, {e}")
+ return cls(
+ dist,
+ dist.info_location,
+ pathlib.PurePosixPath(wheel.location),
+ concrete=wheel.is_concrete,
+ )
@property
def location(self) -> Optional[str]:
diff --git a/src/pip/_internal/metadata/importlib/_envs.py b/src/pip/_internal/metadata/importlib/_envs.py
index 70cb7a6009a..b7355d4935c 100644
--- a/src/pip/_internal/metadata/importlib/_envs.py
+++ b/src/pip/_internal/metadata/importlib/_envs.py
@@ -80,7 +80,7 @@ def find(self, location: str) -> Iterator[BaseDistribution]:
installed_location: Optional[BasePath] = None
else:
installed_location = info_location.parent
- yield Distribution(dist, info_location, installed_location)
+ yield Distribution(dist, info_location, installed_location, concrete=True)
def find_linked(self, location: str) -> Iterator[BaseDistribution]:
"""Read location in egg-link files and return distributions in there.
@@ -104,7 +104,7 @@ def find_linked(self, location: str) -> Iterator[BaseDistribution]:
continue
target_location = str(path.joinpath(target_rel))
for dist, info_location in self._find_impl(target_location):
- yield Distribution(dist, info_location, path)
+ yield Distribution(dist, info_location, path, concrete=True)
def _find_eggs_in_dir(self, location: str) -> Iterator[BaseDistribution]:
from pip._vendor.pkg_resources import find_distributions
@@ -116,7 +116,7 @@ def _find_eggs_in_dir(self, location: str) -> Iterator[BaseDistribution]:
if not entry.name.endswith(".egg"):
continue
for dist in find_distributions(entry.path):
- yield legacy.Distribution(dist)
+ yield legacy.Distribution(dist, concrete=True)
def _find_eggs_in_zip(self, location: str) -> Iterator[BaseDistribution]:
from pip._vendor.pkg_resources import find_eggs_in_zip
@@ -128,7 +128,7 @@ def _find_eggs_in_zip(self, location: str) -> Iterator[BaseDistribution]:
except zipimport.ZipImportError:
return
for dist in find_eggs_in_zip(importer, location):
- yield legacy.Distribution(dist)
+ yield legacy.Distribution(dist, concrete=True)
def find_eggs(self, location: str) -> Iterator[BaseDistribution]:
"""Find eggs in a location.
diff --git a/src/pip/_internal/metadata/pkg_resources.py b/src/pip/_internal/metadata/pkg_resources.py
index 4ea84f93a6f..428b1f4a7dd 100644
--- a/src/pip/_internal/metadata/pkg_resources.py
+++ b/src/pip/_internal/metadata/pkg_resources.py
@@ -81,8 +81,9 @@ def run_script(self, script_name: str, namespace: str) -> None:
class Distribution(BaseDistribution):
- def __init__(self, dist: pkg_resources.Distribution) -> None:
+ def __init__(self, dist: pkg_resources.Distribution, concrete: bool) -> None:
self._dist = dist
+ self._concrete = concrete
# This is populated lazily, to avoid loading metadata for all possible
# distributions eagerly.
self.__extra_mapping: Optional[Mapping[NormalizedName, str]] = None
@@ -96,6 +97,10 @@ def _extra_mapping(self) -> Mapping[NormalizedName, str]:
return self.__extra_mapping
+ @property
+ def is_concrete(self) -> bool:
+ return self._concrete
+
@classmethod
def from_directory(cls, directory: str) -> BaseDistribution:
dist_dir = directory.rstrip(os.sep)
@@ -114,7 +119,7 @@ def from_directory(cls, directory: str) -> BaseDistribution:
dist_name = os.path.splitext(dist_dir_name)[0].split("-")[0]
dist = dist_cls(base_dir, project_name=dist_name, metadata=metadata)
- return cls(dist)
+ return cls(dist, concrete=True)
@classmethod
def from_metadata_file_contents(
@@ -131,7 +136,7 @@ def from_metadata_file_contents(
metadata=InMemoryMetadata(metadata_dict, filename),
project_name=project_name,
)
- return cls(dist)
+ return cls(dist, concrete=False)
@classmethod
def from_wheel(cls, wheel: Wheel, name: str) -> BaseDistribution:
@@ -152,7 +157,7 @@ def from_wheel(cls, wheel: Wheel, name: str) -> BaseDistribution:
metadata=InMemoryMetadata(metadata_dict, wheel.location),
project_name=name,
)
- return cls(dist)
+ return cls(dist, concrete=wheel.is_concrete)
@property
def location(self) -> Optional[str]:
@@ -264,7 +269,7 @@ def from_paths(cls, paths: Optional[List[str]]) -> BaseEnvironment:
def _iter_distributions(self) -> Iterator[BaseDistribution]:
for dist in self._ws:
- yield Distribution(dist)
+ yield Distribution(dist, concrete=True)
def _search_distribution(self, name: str) -> Optional[BaseDistribution]:
"""Find a distribution matching the ``name`` in the environment.
diff --git a/src/pip/_internal/network/download.py b/src/pip/_internal/network/download.py
index 5c3bce3d2fd..9a5d6fb9c5d 100644
--- a/src/pip/_internal/network/download.py
+++ b/src/pip/_internal/network/download.py
@@ -114,7 +114,7 @@ def _get_http_response_filename(resp: Response, link: Link) -> str:
def _http_get_download(session: PipSession, link: Link) -> Response:
- target_url = link.url.split("#", 1)[0]
+ target_url = link.url_without_fragment
resp = session.get(target_url, headers=HEADERS, stream=True)
raise_for_status(resp)
return resp
diff --git a/src/pip/_internal/operations/check.py b/src/pip/_internal/operations/check.py
index 4b6fbc4c375..4391d3e0121 100644
--- a/src/pip/_internal/operations/check.py
+++ b/src/pip/_internal/operations/check.py
@@ -23,7 +23,6 @@
from pip._vendor.packaging.utils import NormalizedName, canonicalize_name
from pip._vendor.packaging.version import Version
-from pip._internal.distributions import make_distribution_for_install_requirement
from pip._internal.metadata import get_default_environment
from pip._internal.metadata.base import BaseDistribution
from pip._internal.req.req_install import InstallRequirement
@@ -154,8 +153,8 @@ def _simulate_installation_of(
# Modify it as installing requirement_set would (assuming no errors)
for inst_req in to_install:
- abstract_dist = make_distribution_for_install_requirement(inst_req)
- dist = abstract_dist.get_metadata_distribution()
+ assert inst_req.is_concrete
+ dist = inst_req.get_dist()
name = dist.canonical_name
package_set[name] = PackageDetails(dist.version, list(dist.iter_dependencies()))
diff --git a/src/pip/_internal/operations/prepare.py b/src/pip/_internal/operations/prepare.py
index e6aa3447200..a319a475ee9 100644
--- a/src/pip/_internal/operations/prepare.py
+++ b/src/pip/_internal/operations/prepare.py
@@ -4,18 +4,22 @@
# The following comment should be removed at some point in the future.
# mypy: strict-optional=False
+import bz2
+import json
import mimetypes
import os
import shutil
from dataclasses import dataclass
from pathlib import Path
-from typing import Dict, Iterable, List, Optional
+from typing import Dict, Iterable, List, Optional, Tuple
from pip._vendor.packaging.utils import canonicalize_name
+from pip._vendor.requests.exceptions import InvalidSchema
+from pip._internal.cache import LinkMetadataCache, should_cache
from pip._internal.distributions import make_distribution_for_install_requirement
-from pip._internal.distributions.installed import InstalledDistribution
from pip._internal.exceptions import (
+ CacheMetadataError,
DirectoryUrlHashUnsupported,
HashMismatch,
HashUnpinned,
@@ -25,7 +29,11 @@
VcsHashUnsupported,
)
from pip._internal.index.package_finder import PackageFinder
-from pip._internal.metadata import BaseDistribution, get_metadata_distribution
+from pip._internal.metadata import (
+ BaseDistribution,
+ get_metadata_distribution,
+ serialize_metadata,
+)
from pip._internal.models.direct_url import ArchiveInfo
from pip._internal.models.link import Link
from pip._internal.models.wheel import Wheel
@@ -63,16 +71,31 @@ def _get_prepared_distribution(
finder: PackageFinder,
build_isolation: bool,
check_build_deps: bool,
-) -> BaseDistribution:
- """Prepare a distribution for installation."""
+) -> Tuple[bool, BaseDistribution]:
+ """Prepare a distribution for installation.
+
+ This method will only be called by the preparer at the end of the resolve, and only
+ for commands which need installable artifacts (not just resolved metadata). If the
+ dist was previously metadata-only, the preparer must have downloaded the file
+ corresponding to the dist and set ``req.local_file_path``.
+
+ This method will execute ``req.cache_concrete_dist()``, so that after invocation,
+ ``req.is_concrete`` will be True, because ``req.get_dist()`` will return a concrete
+ ``Distribution``.
+
+ :returns: a 2-tuple:
+ - (bool): whether the metadata had to be constructed (e.g. from an sdist build),
+ - (BaseDistribution): the concrete distribution which is ready to be installed.
+ """
abstract_dist = make_distribution_for_install_requirement(req)
tracker_id = abstract_dist.build_tracker_id
- if tracker_id is not None:
+ builds_metadata = tracker_id is not None
+ if builds_metadata:
with build_tracker.track(req, tracker_id):
abstract_dist.prepare_distribution_metadata(
finder, build_isolation, check_build_deps
)
- return abstract_dist.get_metadata_distribution()
+ return (builds_metadata, abstract_dist.get_metadata_distribution())
def unpack_vcs_link(link: Link, location: str, verbosity: int) -> None:
@@ -190,6 +213,8 @@ def _check_download_dir(
) -> Optional[str]:
"""Check download_dir for previously downloaded file with correct hash
If a correct file is found return its path else None
+
+ If a file is found at the given path, but with an invalid hash, the file is deleted.
"""
download_path = os.path.join(download_dir, link.filename)
@@ -212,6 +237,45 @@ def _check_download_dir(
return download_path
+@dataclass(frozen=True)
+class CacheableDist:
+ metadata: str
+ filename: Path
+ canonical_name: str
+
+ @classmethod
+ def from_dist(cls, link: Link, dist: BaseDistribution) -> "CacheableDist":
+ """Extract the serializable data necessary to generate a metadata-only dist."""
+ return cls(
+ metadata=serialize_metadata(dist.metadata),
+ filename=Path(link.filename),
+ canonical_name=dist.canonical_name,
+ )
+
+ def to_dist(self) -> BaseDistribution:
+ """Return a metadata-only dist from the deserialized cache entry."""
+ return get_metadata_distribution(
+ metadata_contents=self.metadata.encode("utf-8"),
+ filename=str(self.filename),
+ canonical_name=self.canonical_name,
+ )
+
+ def to_json(self) -> Dict[str, str]:
+ return {
+ "metadata": self.metadata,
+ "filename": str(self.filename),
+ "canonical_name": self.canonical_name,
+ }
+
+ @classmethod
+ def from_json(cls, args: Dict[str, str]) -> "CacheableDist":
+ return cls(
+ metadata=args["metadata"],
+ filename=Path(args["filename"]),
+ canonical_name=args["canonical_name"],
+ )
+
+
class RequirementPreparer:
"""Prepares a Requirement"""
@@ -231,6 +295,7 @@ def __init__(
lazy_wheel: bool,
verbosity: int,
legacy_resolver: bool,
+ metadata_cache: Optional[LinkMetadataCache] = None,
) -> None:
super().__init__()
@@ -273,6 +338,8 @@ def __init__(
# Previous "header" printed for a link-based InstallRequirement
self._previous_requirement_header = ("", "")
+ self._metadata_cache = metadata_cache
+
def _log_preparing_link(self, req: InstallRequirement) -> None:
"""Provide context for the requirement being prepared."""
if req.link.is_file and not req.is_wheel_from_cache:
@@ -304,11 +371,7 @@ def _ensure_link_req_src_dir(
self, req: InstallRequirement, parallel_builds: bool
) -> None:
"""Ensure source_dir of a linked InstallRequirement."""
- # Since source_dir is only set for editable requirements.
- if req.link.is_wheel:
- # We don't need to unpack wheels, so no need for a source
- # directory.
- return
+ assert not req.link.is_wheel
assert req.source_dir is None
if req.link.is_existing_dir():
# build local directories in-tree
@@ -355,6 +418,47 @@ def _get_linked_req_hashes(self, req: InstallRequirement) -> Hashes:
# showing the user what the hash should be.
return req.hashes(trust_internet=False) or MissingHashes()
+ def _rewrite_link_and_hashes_for_cached_wheel(
+ self, req: InstallRequirement, hashes: Hashes
+ ) -> Optional[Hashes]:
+ """Check the hash of the requirement's source eagerly and rewrite its link.
+
+ ``req.link`` is unconditionally rewritten to the cached wheel source so that the
+ requirement corresponds to where it was actually downloaded from instead of the
+ local cache entry.
+
+ :returns: None if the source hash validated successfully.
+ """
+ assert hashes
+ assert req.is_wheel_from_cache
+ assert req.download_info is not None
+ assert req.link.is_wheel
+ assert req.link.is_file
+
+ # TODO: is it possible to avoid providing a "wrong" req.link in the first place
+ # in the resolver, instead of having to patch it up afterwards?
+ req.link = req.cached_wheel_source_link
+
+ # We need to verify hashes, and we have found the requirement in the cache
+ # of locally built wheels.
+ if (
+ isinstance(req.download_info.info, ArchiveInfo)
+ and req.download_info.info.hashes
+ and hashes.has_one_of(req.download_info.info.hashes)
+ ):
+ # At this point we know the requirement was built from a hashable source
+ # artifact, and we verified that the cache entry's hash of the original
+ # artifact matches one of the hashes we expect. We don't verify hashes
+ # against the cached wheel, because the wheel is not the original.
+ return None
+
+ logger.warning(
+ "The hashes of the source archive found in cache entry "
+ "don't match, ignoring cached built wheel "
+ "and re-downloading source."
+ )
+ return hashes
+
def _fetch_metadata_only(
self,
req: InstallRequirement,
@@ -365,14 +469,81 @@ def _fetch_metadata_only(
)
return None
if self.require_hashes:
+ # Hash checking also means hashes are provided for all reqs, so no resolve
+ # is necessary and metadata-only fetching provides no speedup.
logger.debug(
"Metadata-only fetching is not used as hash checking is required",
)
return None
- # Try PEP 658 metadata first, then fall back to lazy wheel if unavailable.
- return self._fetch_metadata_using_link_data_attr(
- req
- ) or self._fetch_metadata_using_lazy_wheel(req.link)
+
+ return (
+ self._fetch_cached_metadata(req)
+ or self._fetch_metadata_using_link_data_attr(req)
+ or self._fetch_metadata_using_lazy_wheel(req)
+ )
+
+ def _locate_metadata_cache_entry(self, link: Link) -> Optional[Path]:
+ """If the metadata cache is active, generate a filesystem path from the hash of
+ the given Link."""
+ if self._metadata_cache is None:
+ return None
+
+ return self._metadata_cache.cache_path(link)
+
+ def _fetch_cached_metadata(
+ self, req: InstallRequirement
+ ) -> Optional[BaseDistribution]:
+ cached_path = self._locate_metadata_cache_entry(req.link)
+ if cached_path is None:
+ return None
+
+ # Quietly continue if the cache entry does not exist.
+ if not os.path.isfile(cached_path):
+ logger.debug(
+ "no cached metadata for link %s at %s",
+ req.link,
+ cached_path,
+ )
+ return None
+
+ try:
+ with bz2.open(cached_path, mode="rt", encoding="utf-8") as f:
+ logger.debug(
+ "found cached metadata for link %s at %s", req.link, cached_path
+ )
+ args = json.load(f)
+ cached_dist = CacheableDist.from_json(args)
+ return cached_dist.to_dist()
+ except Exception:
+ raise CacheMetadataError(req, "error reading cached metadata")
+
+ def _cache_metadata(
+ self,
+ req: InstallRequirement,
+ metadata_dist: BaseDistribution,
+ ) -> None:
+ cached_path = self._locate_metadata_cache_entry(req.link)
+ if cached_path is None:
+ return
+
+ # The cache file exists already, so we have nothing to do.
+ if os.path.isfile(cached_path):
+ logger.debug(
+ "metadata for link %s is already cached at %s", req.link, cached_path
+ )
+ return
+
+ # The metadata cache is split across several subdirectories, so ensure the
+ # containing directory for the cache file exists before writing.
+ os.makedirs(str(cached_path.parent), exist_ok=True)
+ try:
+ cacheable_dist = CacheableDist.from_dist(req.link, metadata_dist)
+ args = cacheable_dist.to_json()
+ logger.debug("caching metadata for link %s at %s", req.link, cached_path)
+ with bz2.open(cached_path, mode="wt", encoding="utf-8") as f:
+ json.dump(args, f)
+ except Exception:
+ raise CacheMetadataError(req, "failed to serialize metadata")
def _fetch_metadata_using_link_data_attr(
self,
@@ -390,6 +561,9 @@ def _fetch_metadata_using_link_data_attr(
metadata_link,
)
# (2) Download the contents of the METADATA file, separate from the dist itself.
+ # NB: this request will hit the CacheControl HTTP cache, which will be very
+ # quick since the METADATA file is very small. Therefore, we can rely on
+ # HTTP caching instead of LinkMetadataCache.
metadata_file = get_http_url(
metadata_link,
self._download,
@@ -417,36 +591,48 @@ def _fetch_metadata_using_link_data_attr(
def _fetch_metadata_using_lazy_wheel(
self,
- link: Link,
+ req: InstallRequirement,
) -> Optional[BaseDistribution]:
"""Fetch metadata using lazy wheel, if possible."""
# --use-feature=fast-deps must be provided.
if not self.use_lazy_wheel:
return None
- if link.is_file or not link.is_wheel:
+ if req.link.is_file or not req.link.is_wheel:
logger.debug(
"Lazy wheel is not used as %r does not point to a remote wheel",
- link,
+ req.link,
)
return None
- wheel = Wheel(link.filename)
+ wheel = Wheel(req.link.filename)
name = canonicalize_name(wheel.name)
logger.info(
"Obtaining dependency information from %s %s",
name,
wheel.version,
)
- url = link.url.split("#", 1)[0]
+
try:
- return dist_from_wheel_url(name, url, self._session)
+ lazy_wheel_dist = dist_from_wheel_url(
+ name, req.link.url_without_fragment, self._session
+ )
except HTTPRangeRequestUnsupported:
- logger.debug("%s does not support range requests", url)
+ logger.debug("%s does not support range requests", req.link)
return None
+ # If we've used the lazy wheel approach, then PEP 658 metadata is not available.
+ # If the wheel is very large (>1GB), then retrieving it from the CacheControl
+ # HTTP cache may take multiple seconds, even on a fast computer, and the
+ # preparer will unnecessarily copy the cached response to disk before deleting
+ # it at the end of the run. Caching the dist metadata in LinkMetadataCache means
+ # later pip executions can retrieve metadata within milliseconds and avoid
+ # thrashing the disk.
+ self._cache_metadata(req, lazy_wheel_dist)
+ return lazy_wheel_dist
+
def _complete_partial_requirements(
self,
- partially_downloaded_reqs: Iterable[InstallRequirement],
+ metadata_only_reqs: List[InstallRequirement],
parallel_builds: bool = False,
) -> None:
"""Download any requirements which were only fetched by metadata."""
@@ -458,9 +644,23 @@ def _complete_partial_requirements(
# `req.local_file_path` on the appropriate requirement after passing
# all the links at once into BatchDownloader.
links_to_fully_download: Dict[Link, InstallRequirement] = {}
- for req in partially_downloaded_reqs:
+ for req in metadata_only_reqs:
assert req.link
- links_to_fully_download[req.link] = req
+
+ # (1) File URLs don't need to be downloaded, so skip them.
+ if req.link.scheme == "file":
+ continue
+ # (2) If this is e.g. a git url, we don't know how to handle that in the
+ # BatchDownloader, so leave it for self._prepare_linked_requirement() at
+ # the end of this method, which knows how to handle any URL.
+ can_simply_download = True
+ try:
+ # This will raise InvalidSchema if our Session can't download it.
+ self._session.get_adapter(req.link.url)
+ except InvalidSchema:
+ can_simply_download = False
+ if can_simply_download:
+ links_to_fully_download[req.link] = req
batch_download = self._batch_download(
links_to_fully_download.keys(),
@@ -485,9 +685,33 @@ def _complete_partial_requirements(
# This step is necessary to ensure all lazy wheels are processed
# successfully by the 'download', 'wheel', and 'install' commands.
- for req in partially_downloaded_reqs:
+ for req in metadata_only_reqs:
self._prepare_linked_requirement(req, parallel_builds)
+ def _check_download_path(self, req: InstallRequirement) -> None:
+ """Check if the relevant file is already available in the download directory.
+
+ If so, check its hash, and delete the file if the hash doesn't match."""
+ if self.download_dir is None:
+ return
+ if not req.link.is_wheel:
+ return
+
+ hashes = self._get_linked_req_hashes(req)
+ if file_path := _check_download_dir(
+ req.link,
+ self.download_dir,
+ hashes,
+ # When a locally built wheel has been found in cache, we don't warn
+ # about re-downloading when the already downloaded wheel hash does
+ # not match. This is because the hash must be checked against the
+            # original link, not the cached link. In that case the already
+ # downloaded file will be removed and re-fetched from cache (which
+ # implies a hash check against the cache entry's origin.json).
+ warn_on_hash_mismatch=not req.is_wheel_from_cache,
+ ):
+ self._downloaded[req.link.url] = file_path
+
def prepare_linked_requirement(
self, req: InstallRequirement, parallel_builds: bool = False
) -> BaseDistribution:
@@ -495,108 +719,109 @@ def prepare_linked_requirement(
assert req.link
self._log_preparing_link(req)
with indent_log():
- # Check if the relevant file is already available
- # in the download directory
- file_path = None
- if self.download_dir is not None and req.link.is_wheel:
- hashes = self._get_linked_req_hashes(req)
- file_path = _check_download_dir(
- req.link,
- self.download_dir,
- hashes,
- # When a locally built wheel has been found in cache, we don't warn
- # about re-downloading when the already downloaded wheel hash does
- # not match. This is because the hash must be checked against the
- # original link, not the cached link. It that case the already
- # downloaded file will be removed and re-fetched from cache (which
- # implies a hash check against the cache entry's origin.json).
- warn_on_hash_mismatch=not req.is_wheel_from_cache,
- )
+ # See if the file is already downloaded, and check its hash if so.
+ self._check_download_path(req)
- if file_path is not None:
- # The file is already available, so mark it as downloaded
- self._downloaded[req.link.url] = file_path
- else:
- # The file is not available, attempt to fetch only metadata
- metadata_dist = self._fetch_metadata_only(req)
- if metadata_dist is not None:
- req.needs_more_preparation = True
- return metadata_dist
+ # First try to fetch only metadata.
+ if metadata_dist := self._fetch_metadata_only(req):
+ # These reqs now have the dependency information from the downloaded
+ # metadata, without having downloaded the actual dist at all.
+ req.cache_virtual_metadata_only_dist(metadata_dist)
+ return metadata_dist
# None of the optimizations worked, fully prepare the requirement
return self._prepare_linked_requirement(req, parallel_builds)
- def prepare_linked_requirements_more(
- self, reqs: Iterable[InstallRequirement], parallel_builds: bool = False
- ) -> None:
- """Prepare linked requirements more, if needed."""
- reqs = [req for req in reqs if req.needs_more_preparation]
+ def _extract_download_info(self, reqs: Iterable[InstallRequirement]) -> None:
+ """
+ `pip install --report` extracts the download info from each requirement for its
+ JSON output, so we need to make sure every requirement has this before finishing
+ the resolve. But .download_info will only be populated by the point this method
+ is called for requirements already found in the wheel cache, so we need to
+ synthesize it for uncached results. Luckily, a DirectUrl can be parsed directly
+ from a url without any other context. However, this also means the download info
+ will only contain a hash if the link itself declares the hash.
+ """
for req in reqs:
- # Determine if any of these requirements were already downloaded.
- if self.download_dir is not None and req.link.is_wheel:
- hashes = self._get_linked_req_hashes(req)
- file_path = _check_download_dir(req.link, self.download_dir, hashes)
- if file_path is not None:
- self._downloaded[req.link.url] = file_path
- req.needs_more_preparation = False
-
- # Prepare requirements we found were already downloaded for some
- # reason. The other downloads will be completed separately.
- partially_downloaded_reqs: List[InstallRequirement] = []
+ if req.download_info is None:
+ self._ensure_download_info(req)
+
+ def _force_fully_prepared(
+ self, reqs: Iterable[InstallRequirement], assert_has_dist_files: bool
+ ) -> None:
+ """
+        The legacy resolver seems to prepare requirements in a way that can leave
+ them half-done in certain code paths. I'm not quite sure how it's doing things,
+ but at least we can do this to make sure they do things right.
+ """
for req in reqs:
- if req.needs_more_preparation:
- partially_downloaded_reqs.append(req)
- else:
- self._prepare_linked_requirement(req, parallel_builds)
+ req.prepared = True
+ if assert_has_dist_files:
+ assert req.is_concrete
- # TODO: separate this part out from RequirementPreparer when the v1
- # resolver can be removed!
+ def _ensure_dist_files(
+ self, reqs: Iterable[InstallRequirement], parallel_builds: bool = False
+ ) -> None:
+ """Download any metadata-only linked requirements."""
+ metadata_only_reqs = [req for req in reqs if not req.is_concrete]
self._complete_partial_requirements(
- partially_downloaded_reqs,
+ metadata_only_reqs,
parallel_builds=parallel_builds,
)
- def _prepare_linked_requirement(
- self, req: InstallRequirement, parallel_builds: bool
- ) -> BaseDistribution:
- assert req.link
- link = req.link
-
- hashes = self._get_linked_req_hashes(req)
+ def finalize_linked_requirements(
+ self,
+ reqs: Iterable[InstallRequirement],
+ require_dist_files: bool,
+ parallel_builds: bool = False,
+ ) -> None:
+ """Prepare linked requirements more, if needed.
+
+ Neighboring .metadata files as per PEP 658 or lazy wheels via fast-deps will be
+ preferred to extract metadata from any concrete requirement (one that has been
+ mapped to a Link) without downloading the underlying wheel or sdist. When ``pip
+ install --dry-run`` is called, we want to avoid ever downloading the underlying
+ dist, but we still need to provide all of the results that pip commands expect
+ from the typical resolve process.
+
+ Those expectations vary, but one distinction lies in whether the command needs
+ an actual physical dist somewhere on the filesystem, or just the metadata about
+ it from the resolver (as in ``pip install --report``). If the command requires
+ actual physical filesystem locations for the resolved dists, it must call this
+ method with ``require_dist_files=True`` to fully download anything
+ that remains.
+ """
+ if require_dist_files:
+ self._ensure_dist_files(reqs, parallel_builds=parallel_builds)
+ else:
+ self._extract_download_info(reqs)
+ self._force_fully_prepared(reqs, assert_has_dist_files=require_dist_files)
- if hashes and req.is_wheel_from_cache:
- assert req.download_info is not None
- assert link.is_wheel
- assert link.is_file
- # We need to verify hashes, and we have found the requirement in the cache
- # of locally built wheels.
- if (
- isinstance(req.download_info.info, ArchiveInfo)
- and req.download_info.info.hashes
- and hashes.has_one_of(req.download_info.info.hashes)
- ):
- # At this point we know the requirement was built from a hashable source
- # artifact, and we verified that the cache entry's hash of the original
- # artifact matches one of the hashes we expect. We don't verify hashes
- # against the cached wheel, because the wheel is not the original.
- hashes = None
- else:
- logger.warning(
- "The hashes of the source archive found in cache entry "
- "don't match, ignoring cached built wheel "
- "and re-downloading source."
- )
- req.link = req.cached_wheel_source_link
- link = req.link
+ def _ensure_local_file_path(
+ self, req: InstallRequirement, hashes: Optional[Hashes]
+ ) -> None:
+ """Ensure that ``req.link`` is downloaded locally, matches the expected hash,
+ and that ``req.local_file_path`` is set to the download location."""
+ if req.link.is_existing_dir():
+ return
- self._ensure_link_req_src_dir(req, parallel_builds)
+ # NB: req.local_file_path may be set already, if it was:
+ # (1) sourced from a local file (such as a local .tar.gz path),
+ # (2) also in the wheel cache (e.g. built from an sdist).
+ # We will overwrite it if so, since the local file path will still point to the
+ # .tar.gz source instead of the wheel cache entry.
- if link.is_existing_dir():
- local_file = None
- elif link.url not in self._downloaded:
+ local_file: Optional[File] = None
+ # The file may have already been downloaded in batch if it was
+ # a metadata-only requirement, or if it was already in the download directory.
+ if file_path := self._downloaded.get(req.link.url, None):
+ if hashes:
+ hashes.check_against_path(file_path)
+ local_file = File(file_path, content_type=None)
+ else:
try:
local_file = unpack_url(
- link,
+ req.link,
req.source_dir,
self._download,
self.verbosity,
@@ -606,51 +831,82 @@ def _prepare_linked_requirement(
except NetworkConnectionError as exc:
raise InstallationError(
f"Could not install requirement {req} because of HTTP "
- f"error {exc} for URL {link}"
- )
- else:
- file_path = self._downloaded[link.url]
- if hashes:
- hashes.check_against_path(file_path)
- local_file = File(file_path, content_type=None)
+ f"error {exc} for URL {req.link}"
+ ) from exc
- # If download_info is set, we got it from the wheel cache.
- if req.download_info is None:
- # Editables don't go through this function (see
- # prepare_editable_requirement).
- assert not req.editable
- req.download_info = direct_url_from_link(link, req.source_dir)
- # Make sure we have a hash in download_info. If we got it as part of the
- # URL, it will have been verified and we can rely on it. Otherwise we
- # compute it from the downloaded file.
- # FIXME: https://github.com/pypa/pip/issues/11943
- if (
- isinstance(req.download_info.info, ArchiveInfo)
- and not req.download_info.info.hashes
- and local_file
- ):
- hash = hash_file(local_file.path)[0].hexdigest()
- # We populate info.hash for backward compatibility.
- # This will automatically populate info.hashes.
- req.download_info.info.hash = f"sha256={hash}"
-
- # For use in later processing,
- # preserve the file path on the requirement.
- if local_file:
+ if local_file is not None:
req.local_file_path = local_file.path
- dist = _get_prepared_distribution(
+ def _prepare_and_finalize_dist(self, req: InstallRequirement) -> BaseDistribution:
+ (builds_metadata, dist) = _get_prepared_distribution(
req,
self.build_tracker,
self.finder,
self.build_isolation,
self.check_build_deps,
)
+ assert dist.is_concrete
+ assert req.is_concrete
+ assert req.get_dist() is dist
+
+ if builds_metadata and should_cache(req):
+ self._cache_metadata(req, dist)
+
return dist
+ def _prepare_linked_requirement(
+ self, req: InstallRequirement, parallel_builds: bool
+ ) -> BaseDistribution:
+ """Ensure the dist pointing to the fully-resolved requirement is downloaded and
+ installable."""
+ assert req.link, "this requirement must have a download link to fully prepare"
+
+ hashes: Optional[Hashes] = self._get_linked_req_hashes(req)
+
+ if hashes and req.is_wheel_from_cache:
+ hashes = self._rewrite_link_and_hashes_for_cached_wheel(req, hashes)
+
+ # req.source_dir is only set for editable requirements. We don't need to unpack
+ # wheels, so no need for a source directory.
+ if not req.link.is_wheel:
+ self._ensure_link_req_src_dir(req, parallel_builds)
+
+ # Ensure the dist is downloaded, check its hash, and unpack it into the source
+ # directory (if applicable).
+ self._ensure_local_file_path(req, hashes)
+
+ # Set req.download_info for --report output.
+ if req.download_info is None:
+ # If download_info is set, we got it from the wheel cache.
+ self._ensure_download_info(req)
+
+ # Build (if necessary) and prepare the distribution for installation.
+ return self._prepare_and_finalize_dist(req)
+
+ def _ensure_download_info(self, req: InstallRequirement) -> None:
+ """Ensure that ``req.download_info`` is set, for uses such as --report."""
+ assert req.download_info is None
+ # Editables don't go through this function (see prepare_editable_requirement).
+ assert not req.editable
+ req.download_info = direct_url_from_link(req.link, req.source_dir)
+ # Make sure we have a hash in download_info. If we got it as part of the
+ # URL, it will have been verified and we can rely on it. Otherwise we
+ # compute it from the downloaded file.
+ # FIXME: https://github.com/pypa/pip/issues/11943
+ if (
+ isinstance(req.download_info.info, ArchiveInfo)
+ and not req.download_info.info.hashes
+ and req.local_file_path
+ ):
+ hash = hash_file(req.local_file_path)[0].hexdigest()
+ # We populate info.hash for backward compatibility.
+ # This will automatically populate info.hashes.
+ req.download_info.info.hash = f"sha256={hash}"
+
def save_linked_requirement(self, req: InstallRequirement) -> None:
assert self.download_dir is not None
assert req.link is not None
+ assert req.is_concrete
link = req.link
if link.is_vcs or (link.is_existing_dir() and req.editable):
# Make a .zip of the source_dir we already created.
@@ -694,18 +950,9 @@ def prepare_editable_requirement(
req.update_editable()
assert req.source_dir
req.download_info = direct_url_for_editable(req.unpacked_source_directory)
-
- dist = _get_prepared_distribution(
- req,
- self.build_tracker,
- self.finder,
- self.build_isolation,
- self.check_build_deps,
- )
-
req.check_if_exists(self.use_user_site)
- return dist
+ return self._prepare_and_finalize_dist(req)
def prepare_installed_requirement(
self,
@@ -729,4 +976,4 @@ def prepare_installed_requirement(
"completely repeatable environment, install into an "
"empty virtualenv."
)
- return InstalledDistribution(req).get_metadata_distribution()
+ return self._prepare_and_finalize_dist(req)
diff --git a/src/pip/_internal/req/constructors.py b/src/pip/_internal/req/constructors.py
index d73236e05c6..6055fd577f9 100644
--- a/src/pip/_internal/req/constructors.py
+++ b/src/pip/_internal/req/constructors.py
@@ -552,7 +552,7 @@ def install_req_extend_extras(
"""
result = copy.copy(ireq)
result.extras = {*ireq.extras, *extras}
- result.req = (
+ result._req = (
_set_requirement_extras(ireq.req, result.extras)
if ireq.req is not None
else None
diff --git a/src/pip/_internal/req/req_install.py b/src/pip/_internal/req/req_install.py
index 834bc513356..69315a9088e 100644
--- a/src/pip/_internal/req/req_install.py
+++ b/src/pip/_internal/req/req_install.py
@@ -7,7 +7,16 @@
import zipfile
from optparse import Values
from pathlib import Path
-from typing import Any, Collection, Dict, Iterable, List, Optional, Sequence, Union
+from typing import (
+ TYPE_CHECKING,
+ Collection,
+ Dict,
+ Iterable,
+ List,
+ Optional,
+ Sequence,
+ Union,
+)
from pip._vendor.packaging.markers import Marker
from pip._vendor.packaging.requirements import Requirement
@@ -23,10 +32,7 @@
from pip._internal.metadata import (
BaseDistribution,
get_default_environment,
- get_directory_distribution,
- get_wheel_distribution,
)
-from pip._internal.metadata.base import FilesystemWheel
from pip._internal.models.direct_url import DirectUrl
from pip._internal.models.link import Link
from pip._internal.operations.build.metadata import generate_metadata
@@ -59,6 +65,9 @@
from pip._internal.utils.virtualenv import running_under_virtualenv
from pip._internal.vcs import vcs
+if TYPE_CHECKING:
+ import email.message
+
logger = logging.getLogger(__name__)
@@ -88,7 +97,7 @@ def __init__(
permit_editable_wheels: bool = False,
) -> None:
assert req is None or isinstance(req, Requirement), req
- self.req = req
+ self._req = req
self.comes_from = comes_from
self.constraint = constraint
self.editable = editable
@@ -150,6 +159,7 @@ def __init__(
self.hash_options = hash_options if hash_options else {}
self.config_settings = config_settings
# Set to True after successful preparation of this requirement
+ # TODO: this is only used in the legacy resolver: remove this!
self.prepared = False
# User supplied requirement are explicitly requested for installation
# by the user via CLI arguments or requirements files, as opposed to,
@@ -191,12 +201,32 @@ def __init__(
)
self.use_pep517 = True
- # This requirement needs more preparation before it can be built
- self.needs_more_preparation = False
+ # When a dist is computed for this requirement, cache it here so it's visible
+ # everywhere within pip and isn't computed more than once. This may be
+ # a "virtual" dist without a physical location on the filesystem, or
+ # a "concrete" dist which has been fully downloaded.
+ self._dist: Optional[BaseDistribution] = None
# This requirement needs to be unpacked before it can be installed.
self._archive_source: Optional[Path] = None
+ @property
+ def req(self) -> Optional[Requirement]:
+ """Calculate a requirement from the cached dist, if populated.
+
+ The cached dist can be populated by either
+ ``self.cache_virtual_metadata_only_dist()`` or
+ ``self.cache_concrete_dist()`` and can also be retrieved with
+ ``self.get_dist()``."""
+ if self._req is not None:
+ return self._req
+ if self._dist is not None:
+ name = self._dist.canonical_name
+ version = str(self._dist.version)
+ self._req = Requirement(f"{name}=={version}")
+ return self._req
+ return None
+
def __str__(self) -> str:
if self.req:
s = redact_auth_from_requirement(self.req)
@@ -386,7 +416,7 @@ def ensure_build_location(
def _set_requirement(self) -> None:
"""Set requirement after generating metadata."""
- assert self.req is None
+ assert self._req is None
assert self.metadata is not None
assert self.source_dir is not None
@@ -396,7 +426,7 @@ def _set_requirement(self) -> None:
else:
op = "==="
- self.req = get_requirement(
+ self._req = get_requirement(
"".join(
[
self.metadata["Name"],
@@ -422,7 +452,7 @@ def warn_on_mismatching_name(self) -> None:
metadata_name,
self.name,
)
- self.req = get_requirement(metadata_name)
+ self._req = get_requirement(metadata_name)
def check_if_exists(self, use_user_site: bool) -> None:
"""Find an installed distribution that satisfies or conflicts
@@ -550,11 +580,11 @@ def isolated_editable_sanity_check(self) -> None:
f"Consider using a build backend that supports PEP 660."
)
- def prepare_metadata(self) -> None:
+ def prepare_metadata_directory(self) -> None:
"""Ensure that project metadata is available.
- Under PEP 517 and PEP 660, call the backend hook to prepare the metadata.
- Under legacy processing, call setup.py egg-info.
+ Under PEP 517 and PEP 660, call the backend hook to prepare the metadata
+ directory. Under legacy processing, call setup.py egg-info.
"""
assert self.source_dir, f"No source dir for {self}"
details = self.name or f"from {self.link}"
@@ -586,6 +616,8 @@ def prepare_metadata(self) -> None:
details=details,
)
+ def validate_sdist_metadata(self) -> None:
+ """Ensure that we have a dist, and ensure it corresponds to expectations."""
# Act on the newly generated metadata, based on the name and version.
if not self.name:
self._set_requirement()
@@ -595,25 +627,51 @@ def prepare_metadata(self) -> None:
self.assert_source_matches_version()
@property
- def metadata(self) -> Any:
- if not hasattr(self, "_metadata"):
- self._metadata = self.get_dist().metadata
-
- return self._metadata
+ def metadata(self) -> "email.message.Message":
+ return self.get_dist().metadata
def get_dist(self) -> BaseDistribution:
- if self.metadata_directory:
- return get_directory_distribution(self.metadata_directory)
- elif self.local_file_path and self.is_wheel:
- assert self.req is not None
- return get_wheel_distribution(
- FilesystemWheel(self.local_file_path),
- canonicalize_name(self.req.name),
- )
- raise AssertionError(
- f"InstallRequirement {self} has no metadata directory and no wheel: "
- f"can't make a distribution."
- )
+ """Retrieve the dist resolved from this requirement.
+
+ :raises AssertionError: if the resolver has not yet been executed.
+ """
+ if self._dist is None:
+ raise AssertionError(f"{self!r} has no dist associated.")
+ return self._dist
+
+ def cache_virtual_metadata_only_dist(self, dist: BaseDistribution) -> None:
+ """Associate a "virtual" metadata-only dist to this requirement.
+
+ This dist cannot be installed, but it can be used to complete the resolve
+ process.
+
+ :raises AssertionError: if a dist has already been associated.
+ :raises AssertionError: if the provided dist is "concrete", i.e. exists
+ somewhere on the filesystem.
+ """
+ assert self._dist is None, self
+ assert not dist.is_concrete, dist
+ self._dist = dist
+
+ def cache_concrete_dist(self, dist: BaseDistribution) -> None:
+ """Associate a "concrete" dist to this requirement.
+
+ A concrete dist exists somewhere on the filesystem and can be installed.
+
+ :raises AssertionError: if a concrete dist has already been associated.
+ :raises AssertionError: if the provided dist is not concrete.
+ """
+ if self._dist is not None:
+ # If we set a dist twice for the same requirement, we must be hydrating
+ # a concrete dist for what was previously virtual. This will occur in the
+ # case of `install --dry-run` when PEP 658 metadata is available.
+ assert not self._dist.is_concrete
+ assert dist.is_concrete
+ self._dist = dist
+
+ @property
+ def is_concrete(self) -> bool:
+ return self._dist is not None and self._dist.is_concrete
def assert_source_matches_version(self) -> None:
assert self.source_dir, f"No source dir for {self}"
diff --git a/src/pip/_internal/req/req_set.py b/src/pip/_internal/req/req_set.py
index ec7a6e07a25..1bec85cc409 100644
--- a/src/pip/_internal/req/req_set.py
+++ b/src/pip/_internal/req/req_set.py
@@ -43,7 +43,7 @@ def add_unnamed_requirement(self, install_req: InstallRequirement) -> None:
self.unnamed_requirements.append(install_req)
def add_named_requirement(self, install_req: InstallRequirement) -> None:
- assert install_req.name
+ assert install_req.name, install_req
project_name = canonicalize_name(install_req.name)
self.requirements[project_name] = install_req
diff --git a/src/pip/_internal/resolution/resolvelib/resolver.py b/src/pip/_internal/resolution/resolvelib/resolver.py
index c12beef0b2a..4880a5b9802 100644
--- a/src/pip/_internal/resolution/resolvelib/resolver.py
+++ b/src/pip/_internal/resolution/resolvelib/resolver.py
@@ -175,11 +175,6 @@ def resolve(
req_set.add_named_requirement(ireq)
- reqs = req_set.all_requirements
- self.factory.preparer.prepare_linked_requirements_more(reqs)
- for req in reqs:
- req.prepared = True
- req.needs_more_preparation = False
return req_set
def get_installation_order(
diff --git a/src/pip/_internal/wheel_builder.py b/src/pip/_internal/wheel_builder.py
index 93f8e1f5b2f..efe7a54bd1e 100644
--- a/src/pip/_internal/wheel_builder.py
+++ b/src/pip/_internal/wheel_builder.py
@@ -3,7 +3,6 @@
import logging
import os.path
-import re
import shutil
from typing import Iterable, List, Optional, Tuple
@@ -25,23 +24,12 @@
from pip._internal.utils.subprocess import call_subprocess
from pip._internal.utils.temp_dir import TempDirectory
from pip._internal.utils.urls import path_to_url
-from pip._internal.vcs import vcs
logger = logging.getLogger(__name__)
-_egg_info_re = re.compile(r"([a-z0-9_.]+)-([a-z0-9_.!+-]+)", re.IGNORECASE)
-
BuildResult = Tuple[List[InstallRequirement], List[InstallRequirement]]
-def _contains_egg_info(s: str) -> bool:
- """Determine whether the string looks like an egg_info.
-
- :param s: The string to parse. E.g. foo-2.1
- """
- return bool(_egg_info_re.search(s))
-
-
def _should_build(
req: InstallRequirement,
need_wheel: bool,
@@ -87,54 +75,6 @@ def should_build_for_install_command(
return _should_build(req, need_wheel=False)
-def _should_cache(
- req: InstallRequirement,
-) -> Optional[bool]:
- """
- Return whether a built InstallRequirement can be stored in the persistent
- wheel cache, assuming the wheel cache is available, and _should_build()
- has determined a wheel needs to be built.
- """
- if req.editable or not req.source_dir:
- # never cache editable requirements
- return False
-
- if req.link and req.link.is_vcs:
- # VCS checkout. Do not cache
- # unless it points to an immutable commit hash.
- assert not req.editable
- assert req.source_dir
- vcs_backend = vcs.get_backend_for_scheme(req.link.scheme)
- assert vcs_backend
- if vcs_backend.is_immutable_rev_checkout(req.link.url, req.source_dir):
- return True
- return False
-
- assert req.link
- base, ext = req.link.splitext()
- if _contains_egg_info(base):
- return True
-
- # Otherwise, do not cache.
- return False
-
-
-def _get_cache_dir(
- req: InstallRequirement,
- wheel_cache: WheelCache,
-) -> str:
- """Return the persistent or temporary cache directory where the built
- wheel need to be stored.
- """
- cache_available = bool(wheel_cache.cache_dir)
- assert req.link
- if cache_available and _should_cache(req):
- cache_dir = wheel_cache.get_path_for_link(req.link)
- else:
- cache_dir = wheel_cache.get_ephem_path_for_link(req.link)
- return cache_dir
-
-
def _verify_one(req: InstallRequirement, wheel_path: str) -> None:
canonical_name = canonicalize_name(req.name or "")
w = Wheel(os.path.basename(wheel_path))
@@ -315,7 +255,7 @@ def build(
build_successes, build_failures = [], []
for req in requirements:
assert req.name
- cache_dir = _get_cache_dir(req, wheel_cache)
+ cache_dir = wheel_cache.resolve_cache_dir(req)
wheel_file = _build_one(
req,
cache_dir,
diff --git a/tests/conftest.py b/tests/conftest.py
index da4ab5b9dfb..3a168c484a3 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -748,6 +748,9 @@ class FakePackage:
requires_dist: Tuple[str, ...] = ()
# This will override the Name specified in the actual dist's METADATA.
metadata_name: Optional[str] = None
+ # Whether to delete the file this points to, which causes any attempt to fetch this
+ # package to fail unless it is processed as a metadata-only dist.
+ delete_linked_file: bool = False
def metadata_filename(self) -> str:
"""This is specified by PEP 658."""
@@ -837,6 +840,27 @@ def fake_packages() -> Dict[str, List[FakePackage]]:
("simple==1.0",),
),
],
+ "complex-dist": [
+ FakePackage(
+ "complex-dist",
+ "0.1",
+ "complex_dist-0.1-py2.py3-none-any.whl",
+ MetadataKind.Unhashed,
+ # Validate that the wheel isn't fetched if metadata is available and
+ # --dry-run is on, when the metadata presents no hash itself.
+ delete_linked_file=True,
+ ),
+ ],
+ "corruptwheel": [
+ FakePackage(
+ "corruptwheel",
+ "1.0",
+ "corruptwheel-1.0-py2.py3-none-any.whl",
+ # Validate that the wheel isn't fetched if metadata is available and
+ # --dry-run is on, when the metadata *does* present a hash.
+ MetadataKind.Sha256,
+ ),
+ ],
"has-script": [
# Ensure we check PEP 658 metadata hashing errors for wheel files.
FakePackage(
@@ -922,10 +946,10 @@ def html_index_for_packages(
f' {package_link.filename}
' # noqa: E501
)
# (3.2) Copy over the corresponding file in `shared_data.packages`.
- shutil.copy(
- shared_data.packages / package_link.filename,
- pkg_subdir / package_link.filename,
- )
+ cached_file = shared_data.packages / package_link.filename
+ new_file = pkg_subdir / package_link.filename
+ if not package_link.delete_linked_file:
+ shutil.copy(cached_file, new_file)
# (3.3) Write a metadata file, if applicable.
if package_link.metadata != MetadataKind.NoFile:
with open(pkg_subdir / package_link.metadata_filename(), "wb") as f:
@@ -980,7 +1004,8 @@ def html_index_with_onetime_server(
"""Serve files from a generated pypi index, erroring if a file is downloaded more
than once.
- Provide `-i http://localhost:8000` to pip invocations to point them at this server.
+ Provide `-i http://localhost:<port>` to pip invocations to point them at
+ this server.
"""
class InDirectoryServer(http.server.ThreadingHTTPServer):
@@ -995,7 +1020,7 @@ def finish_request(self: "Self", request: Any, client_address: Any) -> None:
class Handler(OneTimeDownloadHandler):
_seen_paths: ClassVar[Set[str]] = set()
- with InDirectoryServer(("", 8000), Handler) as httpd:
+ with InDirectoryServer(("", 0), Handler) as httpd:
server_thread = threading.Thread(target=httpd.serve_forever)
server_thread.start()
diff --git a/tests/functional/test_check.py b/tests/functional/test_check.py
index f50f5593e5c..9e59a4af3d0 100644
--- a/tests/functional/test_check.py
+++ b/tests/functional/test_check.py
@@ -123,10 +123,7 @@ def test_check_complicated_name_missing(script: PipTestEnvironment) -> None:
# Without dependency
result = script.pip("install", "--no-index", package_a_path, "--no-deps")
- assert (
- "Successfully installed package_A-1.0" in result.stdout
- or "Successfully installed package-A-1.0" in result.stdout
- ), str(result)
+ assert "Successfully installed package-a-1.0" in result.stdout, str(result)
result = script.pip("check", expect_error=True)
expected_lines = ("package-a 1.0 requires dependency-b, which is not installed.",)
@@ -149,10 +146,7 @@ def test_check_complicated_name_broken(script: PipTestEnvironment) -> None:
# With broken dependency
result = script.pip("install", "--no-index", package_a_path, "--no-deps")
- assert (
- "Successfully installed package_A-1.0" in result.stdout
- or "Successfully installed package-A-1.0" in result.stdout
- ), str(result)
+ assert "Successfully installed package-a-1.0" in result.stdout, str(result)
result = script.pip(
"install",
@@ -185,10 +179,7 @@ def test_check_complicated_name_clean(script: PipTestEnvironment) -> None:
)
result = script.pip("install", "--no-index", package_a_path, "--no-deps")
- assert (
- "Successfully installed package_A-1.0" in result.stdout
- or "Successfully installed package-A-1.0" in result.stdout
- ), str(result)
+ assert "Successfully installed package-a-1.0" in result.stdout, str(result)
result = script.pip(
"install",
@@ -216,10 +207,7 @@ def test_check_considers_conditional_reqs(script: PipTestEnvironment) -> None:
)
result = script.pip("install", "--no-index", package_a_path, "--no-deps")
- assert (
- "Successfully installed package_A-1.0" in result.stdout
- or "Successfully installed package-A-1.0" in result.stdout
- ), str(result)
+ assert "Successfully installed package-a-1.0" in result.stdout, str(result)
result = script.pip("check", expect_error=True)
expected_lines = ("package-a 1.0 requires dependency-b, which is not installed.",)
diff --git a/tests/functional/test_download.py b/tests/functional/test_download.py
index d469e71c360..53c815ae98b 100644
--- a/tests/functional/test_download.py
+++ b/tests/functional/test_download.py
@@ -1273,6 +1273,7 @@ def download_server_html_index(
) -> Callable[..., Tuple[TestPipResult, Path]]:
"""Execute `pip download` against a generated PyPI index."""
download_dir = tmpdir / "download_dir"
+ _, server_port = html_index_with_onetime_server.server_address
def run_for_generated_index(
args: List[str],
@@ -1287,7 +1288,7 @@ def run_for_generated_index(
"-d",
str(download_dir),
"-i",
- "http://localhost:8000",
+ f"http://localhost:{server_port}",
*args,
]
result = script.pip(*pip_args, allow_error=allow_error)
diff --git a/tests/functional/test_install.py b/tests/functional/test_install.py
index 8c30a69b515..c4fc5685607 100644
--- a/tests/functional/test_install.py
+++ b/tests/functional/test_install.py
@@ -2094,7 +2094,7 @@ def test_install_conflict_results_in_warning(
# Install pkgA without its dependency
result1 = script.pip("install", "--no-index", pkgA_path, "--no-deps")
- assert "Successfully installed pkgA-1.0" in result1.stdout, str(result1)
+ assert "Successfully installed pkga-1.0" in result1.stdout, str(result1)
# Then install an incorrect version of the dependency
result2 = script.pip(
@@ -2104,7 +2104,7 @@ def test_install_conflict_results_in_warning(
allow_stderr_error=True,
)
assert "pkga 1.0 requires pkgb==1.0" in result2.stderr, str(result2)
- assert "Successfully installed pkgB-2.0" in result2.stdout, str(result2)
+ assert "Successfully installed pkgb-2.0" in result2.stdout, str(result2)
def test_install_conflict_warning_can_be_suppressed(
@@ -2124,11 +2124,11 @@ def test_install_conflict_warning_can_be_suppressed(
# Install pkgA without its dependency
result1 = script.pip("install", "--no-index", pkgA_path, "--no-deps")
- assert "Successfully installed pkgA-1.0" in result1.stdout, str(result1)
+ assert "Successfully installed pkga-1.0" in result1.stdout, str(result1)
# Then install an incorrect version of the dependency; suppressing warning
result2 = script.pip("install", "--no-index", pkgB_path, "--no-warn-conflicts")
- assert "Successfully installed pkgB-2.0" in result2.stdout, str(result2)
+ assert "Successfully installed pkgb-2.0" in result2.stdout, str(result2)
def test_target_install_ignores_distutils_config_install_prefix(
diff --git a/tests/functional/test_install_check.py b/tests/functional/test_install_check.py
index 8a8a7c93a80..d16165e03f4 100644
--- a/tests/functional/test_install_check.py
+++ b/tests/functional/test_install_check.py
@@ -28,7 +28,7 @@ def test_check_install_canonicalization(script: PipTestEnvironment) -> None:
# Let's install pkgA without its dependency
result = script.pip("install", "--no-index", pkga_path, "--no-deps")
- assert "Successfully installed pkgA-1.0" in result.stdout, str(result)
+ assert "Successfully installed pkga-1.0" in result.stdout, str(result)
# Install the first missing dependency. Only an error for the
# second dependency should remain.
diff --git a/tests/functional/test_install_metadata.py b/tests/functional/test_install_metadata.py
new file mode 100644
index 00000000000..fd04adf4e8d
--- /dev/null
+++ b/tests/functional/test_install_metadata.py
@@ -0,0 +1,239 @@
+import json
+import re
+from pathlib import Path
+from typing import Any, Callable, Dict, Iterator, List, Tuple
+
+import pytest
+from pip._vendor.packaging.requirements import Requirement
+
+from pip._internal.models.direct_url import DirectUrl
+from pip._internal.utils.urls import path_to_url
+from tests.lib import (
+ PipTestEnvironment,
+ TestPipResult,
+)
+
+
+@pytest.fixture
+def install_with_generated_html_index(
+ script: PipTestEnvironment,
+ html_index_for_packages: Path,
+ tmpdir: Path,
+) -> Callable[..., Tuple[TestPipResult, Dict[str, Any]]]:
+ """Execute `pip download` against a generated PyPI index."""
+ output_file = tmpdir / "output_file.json"
+
+ def run_for_generated_index(
+ args: List[str],
+ *,
+ dry_run: bool = True,
+ allow_error: bool = False,
+ ) -> Tuple[TestPipResult, Dict[str, Any]]:
+ """
+ Produce a PyPI directory structure pointing to the specified packages, then
+ execute `pip install --report ... -i ...` pointing to our generated index.
+ """
+ pip_args = [
+ "install",
+ *(("--dry-run",) if dry_run else ()),
+ "--ignore-installed",
+ "--report",
+ str(output_file),
+ "-i",
+ path_to_url(str(html_index_for_packages)),
+ *args,
+ ]
+ result = script.pip(*pip_args, allow_error=allow_error)
+ try:
+ with open(output_file, "rb") as f:
+ report = json.load(f)
+ except FileNotFoundError:
+ if allow_error:
+ report = {}
+ else:
+ raise
+ return (result, report)
+
+ return run_for_generated_index
+
+
+def iter_dists(report: Dict[str, Any]) -> Iterator[Tuple[Requirement, DirectUrl]]:
+ """Parse a (req,url) tuple from each installed dist in the --report json."""
+ for inst in report["install"]:
+ metadata = inst["metadata"]
+ name = metadata["name"]
+ version = metadata["version"]
+ req = Requirement(f"{name}=={version}")
+ direct_url = DirectUrl.from_dict(inst["download_info"])
+ yield (req, direct_url)
+
+
+@pytest.mark.parametrize(
+ "requirement_to_install, expected_outputs",
+ [
+ ("simple2==1.0", ["simple2==1.0", "simple==1.0"]),
+ ("simple==2.0", ["simple==2.0"]),
+ (
+ "colander",
+ ["colander==0.9.9", "translationstring==1.1"],
+ ),
+ (
+ "compilewheel",
+ ["compilewheel==1.0", "simple==1.0"],
+ ),
+ ],
+)
+def test_install_with_metadata(
+ install_with_generated_html_index: Callable[
+ ..., Tuple[TestPipResult, Dict[str, Any]]
+ ],
+ requirement_to_install: str,
+ expected_outputs: List[str],
+) -> None:
+ """Verify that if a data-dist-info-metadata attribute is present, then it is used
+ instead of the actual dist's METADATA."""
+ _, report = install_with_generated_html_index(
+ [requirement_to_install],
+ )
+ installed = sorted(str(r) for r, _ in iter_dists(report))
+ assert installed == expected_outputs
+
+
+@pytest.mark.parametrize(
+ "requirement_to_install, real_hash",
+ [
+ (
+ "simple==3.0",
+ "95e0f200b6302989bcf2cead9465cf229168295ea330ca30d1ffeab5c0fed996",
+ ),
+ (
+ "has-script",
+ "16ba92d7f6f992f6de5ecb7d58c914675cf21f57f8e674fb29dcb4f4c9507e5b",
+ ),
+ ],
+)
+def test_incorrect_metadata_hash(
+ install_with_generated_html_index: Callable[
+ ..., Tuple[TestPipResult, Dict[str, Any]]
+ ],
+ requirement_to_install: str,
+ real_hash: str,
+) -> None:
+ """Verify that if a hash for data-dist-info-metadata is provided, it must match the
+ actual hash of the metadata file."""
+ result, _ = install_with_generated_html_index(
+ [requirement_to_install],
+ allow_error=True,
+ )
+ assert result.returncode != 0
+ expected_msg = f"""\
+ Expected sha256 wrong-hash
+ Got {real_hash}"""
+ assert expected_msg in result.stderr
+
+
+@pytest.mark.parametrize(
+ "requirement_to_install, expected_url",
+ [
+ ("simple2==2.0", "simple2-2.0.tar.gz.metadata"),
+ ("priority", "priority-1.0-py2.py3-none-any.whl.metadata"),
+ ],
+)
+def test_metadata_not_found(
+ install_with_generated_html_index: Callable[
+ ..., Tuple[TestPipResult, Dict[str, Any]]
+ ],
+ requirement_to_install: str,
+ expected_url: str,
+) -> None:
+ """Verify that if a data-dist-info-metadata attribute is provided, that pip will
+ fetch the .metadata file at the location specified by PEP 658, and error
+ if unavailable."""
+ result, _ = install_with_generated_html_index(
+ [requirement_to_install],
+ allow_error=True,
+ )
+ assert result.returncode != 0
+ expected_re = re.escape(expected_url)
+ pattern = re.compile(
+ f"ERROR: 404 Client Error: FileNotFoundError for url:.*{expected_re}"
+ )
+ assert pattern.search(result.stderr), (pattern, result.stderr)
+
+
+def test_produces_error_for_mismatched_package_name_in_metadata(
+ install_with_generated_html_index: Callable[
+ ..., Tuple[TestPipResult, Dict[str, Any]]
+ ],
+) -> None:
+ """Verify that the package name from the metadata matches the requested package."""
+ result, _ = install_with_generated_html_index(
+ ["simple2==3.0"],
+ allow_error=True,
+ )
+ assert result.returncode != 0
+ assert (
+ "simple2-3.0.tar.gz has inconsistent Name: expected 'simple2', but metadata "
+ "has 'not-simple2'"
+ ) in result.stdout
+
+
+@pytest.mark.parametrize(
+ "requirement",
+ [
+ "requires-simple-extra==0.1",
+ "REQUIRES_SIMPLE-EXTRA==0.1",
+ "REQUIRES....simple-_-EXTRA==0.1",
+ ],
+)
+def test_canonicalizes_package_name_before_verifying_metadata(
+ install_with_generated_html_index: Callable[
+ ..., Tuple[TestPipResult, Dict[str, Any]]
+ ],
+ requirement: str,
+) -> None:
+ """Verify that the package name from the command line and the package's
+ METADATA are both canonicalized before comparison, while the name from the METADATA
+ is always used verbatim to represent the installed candidate in --report.
+
+ Regression test for https://github.com/pypa/pip/issues/12038
+ """
+ _, report = install_with_generated_html_index(
+ [requirement],
+ )
+ reqs = [str(r) for r, _ in iter_dists(report)]
+ assert reqs == ["Requires_Simple.Extra==0.1"]
+
+
+@pytest.mark.parametrize(
+ "requirement,err_string",
+ [
+ # It's important that we verify pip won't even attempt to fetch the file, so we
+ # construct an input that will cause it to error if it tries at all.
+ (
+ "complex-dist==0.1",
+ "Could not install packages due to an OSError: [Errno 2] No such file or directory", # noqa: E501
+ ),
+ ("corruptwheel==1.0", ".whl is invalid."),
+ ],
+)
+def test_dry_run_avoids_downloading_metadata_only_dists(
+ install_with_generated_html_index: Callable[
+ ..., Tuple[TestPipResult, Dict[str, Any]]
+ ],
+ requirement: str,
+ err_string: str,
+) -> None:
+ """Verify that the underlying dist files are not downloaded at all when
+ `install --dry-run` is used to resolve dists with PEP 658 metadata."""
+ _, report = install_with_generated_html_index(
+ [requirement],
+ )
+ assert [requirement] == [str(r) for r, _ in iter_dists(report)]
+ result, _ = install_with_generated_html_index(
+ [requirement],
+ dry_run=False,
+ allow_error=True,
+ )
+ assert result.returncode != 0
+ assert err_string in result.stderr
diff --git a/tests/functional/test_install_reqs.py b/tests/functional/test_install_reqs.py
index b1aed6ad3f4..81668d9e9da 100644
--- a/tests/functional/test_install_reqs.py
+++ b/tests/functional/test_install_reqs.py
@@ -614,7 +614,7 @@ def test_install_distribution_full_union(
result = script.pip_install_local(
to_install, f"{to_install}[bar]", f"{to_install}[baz]"
)
- assert "Building wheel for LocalExtras" in result.stdout
+ assert "Building wheel for localextras" in result.stdout
result.did_create(script.site_packages / "simple")
result.did_create(script.site_packages / "singlemodule.py")
diff --git a/tests/functional/test_wheel.py b/tests/functional/test_wheel.py
index 1bddd40dc41..69825899df7 100644
--- a/tests/functional/test_wheel.py
+++ b/tests/functional/test_wheel.py
@@ -285,7 +285,7 @@ def test_wheel_package_with_latin1_setup(
pkg_to_wheel = data.packages.joinpath("SetupPyLatin1")
result = script.pip("wheel", pkg_to_wheel)
- assert "Successfully built SetupPyUTF8" in result.stdout
+ assert "Successfully built setuppyutf8" in result.stdout
def test_pip_wheel_with_pep518_build_reqs(
diff --git a/tests/unit/metadata/test_metadata_pkg_resources.py b/tests/unit/metadata/test_metadata_pkg_resources.py
index 6044c14e4ca..195442cbe1e 100644
--- a/tests/unit/metadata/test_metadata_pkg_resources.py
+++ b/tests/unit/metadata/test_metadata_pkg_resources.py
@@ -104,6 +104,7 @@ def test_wheel_metadata_works() -> None:
metadata=InMemoryMetadata({"METADATA": metadata.as_bytes()}, ""),
project_name=name,
),
+ concrete=False,
)
assert name == dist.canonical_name == dist.raw_name
diff --git a/tests/unit/test_cache.py b/tests/unit/test_cache.py
index 30cdb6ebece..8eb51d5f889 100644
--- a/tests/unit/test_cache.py
+++ b/tests/unit/test_cache.py
@@ -1,13 +1,32 @@
import os
from pathlib import Path
+import pytest
from pip._vendor.packaging.tags import Tag, interpreter_name, interpreter_version
-from pip._internal.cache import WheelCache, _hash_dict
+from pip._internal.cache import WheelCache, _contains_egg_info, _hash_dict
from pip._internal.models.link import Link
from pip._internal.utils.misc import ensure_dir
+@pytest.mark.parametrize(
+ "s, expected",
+ [
+ # Trivial.
+ ("pip-18.0", True),
+ # Ambiguous.
+ ("foo-2-2", True),
+ ("im-valid", True),
+ # Invalid.
+ ("invalid", False),
+ ("im_invalid", False),
+ ],
+)
+def test_contains_egg_info(s: str, expected: bool) -> None:
+ result = _contains_egg_info(s)
+ assert result == expected
+
+
def test_falsey_path_none() -> None:
wc = WheelCache("")
assert wc.cache_dir is None
diff --git a/tests/unit/test_req.py b/tests/unit/test_req.py
index 8a95c058706..5757850c6b3 100644
--- a/tests/unit/test_req.py
+++ b/tests/unit/test_req.py
@@ -23,6 +23,7 @@
PreviousBuildDirError,
)
from pip._internal.index.package_finder import PackageFinder
+from pip._internal.metadata import get_metadata_distribution
from pip._internal.models.direct_url import ArchiveInfo, DirectUrl, DirInfo, VcsInfo
from pip._internal.models.link import Link
from pip._internal.network.session import PipSession
@@ -144,7 +145,11 @@ def test_no_reuse_existing_build_dir(self, data: TestData) -> None:
):
resolver.resolve(reqset.all_requirements, True)
- def test_environment_marker_extras(self, data: TestData) -> None:
+ def test_environment_marker_extras(
+ self,
+ data: TestData,
+ monkeypatch: pytest.MonkeyPatch,
+ ) -> None:
"""
Test that the environment marker extras are used with
non-wheel installs.
@@ -154,6 +159,13 @@ def test_environment_marker_extras(self, data: TestData) -> None:
os.fspath(data.packages.joinpath("LocalEnvironMarker")),
)
req.user_supplied = True
+
+ def cache_concrete_dist(self, dist): # type: ignore[no-untyped-def]
+ self._dist = dist
+
+ monkeypatch.setattr(
+ req, "cache_concrete_dist", partial(cache_concrete_dist, req)
+ )
reqset.add_unnamed_requirement(req)
finder = make_test_finder(find_links=[data.find_links])
with self._basic_resolver(finder) as resolver:
@@ -499,12 +511,23 @@ def test_download_info_local_dir(self, data: TestData) -> None:
assert req.download_info.url.startswith("file://")
assert isinstance(req.download_info.info, DirInfo)
- def test_download_info_local_editable_dir(self, data: TestData) -> None:
+ def test_download_info_local_editable_dir(
+ self,
+ data: TestData,
+ monkeypatch: pytest.MonkeyPatch,
+ ) -> None:
"""Test that download_info is set for requirements from a local editable dir."""
finder = make_test_finder()
with self._basic_resolver(finder) as resolver:
ireq_url = data.packages.joinpath("FSPkg").as_uri()
ireq = get_processed_req_from_line(f"-e {ireq_url}#egg=FSPkg")
+
+ def cache_concrete_dist(self, dist): # type: ignore[no-untyped-def]
+ self._dist = dist
+
+ monkeypatch.setattr(
+ ireq, "cache_concrete_dist", partial(cache_concrete_dist, ireq)
+ )
reqset = resolver.resolve([ireq], True)
assert len(reqset.all_requirements) == 1
req = reqset.all_requirements[0]
@@ -909,7 +932,9 @@ def test_mismatched_versions(caplog: pytest.LogCaptureFixture) -> None:
metadata = email.message.Message()
metadata["name"] = "simplewheel"
metadata["version"] = "1.0"
- req._metadata = metadata
+ req._dist = get_metadata_distribution(
+ bytes(metadata), "simplewheel-1.0.whl", "simplewheel"
+ )
req.assert_source_matches_version()
assert caplog.records[-1].message == (
diff --git a/tests/unit/test_wheel_builder.py b/tests/unit/test_wheel_builder.py
index d5f372dd5cd..eb23d74ea94 100644
--- a/tests/unit/test_wheel_builder.py
+++ b/tests/unit/test_wheel_builder.py
@@ -6,7 +6,8 @@
import pytest
-from pip._internal import wheel_builder
+from pip._internal import cache, wheel_builder
+from pip._internal.cache import _contains_egg_info
from pip._internal.models.link import Link
from pip._internal.operations.build.wheel_legacy import format_command_result
from pip._internal.req.req_install import InstallRequirement
@@ -28,7 +29,7 @@
],
)
def test_contains_egg_info(s: str, expected: bool) -> None:
- result = wheel_builder._contains_egg_info(s)
+ result = _contains_egg_info(s)
assert result == expected
@@ -116,7 +117,7 @@ def test_should_build_for_wheel_command(req: ReqMock, expected: bool) -> None:
],
)
def test_should_cache(req: ReqMock, expected: bool) -> None:
- assert wheel_builder._should_cache(cast(InstallRequirement, req)) is expected
+ assert cache.should_cache(cast(InstallRequirement, req)) is expected
def test_should_cache_git_sha(tmpdir: Path) -> None:
@@ -126,12 +127,12 @@ def test_should_cache_git_sha(tmpdir: Path) -> None:
# a link referencing a sha should be cached
url = "git+https://g.c/o/r@" + commit + "#egg=mypkg"
req = ReqMock(link=Link(url), source_dir=repo_path)
- assert wheel_builder._should_cache(cast(InstallRequirement, req))
+ assert cache.should_cache(cast(InstallRequirement, req))
# a link not referencing a sha should not be cached
url = "git+https://g.c/o/r@master#egg=mypkg"
req = ReqMock(link=Link(url), source_dir=repo_path)
- assert not wheel_builder._should_cache(cast(InstallRequirement, req))
+ assert not cache.should_cache(cast(InstallRequirement, req))
def test_format_command_result__INFO(caplog: pytest.LogCaptureFixture) -> None: