Skip to content

Support wheel cache when using --require-hashes #11897

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Apr 15, 2023
1 change: 1 addition & 0 deletions news/5037.feature.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Support wheel cache when using ``--require-hashes``.
59 changes: 50 additions & 9 deletions src/pip/_internal/operations/prepare.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,10 @@ def unpack_url(


def _check_download_dir(
link: Link, download_dir: str, hashes: Optional[Hashes]
link: Link,
download_dir: str,
hashes: Optional[Hashes],
warn_on_hash_mismatch: bool = True,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not some that needs to change, I’m just suddenly curious how peopel choose between positive and negative flags. Personally I’d probably choose to implement this as suppress_hash_mismatch_warning=False.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In this case I think I wanted to avoid a double negation and save half a brain cycle when reading the only condition using this flag.

) -> Optional[str]:
"""Check download_dir for previously downloaded file with correct hash
If a correct file is found return its path else None
Expand All @@ -195,10 +198,11 @@ def _check_download_dir(
try:
hashes.check_against_path(download_path)
except HashMismatch:
logger.warning(
"Previously-downloaded file %s has bad hash. Re-downloading.",
download_path,
)
if warn_on_hash_mismatch:
logger.warning(
"Previously-downloaded file %s has bad hash. Re-downloading.",
download_path,
)
os.unlink(download_path)
return None
return download_path
Expand Down Expand Up @@ -263,7 +267,7 @@ def __init__(

def _log_preparing_link(self, req: InstallRequirement) -> None:
"""Provide context for the requirement being prepared."""
if req.link.is_file and not req.original_link_is_in_wheel_cache:
if req.link.is_file and not req.is_wheel_from_cache:
message = "Processing %s"
information = str(display_path(req.link.file_path))
else:
Expand All @@ -284,7 +288,7 @@ def _log_preparing_link(self, req: InstallRequirement) -> None:
self._previous_requirement_header = (message, information)
logger.info(message, information)

if req.original_link_is_in_wheel_cache:
if req.is_wheel_from_cache:
with indent_log():
logger.info("Using cached %s", req.link.filename)

Expand Down Expand Up @@ -485,7 +489,18 @@ def prepare_linked_requirement(
file_path = None
if self.download_dir is not None and req.link.is_wheel:
hashes = self._get_linked_req_hashes(req)
file_path = _check_download_dir(req.link, self.download_dir, hashes)
file_path = _check_download_dir(
req.link,
self.download_dir,
hashes,
# When a locally built wheel has been found in cache, we don't warn
# about re-downloading when the already downloaded wheel hash does
# not match. This is because the hash must be checked against the
# original link, not the cached link. It that case the already
# downloaded file will be removed and re-fetched from cache (which
# implies a hash check against the cache entry's origin.json).
warn_on_hash_mismatch=not req.is_wheel_from_cache,
)

if file_path is not None:
# The file is already available, so mark it as downloaded
Expand Down Expand Up @@ -536,9 +551,35 @@ def _prepare_linked_requirement(
assert req.link
link = req.link

self._ensure_link_req_src_dir(req, parallel_builds)
hashes = self._get_linked_req_hashes(req)

if hashes and req.is_wheel_from_cache:
assert req.download_info is not None
assert link.is_wheel
assert link.is_file
# We need to verify hashes, and we have found the requirement in the cache
# of locally built wheels.
if (
isinstance(req.download_info.info, ArchiveInfo)
and req.download_info.info.hashes
and hashes.has_one_of(req.download_info.info.hashes)
):
# At this point we know the requirement was built from a hashable source
# artifact, and we verified that the cache entry's hash of the original
# artifact matches one of the hashes we expect. We don't verify hashes
# against the cached wheel, because the wheel is not the original.
hashes = None
else:
logger.warning(
"The hashes of the source archive found in cache entry "
"don't match, ignoring cached built wheel "
"and re-downloading source."
)
req.link = req.cached_wheel_source_link
link = req.link
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, I'm afraid this part still needs some work. If req.download_info contains an invalid hash, it re-downloads an URL with an invalid hash fragment, which means we never get a good cache entry.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, I resolved this by adding a new cached_wheel_source_link field to InstallRequirement. I also renamed original_link_is_in_wheel_cache to is_wheel_from_cache to better reflect its meaning.


self._ensure_link_req_src_dir(req, parallel_builds)

if link.is_existing_dir():
local_file = None
elif link.url not in self._downloaded:
Expand Down
12 changes: 11 additions & 1 deletion src/pip/_internal/req/req_install.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,11 @@ def __init__(
# PEP 508 URL requirement
link = Link(req.url)
self.link = self.original_link = link
self.original_link_is_in_wheel_cache = False

# When this InstallRequirement is a wheel obtained from the cache of locally
# built wheels, this is the source link corresponding to the cache entry, which
# was used to download and build the cached wheel.
self.cached_wheel_source_link: Optional[Link] = None

# Information about the location of the artifact that was downloaded . This
# property is guaranteed to be set in resolver results.
Expand Down Expand Up @@ -437,6 +441,12 @@ def is_wheel(self) -> bool:
return False
return self.link.is_wheel

@property
def is_wheel_from_cache(self) -> bool:
# When True, it means that this InstallRequirement is a local wheel file in the
# cache of locally built wheels.
return self.cached_wheel_source_link is not None

# Things valid for sdists
@property
def unpacked_source_directory(self) -> str:
Expand Down
2 changes: 1 addition & 1 deletion src/pip/_internal/resolution/legacy/resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -431,7 +431,7 @@ def _populate_link(self, req: InstallRequirement) -> None:
if cache_entry is not None:
logger.debug("Using cached wheel link: %s", cache_entry.link)
if req.link is req.original_link and cache_entry.persistent:
req.original_link_is_in_wheel_cache = True
req.cached_wheel_source_link = req.link
if cache_entry.origin is not None:
req.download_info = cache_entry.origin
else:
Expand Down
6 changes: 4 additions & 2 deletions src/pip/_internal/resolution/resolvelib/candidates.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,7 +259,7 @@ def __init__(
version: Optional[CandidateVersion] = None,
) -> None:
source_link = link
cache_entry = factory.get_wheel_cache_entry(link, name)
cache_entry = factory.get_wheel_cache_entry(source_link, name)
if cache_entry is not None:
logger.debug("Using cached wheel link: %s", cache_entry.link)
link = cache_entry.link
Expand All @@ -277,8 +277,10 @@ def __init__(
)

if cache_entry is not None:
assert ireq.link.is_wheel
assert ireq.link.is_file
if cache_entry.persistent and template.link is template.original_link:
ireq.original_link_is_in_wheel_cache = True
ireq.cached_wheel_source_link = source_link
if cache_entry.origin is not None:
ireq.download_info = cache_entry.origin
else:
Expand Down
2 changes: 1 addition & 1 deletion src/pip/_internal/resolution/resolvelib/factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -535,7 +535,7 @@ def get_wheel_cache_entry(
hash mismatches. Furthermore, cached wheels at present have
nondeterministic contents due to file modification times.
"""
if self._wheel_cache is None or self.preparer.require_hashes:
if self._wheel_cache is None:
return None
return self._wheel_cache.get_cache_entry(
link=link,
Expand Down
7 changes: 7 additions & 0 deletions src/pip/_internal/utils/hashes.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,13 @@ def check_against_path(self, path: str) -> None:
with open(path, "rb") as file:
return self.check_against_file(file)

def has_one_of(self, hashes: Dict[str, str]) -> bool:
"""Return whether any of the given hashes are allowed."""
for hash_name, hex_digest in hashes.items():
if self.is_hash_allowed(hash_name, hex_digest):
return True
return False

def __bool__(self) -> bool:
"""Return whether I know any known-good hashes."""
return bool(self._allowed)
Expand Down
42 changes: 42 additions & 0 deletions tests/functional/test_install.py
Original file line number Diff line number Diff line change
Expand Up @@ -729,6 +729,48 @@ def test_bad_link_hash_in_dep_install_failure(
assert "THESE PACKAGES DO NOT MATCH THE HASHES" in result.stderr, result.stderr


def test_hashed_install_from_cache(
script: PipTestEnvironment, data: TestData, tmpdir: Path
) -> None:
"""
Test that installing from a cached built wheel works and that the hash is verified
against the hash of the original source archived stored in the cache entry.
"""
with requirements_file(
"simple2==1.0 --hash=sha256:"
"9336af72ca661e6336eb87bc7de3e8844d853e3848c2b9bbd2e8bf01db88c2c7\n",
tmpdir,
) as reqs_file:
result = script.pip_install_local(
"--use-pep517", "--no-build-isolation", "-r", reqs_file.resolve()
)
assert "Created wheel for simple2" in result.stdout
script.pip("uninstall", "simple2", "-y")
result = script.pip_install_local(
"--use-pep517", "--no-build-isolation", "-r", reqs_file.resolve()
)
assert "Using cached simple2" in result.stdout
# now try with an invalid hash
with requirements_file(
"simple2==1.0 --hash=sha256:invalid\n",
tmpdir,
) as reqs_file:
script.pip("uninstall", "simple2", "-y")
result = script.pip_install_local(
"--use-pep517",
"--no-build-isolation",
"-r",
reqs_file.resolve(),
expect_error=True,
)
assert (
"WARNING: The hashes of the source archive found in cache entry "
"don't match, ignoring cached built wheel and re-downloading source."
) in result.stderr
assert "Using cached simple2" in result.stdout
assert "ERROR: THESE PACKAGES DO NOT MATCH THE HASHES" in result.stderr


def assert_re_match(pattern: str, text: str) -> None:
assert re.search(pattern, text), f"Could not find {pattern!r} in {text!r}"

Expand Down
6 changes: 4 additions & 2 deletions tests/unit/test_req.py
Original file line number Diff line number Diff line change
Expand Up @@ -411,7 +411,8 @@ def test_download_info_archive_legacy_cache(
reqset = resolver.resolve([ireq], True)
assert len(reqset.all_requirements) == 1
req = reqset.all_requirements[0]
assert req.original_link_is_in_wheel_cache
assert req.is_wheel_from_cache
assert req.cached_wheel_source_link
assert req.download_info
assert req.download_info.url == url
assert isinstance(req.download_info.info, ArchiveInfo)
Expand All @@ -437,7 +438,8 @@ def test_download_info_archive_cache_with_origin(
reqset = resolver.resolve([ireq], True)
assert len(reqset.all_requirements) == 1
req = reqset.all_requirements[0]
assert req.original_link_is_in_wheel_cache
assert req.is_wheel_from_cache
assert req.cached_wheel_source_link
assert req.download_info
assert req.download_info.url == url
assert isinstance(req.download_info.info, ArchiveInfo)
Expand Down
8 changes: 8 additions & 0 deletions tests/unit/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -425,6 +425,14 @@ def test_hash(self) -> None:
cache[Hashes({"sha256": ["ab", "cd"]})] = 42
assert cache[Hashes({"sha256": ["ab", "cd"]})] == 42

def test_has_one_of(self) -> None:
hashes = Hashes({"sha256": ["abcd", "efgh"], "sha384": ["ijkl"]})
assert hashes.has_one_of({"sha256": "abcd"})
assert hashes.has_one_of({"sha256": "efgh"})
assert not hashes.has_one_of({"sha256": "xyzt"})
empty_hashes = Hashes()
assert not empty_hashes.has_one_of({"sha256": "xyzt"})


class TestEncoding:
"""Tests for pip._internal.utils.encoding"""
Expand Down