Skip to content

Validate VCS urls in hash-checking mode using their commit hashes #11968

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 6 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions docs/html/topics/secure-installs.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,12 @@ FooProject == 1.2 \

This prevents a surprising hash mismatch upon the release of a new version that matches the requirement specifier.

```{versionadded} 23.2
VCS URLs that reference a commit hash are now supported in hash checking mode,
since pip now trusts that the VCS provides the required source immutability guarantees.
This is currently only supported with `git` URLs.
```

### Forcing Hash-checking mode

It is possible to force the hash checking mode to be enabled, by passing `--require-hashes` command-line option.
Expand Down
1 change: 1 addition & 0 deletions news/6469.feature.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Validate VCS urls in hash-checking mode using their commit hashes.
26 changes: 26 additions & 0 deletions src/pip/_internal/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -501,6 +501,32 @@ class VcsHashUnsupported(HashError):
)


class CacheEntryTypeHashNotSupported(HashError):
"""A wheel cache entry was built from a URL that does not support hash checking."""

order = 0
head = (
"Can't verify hashes for these cached requirements because they are "
"from a URL that does not support hash checking:"
)


class DependencyVcsHashNotSupported(HashError):
order = 0
head = (
"Can't verify hashes for these VCS requirements because they are "
"not user supplied, so we don't assume their VCS ref is trusted:"
)


class MutableVcsRefHashNotSupported(HashError):
order = 0
head = (
"Can't verify hashes for these VCS requirements because their ref "
"is not immutable:"
)


class DirectoryUrlHashUnsupported(HashError):
"""A hash was provided for a version-control-system-based requirement, but
we don't have a method for hashing those."""
Expand Down
68 changes: 47 additions & 21 deletions src/pip/_internal/operations/prepare.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,18 +15,21 @@
from pip._internal.distributions import make_distribution_for_install_requirement
from pip._internal.distributions.installed import InstalledDistribution
from pip._internal.exceptions import (
CacheEntryTypeHashNotSupported,
DependencyVcsHashNotSupported,
DirectoryUrlHashUnsupported,
HashMismatch,
HashUnpinned,
InstallationError,
MetadataInconsistent,
MutableVcsRefHashNotSupported,
NetworkConnectionError,
PreviousBuildDirError,
VcsHashUnsupported,
)
from pip._internal.index.package_finder import PackageFinder
from pip._internal.metadata import BaseDistribution, get_metadata_distribution
from pip._internal.models.direct_url import ArchiveInfo
from pip._internal.models.direct_url import ArchiveInfo, VcsInfo
from pip._internal.models.link import Link
from pip._internal.models.wheel import Wheel
from pip._internal.network.download import BatchDownloader, Downloader
Expand All @@ -41,7 +44,7 @@
direct_url_for_editable,
direct_url_from_link,
)
from pip._internal.utils.hashes import Hashes, MissingHashes
from pip._internal.utils.hashes import Hashes, MissingHashes, VcsHashes
from pip._internal.utils.logging import indent_log
from pip._internal.utils.misc import (
display_path,
Expand Down Expand Up @@ -72,10 +75,14 @@ def _get_prepared_distribution(
return abstract_dist.get_metadata_distribution()


def unpack_vcs_link(link: Link, location: str, verbosity: int) -> None:
def unpack_vcs_link(
link: Link, location: str, verbosity: int, hashes: Optional[Hashes] = None
) -> None:
vcs_backend = vcs.get_backend_for_scheme(link.scheme)
assert vcs_backend is not None
vcs_backend.unpack(location, url=hide_url(link.url), verbosity=verbosity)
if hashes and not vcs_backend.is_immutable_rev_checkout(link.url, location):
raise MutableVcsRefHashNotSupported()


class File:
Expand Down Expand Up @@ -152,7 +159,7 @@ def unpack_url(
"""
# non-editable vcs urls
if link.is_vcs:
unpack_vcs_link(link, location, verbosity=verbosity)
unpack_vcs_link(link, location, verbosity=verbosity, hashes=hashes)
return None

assert not link.is_existing_dir()
Expand Down Expand Up @@ -335,6 +342,14 @@ def _get_linked_req_hashes(self, req: InstallRequirement) -> Hashes:
# and raise some more informative errors than otherwise.
# (For example, we can raise VcsHashUnsupported for a VCS URL
# rather than HashMissing.)

# Check that --hash is not used with VCS and local directory direct URLs.
if req.original_link:
if req.original_link.is_vcs and req.hashes(trust_internet=False):
raise VcsHashUnsupported()
if req.original_link.is_existing_dir() and req.hashes(trust_internet=False):
raise DirectoryUrlHashUnsupported()

if not self.require_hashes:
return req.hashes(trust_internet=True)

Expand All @@ -343,7 +358,9 @@ def _get_linked_req_hashes(self, req: InstallRequirement) -> Hashes:
# report less-useful error messages for unhashable
# requirements, complaining that there's no hash provided.
if req.link.is_vcs:
raise VcsHashUnsupported()
if not req.user_supplied:
raise DependencyVcsHashNotSupported()
return VcsHashes()
if req.link.is_existing_dir():
raise DirectoryUrlHashUnsupported()
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note to self: when req.is_wheel_from_cache is True, link is the cached wheel and not the link to the artifact that was downloaded to build the wheel. So this function may not necessarily do what one expects when reasoning on req.link.


Expand Down Expand Up @@ -559,24 +576,33 @@ def _prepare_linked_requirement(
assert link.is_file
# We need to verify hashes, and we have found the requirement in the cache
# of locally built wheels.
if (
isinstance(req.download_info.info, ArchiveInfo)
and req.download_info.info.hashes
and hashes.has_one_of(req.download_info.info.hashes)
):
# At this point we know the requirement was built from a hashable source
# artifact, and we verified that the cache entry's hash of the original
# artifact matches one of the hashes we expect. We don't verify hashes
# against the cached wheel, because the wheel is not the original.
if isinstance(req.download_info.info, ArchiveInfo):
if req.download_info.info.hashes and hashes.has_one_of(
req.download_info.info.hashes
):
# At this point we know the requirement was built from a hashable
# source artifact, and we verified that the cache entry's hash of
# the original artifact matches one of the hashes we expect. We
# don't verify hashes against the cached wheel, because the wheel is
# not the original.
hashes = None
else:
logger.warning(
"The hashes of the source archive found in cache entry "
"don't match, ignoring cached built wheel "
"and re-downloading source."
)
req.link = req.cached_wheel_source_link
link = req.link
elif isinstance(req.download_info.info, VcsInfo):
if not req.user_supplied:
raise DependencyVcsHashNotSupported()
Comment on lines +597 to +599
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if this would break anything. How can this branch be reached?

Copy link
Member Author

@sbidoul sbidoul Apr 17, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This would raise in require-hashes mode only, if you have a dependency as a git direct URL that is obtained from cache.

I think these should not be considered hash-valid unless the commit hash has been provided by the user, either as a requirement file or a constraint.

This PR still needs work. Next step is adding a lot of test coverage. Then possibly some refactoring.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If an immutable VCS URL is both user-supplied and a dependency, would this cause the latter specifier break installation?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It should not, because when the dependency is examined by the resolver it is "merged" with the user-supplied one? That's definitely a test case to consider.

# Don't verify hashes against the cached wheel: if it is in cache,
# it means it was built from a URL referencing an immutable commit
# hash.
hashes = None
else:
logger.warning(
"The hashes of the source archive found in cache entry "
"don't match, ignoring cached built wheel "
"and re-downloading source."
)
req.link = req.cached_wheel_source_link
link = req.link
raise CacheEntryTypeHashNotSupported()

self._ensure_link_req_src_dir(req, parallel_builds)

Expand Down
2 changes: 1 addition & 1 deletion src/pip/_internal/req/req_install.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,7 @@ def has_hash_options(self) -> bool:
"""
return bool(self.hash_options)

def hashes(self, trust_internet: bool = True) -> Hashes:
def hashes(self, *, trust_internet: bool) -> Hashes:
"""Return a hash-comparer that considers my option- and URL-based
hashes to be known-good.

Expand Down
10 changes: 10 additions & 0 deletions src/pip/_internal/utils/hashes.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,3 +149,13 @@ def __init__(self) -> None:

def _raise(self, gots: Dict[str, "_Hash"]) -> "NoReturn":
raise HashMissing(gots[FAVORITE_HASH].hexdigest())


class VcsHashes(MissingHashes):
"""A workalike for Hashes used for VCS references

It never matches, and is used as a sentinel to indicate that we should
check the VCS reference is an immutable commit reference.
"""

pass
Comment on lines +160 to +161
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
pass

4 changes: 2 additions & 2 deletions tests/unit/test_req.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,7 @@ def test_unsupported_hashes(self, data: TestData) -> None:
reqset = RequirementSet()
reqset.add_unnamed_requirement(
get_processed_req_from_line(
"git+git://github.com/pypa/pip-test-package --hash=sha256:123",
"git+https://github.com/pypa/pip-test-package --hash=sha256:123",
lineno=1,
)
)
Expand All @@ -229,7 +229,7 @@ def test_unsupported_hashes(self, data: TestData) -> None:
match=(
r"Can't verify hashes for these requirements because we don't "
r"have a way to hash version control repositories:\n"
r" git\+git://github\.com/pypa/pip-test-package \(from -r "
r" git\+https://github\.com/pypa/pip-test-package \(from -r "
r"file \(line 1\)\)\n"
r"Can't verify hashes for these file:// requirements because "
r"they point to directories:\n"
Expand Down