Skip to content

Fix #5874: Filter out candidates with hash mismatches. #6464

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 7 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions news/5874.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Only select candidates in hash-checking mode that will pass hash checks.
46 changes: 37 additions & 9 deletions src/pip/_internal/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
from pip._internal.pep425tags import Pep425Tag
from pip._internal.req import InstallRequirement
from pip._internal.download import PipSession
from pip._internal.utils.hashes import Hashes

SecureOrigin = Tuple[str, str, Optional[str]]
BuildTag = Tuple[Any, ...] # either empty tuple or Tuple[int, str]
Expand Down Expand Up @@ -309,16 +310,41 @@ def _sort_key(self, candidate):
pri = -(support_num)
return (binary_preference, candidate.version, build_tag, pri)

def get_best_candidate(self, candidates, hashes=None):
    # type: (List[InstallationCandidate], Optional[Hashes]) -> Optional[InstallationCandidate]  # noqa: E501
    """
    Return the best candidate per the instance's sort order, ignoring
    any that do not match the provided hashes in hash-checking mode.

    :param candidates: the candidates to choose from.
    :param hashes: the known-good hashes to filter against. When this is
        None or falsy (no hashes known), no filtering is done.
    :return: the highest-ranked acceptable candidate, or None if no
        candidates are given or none match the hashes.
    """
    if not candidates:
        return None

    # If we are in hash-checking mode, filter out candidates that will
    # fail the hash check per the hash provided in their Link URL to
    # prevent HashMismatch errors. However, if no hashes are provided
    # we don't want to filter out all candidates, but instead let
    # a HashMissing error get raised later.
    # This is not a security check: after download the contents will
    # be hashed and compared to the known-good hashes.
    if not hashes:
        # The sort key must be passed by keyword; passed positionally,
        # max() would compare the list against the key function itself.
        return max(candidates, key=self._sort_key)

    # Walk the candidates from best to worst and stop at the first one
    # that is not known to fail the hash check.
    for candidate in sorted(candidates, key=self._sort_key, reverse=True):
        link = candidate.location
        if not link.hash:
            # Candidates with no hash in their URLs probably still match
            # the provided hashes, so we shouldn't filter them out.
            return candidate
        if hashes.test_against_hash(link.hash_name, link.hash):
            return candidate
        logger.warning(
            "candidate %s ignored: hash %s:%s not among provided hashes",
            link.filename, link.hash_name, link.hash,
        )

    return None


class FoundCandidates(object):
Expand Down Expand Up @@ -386,13 +412,13 @@ def iter_applicable(self):
# Again, converting version to str to deal with debundling.
return (c for c in self.iter_all() if str(c.version) in self._versions)

def get_best(self, hashes=None):
    # type: (Optional[Hashes]) -> Optional[InstallationCandidate]
    """Return the best candidate available, or None if no applicable
    candidates are found.

    :param hashes: optional known-good hashes, forwarded unchanged to the
        evaluator's get_best_candidate().
    """
    candidates = list(self.iter_applicable())
    return self._evaluator.get_best_candidate(candidates, hashes=hashes)


class PackageFinder(object):
Expand Down Expand Up @@ -764,7 +790,9 @@ def find_requirement(self, req, upgrade):
Raises DistributionNotFound or BestVersionAlreadyInstalled otherwise
"""
candidates = self.find_candidates(req.name, req.specifier)
best_candidate = candidates.get_best()
# Get any hashes supplied by the user to filter candidates.
hashes = req.hashes(trust_internet=False)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Conceptually, there should be a difference between supplying --require-hashes (i.e. hash mode with an empty hash list) and not (not hash mode). candidates.get_best() should only receive the hash comparer in hash mode, and None otherwise (and get_best probably needs to check for None instead of a falsy hash comparer).

Copy link
Author

@alexbecker alexbecker May 9, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think get_best should not filter on falsy hashes. If I supply --require-hashes without any hashes and filter on hashes, this filters out all candidates and results in the following error:

ERROR: Could not find a version that satisfies the requirement <package_name> (from versions: ...)

If I don't filter on falsy hashes then using --require-hashes without any hashes results in:

ERROR: Hashes are required in --require-hashes mode, but they are missing from some requirements. Here is a list of those requirements along with the hashes their downloaded archives actually had. Add lines like these to your requirements files to prevent tampering. (If you did not enable --require-hashes manually, note that it turns on automatically when any package has a hash.)
    tox==3.0 --hash=sha256:9ee7de958a43806402a38c0d2aa07fa8553f4d2c20a15b140e9f771c2afeade0

I think the latter is much more helpful, and clearly some work went into it via the MissingHashes class.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Although perhaps this is a better place to implement MissingHashes logic?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Tried replacing MissingHashes with a raise in get_best. It works, except that it makes the error about missing hashes take precedence over the error about not having versions pinned. Currently not having versions pinned in hash mode takes precedence. I am not sure whether this order of precedence is intentional.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe it’s a good idea to use MissingHashes here (but I don’t think you can just move it here?)

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think I would prefer to not filter, with a comment about how if hashes is None, an error will be raised in unpack_url which explains what hash to pin to. If desired, in a future PR I could refactor away MissingHashes and raise the same error in get_best instead (which is what I meant by "perhaps this is a better place to implement MissingHashes logic"). But I'd rather not mix refactoring with feature work, and it would require some care to get the order of precedence for error messages right (there's a lot of error handling logic right now in RequirementPreparer.prepare_linked_requirement which would need to get moved around).

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you have a plan how the refactoring would be done? If so, maybe it would be better to refactor first, and implement the feature on top of that.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It turns out that such a refactoring has a downside: the current implementation of MissingHashes will provide the correct hash to the user even if there is no remote hash, whereas if I were to move the raise into get_best it would happen before the candidate is downloaded, and so I could only provide the hash to the user if it was provided by the index. So I think it is not worth doing.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Alright then, let’s work on what we have and improve (if possible) in the future.

best_candidate = candidates.get_best(hashes)

installed_version = None # type: Optional[_BaseVersion]
if req.satisfied_by is not None:
Expand Down
5 changes: 5 additions & 0 deletions src/pip/_internal/utils/hashes.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,11 @@ def check_against_path(self, path):
with open(path, 'rb') as file:
return self.check_against_file(file)

def test_against_hash(self, hash_name, hash_value):
# type (str, str) -> bool
"""Return whether a given hash is among the known-good hashes."""
return hash_value in self._allowed.get(hash_name, [])

def __nonzero__(self):
# type: () -> bool
"""Return whether I know any known-good hashes."""
Expand Down
7 changes: 5 additions & 2 deletions tests/unit/test_req.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,9 @@ def test_missing_hash_checking(self):
))
# This flag activates --require-hashes mode:
reqset.add_requirement(get_processed_req_from_line(
'tracefront==0.1 --hash=sha256:somehash', lineno=2,
'tracefront==0.1 --hash=sha256:d74cd6119a883bf15e2bae63ac0fea33b48'
'45e1a76b82aad586537d92a25bb51',
lineno=2,
))
# This hash should be accepted because it came from the reqs file, not
# from the internet:
Expand Down Expand Up @@ -145,7 +147,8 @@ def test_missing_hash_checking(self):
r' blessings==1.0 --hash=sha256:[0-9a-f]+\n'
r'THESE PACKAGES DO NOT MATCH THE HASHES.*\n'
r' tracefront==0.1 .*:\n'
r' Expected sha256 somehash\n'
r' Expected sha256 d74cd6119a883bf15e2bae63ac0fea33b4845e1a'
r'76b82aad586537d92a25bb51\n'
r' Got [0-9a-f]+$',
resolver.resolve,
reqset
Expand Down