From c9065dbba2a25a22ae124c7d3f10b707c3311312 Mon Sep 17 00:00:00 2001 From: Alex Becker Date: Sun, 5 May 2019 16:12:33 -0700 Subject: [PATCH 1/7] Fix #5874: Filter out candidates with hash mismatches. Changes the behavior of `pip install` in hash-checking mode to filter out any candidates whose hashes (obtained via URL) do not match the hashes provided. This prevents a HashMismatch error when a more preferred binary distribution is upload for a release after a user pins the hashes for that release. Note that a second hash comparison is performed when the candidate is downloaded. This is important because the first check is not secure: it trusts that the hash in the URL is the same as the hash in the content, and it also does not error when the user is in hash-checking mode but has not provided a hash. --- news/5874.feature | 1 + src/pip/_internal/index.py | 25 ++++++++++++++++++++++--- src/pip/_internal/utils/hashes.py | 5 +++++ 3 files changed, 28 insertions(+), 3 deletions(-) create mode 100644 news/5874.feature diff --git a/news/5874.feature b/news/5874.feature new file mode 100644 index 00000000000..f66051af388 --- /dev/null +++ b/news/5874.feature @@ -0,0 +1 @@ +Only select candidates in hash-checking mode that will pass hash checks. diff --git a/src/pip/_internal/index.py b/src/pip/_internal/index.py index ff614b346c0..52c11d0221a 100644 --- a/src/pip/_internal/index.py +++ b/src/pip/_internal/index.py @@ -51,6 +51,7 @@ from pip._internal.pep425tags import Pep425Tag from pip._internal.req import InstallRequirement from pip._internal.download import PipSession + from pip._internal.utils import Hashes SecureOrigin = Tuple[str, str, Optional[str]] BuildTag = Tuple[Any, ...] # either empty tuple or Tuple[int, str] @@ -386,12 +387,28 @@ def iter_applicable(self): # Again, converting version to str to deal with debundling. return (c for c in self.iter_all() if str(c.version) in self._versions) - def get_best(self): - # type: () -> Optional[InstallationCandidate] + def get_best(self, hashes=None): + # type: (Optional[Hashes]) -> Optional[InstallationCandidate] """Return the best candidate available, or None if no applicable candidates are found. """ candidates = list(self.iter_applicable()) + if hashes: + # If we are in hash-checking mode, filter out candidates that will + # fail the hash check. This prevents HashMismatch errors when a new + # distribution is uploaded for an old release. + def test_against_hashes(candidate): + link = candidate.location + is_match = hashes.test_against_hash(link.hash_name, link.hash) + if not is_match: + logger.warning( + "candidate %s ignored: hash %s:%s not among provided " + "hashes", + link.filename, link.hash_name, link.hash, + ) + return is_match + + candidates = [c for c in candidates if test_against_hashes(c)] return self._evaluator.get_best_candidate(candidates) @@ -764,7 +781,9 @@ def find_requirement(self, req, upgrade): Raises DistributionNotFound or BestVersionAlreadyInstalled otherwise """ candidates = self.find_candidates(req.name, req.specifier) - best_candidate = candidates.get_best() + # Get any hashes supplied by the user to filter candidates. + hashes = req.hashes(trust_internet=False) + best_candidate = candidates.get_best(hashes) installed_version = None # type: Optional[_BaseVersion] if req.satisfied_by is not None: diff --git a/src/pip/_internal/utils/hashes.py b/src/pip/_internal/utils/hashes.py index a7142069fab..26045a876d9 100644 --- a/src/pip/_internal/utils/hashes.py +++ b/src/pip/_internal/utils/hashes.py @@ -86,6 +86,11 @@ def check_against_path(self, path): with open(path, 'rb') as file: return self.check_against_file(file) + def test_against_hash(self, hash_name, hash_value): + # type (str, str) -> bool + """Return whether a given hash is among the known-good hashes.""" + return hash_value in self._allowed.get(hash_name, []) + def __nonzero__(self): # type: () -> bool """Return whether I know any known-good hashes.""" From bcc8de3978d2c098dec1a53d30d501ba74e76f14 Mon Sep 17 00:00:00 2001 From: Alex Becker Date: Sun, 5 May 2019 17:12:33 -0700 Subject: [PATCH 2/7] handle candidates with no hash in their URL --- src/pip/_internal/index.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/pip/_internal/index.py b/src/pip/_internal/index.py index 52c11d0221a..8c5310ab0fb 100644 --- a/src/pip/_internal/index.py +++ b/src/pip/_internal/index.py @@ -395,10 +395,16 @@ def get_best(self, hashes=None): candidates = list(self.iter_applicable()) if hashes: # If we are in hash-checking mode, filter out candidates that will - # fail the hash check. This prevents HashMismatch errors when a new - # distribution is uploaded for an old release. + # fail the hash check per the hash provided in their Link URL. + # This prevents HashMismatch errors when a new distribution is + # uploaded for an old release. + # This is not a security check: after download the contents will + # be hashed and compared to the known-good hashes. def test_against_hashes(candidate): link = candidate.location + if not link.hash: + # We can only filter candidates with hashes in their URLs. + return True is_match = hashes.test_against_hash(link.hash_name, link.hash) if not is_match: logger.warning( From b7042382e8533e780fa353197b0a1f2cb6f01b75 Mon Sep 17 00:00:00 2001 From: Alex Becker Date: Sun, 5 May 2019 17:13:11 -0700 Subject: [PATCH 3/7] fix fake hash causing broken test --- tests/unit/test_req.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/unit/test_req.py b/tests/unit/test_req.py index 2f859b929aa..cb7c4a71258 100644 --- a/tests/unit/test_req.py +++ b/tests/unit/test_req.py @@ -115,7 +115,9 @@ def test_missing_hash_checking(self): )) # This flag activates --require-hashes mode: reqset.add_requirement(get_processed_req_from_line( - 'tracefront==0.1 --hash=sha256:somehash', lineno=2, + 'tracefront==0.1 --hash=sha256:d74cd6119a883bf15e2bae63ac0fea33b48' + '45e1a76b82aad586537d92a25bb51', + lineno=2, )) # This hash should be accepted because it came from the reqs file, not # from the internet: @@ -145,7 +147,8 @@ def test_missing_hash_checking(self): r' blessings==1.0 --hash=sha256:[0-9a-f]+\n' r'THESE PACKAGES DO NOT MATCH THE HASHES.*\n' r' tracefront==0.1 .*:\n' - r' Expected sha256 somehash\n' + r' Expected sha256 d74cd6119a883bf15e2bae63ac0fea33b4845e1a' + r'76b82aad586537d92a25bb51\n' r' Got [0-9a-f]+$', resolver.resolve, reqset From a97228615ca7deff5df9faaf3399c27ede60e4f4 Mon Sep 17 00:00:00 2001 From: Alex Becker Date: Sun, 5 May 2019 17:46:27 -0700 Subject: [PATCH 4/7] fix mypy import error --- src/pip/_internal/index.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pip/_internal/index.py b/src/pip/_internal/index.py index 8c5310ab0fb..d2a09f96c32 100644 --- a/src/pip/_internal/index.py +++ b/src/pip/_internal/index.py @@ -51,7 +51,7 @@ from pip._internal.pep425tags import Pep425Tag from pip._internal.req import InstallRequirement from pip._internal.download import PipSession - from pip._internal.utils import Hashes + from pip._internal.utils.hashes import Hashes SecureOrigin = Tuple[str, str, Optional[str]] BuildTag = Tuple[Any, ...] # either empty tuple or Tuple[int, str] From 2785d08de6d92d66a77180c3517ae9c07e27c4cb Mon Sep 17 00:00:00 2001 From: Alex Becker Date: Thu, 9 May 2019 07:21:16 -0700 Subject: [PATCH 5/7] only warn when best candidate is skipped due to hash check --- src/pip/_internal/index.py | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/src/pip/_internal/index.py b/src/pip/_internal/index.py index d2a09f96c32..8ac33c4d66e 100644 --- a/src/pip/_internal/index.py +++ b/src/pip/_internal/index.py @@ -310,6 +310,12 @@ def _sort_key(self, candidate): pri = -(support_num) return (binary_preference, candidate.version, build_tag, pri) + def get_best_candidates(self, candidates): + """ + Return the candidates in descending order of preference. + """ + return sorted(candidates, key=self._sort_key, reverse=True) + def get_best_candidate(self, candidates): # type: (List[InstallationCandidate]) -> InstallationCandidate """ @@ -392,12 +398,13 @@ def get_best(self, hashes=None): """Return the best candidate available, or None if no applicable candidates are found. """ - candidates = list(self.iter_applicable()) + iter_candidates = self.iter_applicable() if hashes: # If we are in hash-checking mode, filter out candidates that will - # fail the hash check per the hash provided in their Link URL. - # This prevents HashMismatch errors when a new distribution is - # uploaded for an old release. + # fail the hash check per the hash provided in their Link URL to + # prevent HashMismatch errors. However, if no hashes are provided + # we don't want to filter out all candidates, but instead let + # a HashMissing error get raised later. # This is not a security check: after download the contents will # be hashed and compared to the known-good hashes. def test_against_hashes(candidate): @@ -414,8 +421,13 @@ def test_against_hashes(candidate): ) return is_match - candidates = [c for c in candidates if test_against_hashes(c)] - return self._evaluator.get_best_candidate(candidates) + candidates = self._evaluator.get_best_candidates(iter_candidates) + for c in candidates: + if test_against_hashes(c): + return c + else: + candidates = list(iter_candidates) + return self._evaluator.get_best_candidate(candidates) class PackageFinder(object): From 289e75e14b51c49a4ec69f3b49298f47982660b9 Mon Sep 17 00:00:00 2001 From: Alex Becker Date: Thu, 9 May 2019 07:40:54 -0700 Subject: [PATCH 6/7] remove local function test_against_hashes, it does not add clarity --- src/pip/_internal/index.py | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/src/pip/_internal/index.py b/src/pip/_internal/index.py index 8ac33c4d66e..45009311be3 100644 --- a/src/pip/_internal/index.py +++ b/src/pip/_internal/index.py @@ -407,24 +407,19 @@ def get_best(self, hashes=None): # a HashMissing error get raised later. # This is not a security check: after download the contents will # be hashed and compared to the known-good hashes. - def test_against_hashes(candidate): - link = candidate.location - if not link.hash: - # We can only filter candidates with hashes in their URLs. - return True - is_match = hashes.test_against_hash(link.hash_name, link.hash) - if not is_match: - logger.warning( - "candidate %s ignored: hash %s:%s not among provided " - "hashes", - link.filename, link.hash_name, link.hash, - ) - return is_match - candidates = self._evaluator.get_best_candidates(iter_candidates) for c in candidates: - if test_against_hashes(c): + link = c.location + if not link.hash: + # We can only filter candidates with hashes in their URLs. + return c + if hashes.test_against_hash(link.hash_name, link.hash): return c + logger.warning( + "candidate %s ignored: hash %s:%s not among provided " + "hashes", + link.filename, link.hash_name, link.hash, + ) else: candidates = list(iter_candidates) return self._evaluator.get_best_candidate(candidates) From a6ad9a618587969663d63e4838327e700d0f7ad2 Mon Sep 17 00:00:00 2001 From: Alex Becker Date: Sat, 25 May 2019 15:07:38 -0700 Subject: [PATCH 7/7] move filtering into get_best_candidate; remove get_best_candidates --- src/pip/_internal/index.py | 68 ++++++++++++++++++-------------------- 1 file changed, 32 insertions(+), 36 deletions(-) diff --git a/src/pip/_internal/index.py b/src/pip/_internal/index.py index 45009311be3..8800b18ced8 100644 --- a/src/pip/_internal/index.py +++ b/src/pip/_internal/index.py @@ -310,22 +310,41 @@ def _sort_key(self, candidate): pri = -(support_num) return (binary_preference, candidate.version, build_tag, pri) - def get_best_candidates(self, candidates): + def get_best_candidate(self, candidates, hashes=None): + # type: (List[InstallationCandidate], Optional[Hashes]) -> Optional[InstallationCandidate] # noqa: E501 """ - Return the candidates in descending order of preference. - """ - return sorted(candidates, key=self._sort_key, reverse=True) - - def get_best_candidate(self, candidates): - # type: (List[InstallationCandidate]) -> InstallationCandidate - """ - Return the best candidate per the instance's sort order, or None if - no candidates are given. + Return the best candidate per the instance's sort order, ignoring + any that do not match the provided hashes in hash-checking mode. + Returns None if no candidates are given or none match the hashes. """ if not candidates: return None - return max(candidates, key=self._sort_key) + # If we are in hash-checking mode, filter out candidates that will + # fail the hash check per the hash provided in their Link URL to + # prevent HashMismatch errors. However, if no hashes are provided + # we don't want to filter out all candidates, but instead let + # a HashMissing error get raised later. + # This is not a security check: after download the contents will + # be hashed and compared to the known-good hashes. + if not hashes: + return max(candidates, self._sort_key) + + candidates = sorted(candidates, key=self._sort_key, reverse=True) + for candidate in candidates: + link = candidate.location + if not link.hash: + # Candidates with no hash in their URLs probably still match + # the provided hashes, so we shouldn't filter them out. + return candidate + if hashes.test_against_hash(link.hash_name, link.hash): + return candidate + logger.warning( + "candidate %s ignored: hash %s:%s not among provided hashes", + link.filename, link.hash_name, link.hash, + ) + + return None class FoundCandidates(object): @@ -398,31 +417,8 @@ def get_best(self, hashes=None): """Return the best candidate available, or None if no applicable candidates are found. """ - iter_candidates = self.iter_applicable() - if hashes: - # If we are in hash-checking mode, filter out candidates that will - # fail the hash check per the hash provided in their Link URL to - # prevent HashMismatch errors. However, if no hashes are provided - # we don't want to filter out all candidates, but instead let - # a HashMissing error get raised later. - # This is not a security check: after download the contents will - # be hashed and compared to the known-good hashes. - candidates = self._evaluator.get_best_candidates(iter_candidates) - for c in candidates: - link = c.location - if not link.hash: - # We can only filter candidates with hashes in their URLs. - return c - if hashes.test_against_hash(link.hash_name, link.hash): - return c - logger.warning( - "candidate %s ignored: hash %s:%s not among provided " - "hashes", - link.filename, link.hash_name, link.hash, - ) - else: - candidates = list(iter_candidates) - return self._evaluator.get_best_candidate(candidates) + candidates = list(self.iter_applicable()) + return self._evaluator.get_best_candidate(candidates, hashes=hashes) class PackageFinder(object):