From 3a2ee333a91dcb7f947b714760e0b5832b311f29 Mon Sep 17 00:00:00 2001 From: Chris Jerdonek Date: Fri, 5 Jul 2019 00:42:51 -0700 Subject: [PATCH 1/5] Add Hashes.is_hash_allowed(). --- src/pip/_internal/utils/hashes.py | 8 ++++++++ tests/unit/test_utils.py | 16 ++++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/src/pip/_internal/utils/hashes.py b/src/pip/_internal/utils/hashes.py index a7142069fab..5ffa330181c 100644 --- a/src/pip/_internal/utils/hashes.py +++ b/src/pip/_internal/utils/hashes.py @@ -44,6 +44,14 @@ def __init__(self, hashes=None): """ self._allowed = {} if hashes is None else hashes + def is_hash_allowed( + self, + hash_name, # type: str + hex_digest, # type: str + ): + """Return whether the given hex digest is allowed.""" + return hex_digest in self._allowed.get(hash_name, []) + def check_against_chunks(self, chunks): # type: (Iterator[bytes]) -> None """Check good hashes against ones built from iterable of chunks of diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py index 903b0ec594a..290292aaab9 100644 --- a/tests/unit/test_utils.py +++ b/tests/unit/test_utils.py @@ -442,6 +442,22 @@ def test_resolve_symlinks(self, tmpdir): class TestHashes(object): """Tests for pip._internal.utils.hashes""" + @pytest.mark.parametrize('hash_name, hex_digest, expected', [ + # Test a value that matches but with the wrong hash_name. + ('sha384', 128 * 'a', False), + # Test matching values, including values other than the first. + ('sha512', 128 * 'a', True), + ('sha512', 128 * 'b', True), + # Test a matching hash_name with a value that doesn't match. + ('sha512', 128 * 'c', False), + ]) + def test_is_hash_allowed(self, hash_name, hex_digest, expected): + hashes_data = { + 'sha512': [128 * 'a', 128 * 'b'], + } + hashes = Hashes(hashes_data) + assert hashes.is_hash_allowed(hash_name, hex_digest) == expected + def test_success(self, tmpdir): """Make sure no error is raised when at least one hash matches. From e85a848ec8c9bd62ed05bc4d04bf33758df0ec48 Mon Sep 17 00:00:00 2001 From: Chris Jerdonek Date: Fri, 5 Jul 2019 00:48:17 -0700 Subject: [PATCH 2/5] Add Link.is_hash_allowed(). --- src/pip/_internal/models/link.py | 15 ++++++++++++++ tests/unit/test_link.py | 34 ++++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+) diff --git a/src/pip/_internal/models/link.py b/src/pip/_internal/models/link.py index 54c97f5263d..378e94797bb 100644 --- a/src/pip/_internal/models/link.py +++ b/src/pip/_internal/models/link.py @@ -13,6 +13,7 @@ if MYPY_CHECK_RUNNING: from typing import Optional, Text, Tuple, Union from pip._internal.index import HTMLPage + from pip._internal.utils.hashes import Hashes class Link(KeyBasedCompareMixin): @@ -193,3 +194,17 @@ def is_artifact(self): def is_yanked(self): # type: () -> bool return self.yanked_reason is not None + + @property + def has_hash(self): + return self.hash_name is not None + + def is_hash_allowed(self, hashes): + # type: (Hashes) -> bool + """ + Return True if the link has a hash and it is allowed. + """ + if not self.has_hash: + return False + + return hashes.is_hash_allowed(self.hash_name, hex_digest=self.hash) diff --git a/tests/unit/test_link.py b/tests/unit/test_link.py index edbbe84624b..a1d8d40a8bc 100644 --- a/tests/unit/test_link.py +++ b/tests/unit/test_link.py @@ -1,6 +1,7 @@ import pytest from pip._internal.models.link import Link +from pip._internal.utils.hashes import Hashes class TestLink: @@ -83,3 +84,36 @@ def test_is_yanked(self, yanked_reason, expected): yanked_reason=yanked_reason, ) assert link.is_yanked == expected + + @pytest.mark.parametrize('hash_name, hex_digest, expected', [ + # Test a value that matches but with the wrong hash_name. + ('sha384', 128 * 'a', False), + # Test matching values, including values other than the first. + ('sha512', 128 * 'a', True), + ('sha512', 128 * 'b', True), + # Test a matching hash_name with a value that doesn't match. + ('sha512', 128 * 'c', False), + # Test a link without a hash value. + ('sha512', '', False), + ]) + def test_is_hash_allowed(self, hash_name, hex_digest, expected): + url = ( + 'https://example.com/wheel.whl#{hash_name}={hex_digest}'.format( + hash_name=hash_name, + hex_digest=hex_digest, + ) + ) + link = Link(url) + hashes_data = { + 'sha512': [128 * 'a', 128 * 'b'], + } + hashes = Hashes(hashes_data) + assert link.is_hash_allowed(hashes) == expected + + def test_is_hash_allowed__no_hash(self): + link = Link('https://example.com/wheel.whl') + hashes_data = { + 'sha512': [128 * 'a'], + } + hashes = Hashes(hashes_data) + assert not link.is_hash_allowed(hashes) From e80fc233ff0c927e581b6803024975a548e582bc Mon Sep 17 00:00:00 2001 From: Chris Jerdonek Date: Wed, 10 Jul 2019 22:53:16 -0700 Subject: [PATCH 3/5] Add filter_unallowed_hashes(). --- src/pip/_internal/index.py | 40 +++++++++++++++++++++++ tests/unit/test_index.py | 66 +++++++++++++++++++++++++++----------- 2 files changed, 88 insertions(+), 18 deletions(-) diff --git a/src/pip/_internal/index.py b/src/pip/_internal/index.py index 1352de1a99e..e626ab73b67 100644 --- a/src/pip/_internal/index.py +++ b/src/pip/_internal/index.py @@ -50,6 +50,7 @@ from pip._internal.req import InstallRequirement from pip._internal.download import PipSession from pip._internal.pep425tags import Pep425Tag + from pip._internal.utils.hashes import Hashes BuildTag = Tuple[Any, ...] # either empty tuple or Tuple[int, str] CandidateSortingKey = ( @@ -440,6 +441,45 @@ def evaluate_link(self, link): return (True, version) +def filter_unallowed_hashes( + candidates, # type: List[InstallationCandidate] + hashes, # type: Hashes +): + # type: (...) -> List[InstallationCandidate] + """ + Filter out candidates whose hashes aren't allowed, and return a new + list of candidates. + + If at least one candidate has an allowed hash, then all candidates with + either an allowed hash or no hash specified are returned. Otherwise, + the given candidates are returned. + + Including the candidates with no hash specified when there is a match + allows a warning to be logged if there is a more preferred candidate + with no hash specified. Returning all candidates in the case of no + matches lets pip report the hash of the candidate that would otherwise + have been installed (e.g. permitting the user to more easily update + their requirements file with the desired hash). + """ + applicable = [] + found_allowed_hash = False + for candidate in candidates: + link = candidate.location + if not link.has_hash: + applicable.append(candidate) + continue + + if link.is_hash_allowed(hashes=hashes): + found_allowed_hash = True + applicable.append(candidate) + + if found_allowed_hash: + return applicable + + # Make sure we're not returning back the given value. + return list(candidates) + + class CandidatePreferences(object): """ diff --git a/tests/unit/test_index.py b/tests/unit/test_index.py index d6ebf939d3e..271c6bf8236 100644 --- a/tests/unit/test_index.py +++ b/tests/unit/test_index.py @@ -11,16 +11,29 @@ CandidateEvaluator, CandidatePreferences, FormatControl, HTMLPage, Link, LinkEvaluator, PackageFinder, _check_link_requires_python, _clean_link, _determine_base_url, _extract_version_from_fragment, - _find_name_version_sep, _get_html_page, + _find_name_version_sep, _get_html_page, filter_unallowed_hashes, ) from pip._internal.models.candidate import InstallationCandidate from pip._internal.models.search_scope import SearchScope from pip._internal.models.selection_prefs import SelectionPreferences from pip._internal.models.target_python import TargetPython from pip._internal.pep425tags import get_supported +from pip._internal.utils.hashes import Hashes from tests.lib import CURRENT_PY_VERSION_INFO, make_test_finder +def make_mock_candidate(version, yanked_reason=None, hex_digest=None): + url = 'https://example.com/pkg-{}.tar.gz'.format(version) + if hex_digest is not None: + assert len(hex_digest) == 64 + url += '#sha256={}'.format(hex_digest) + + link = Link(url, yanked_reason=yanked_reason) + candidate = InstallationCandidate('mypackage', version, link) + + return candidate + + @pytest.mark.parametrize('requires_python, expected', [ ('== 3.6.4', False), ('== 3.6.5', True), @@ -170,6 +183,30 @@ def test_evaluate_link__incompatible_wheel(self): assert actual == expected +@pytest.mark.parametrize('hex_digest, expected_versions', [ + (None, ['1.0', '1.1', '1.2']), + (64 * 'a', ['1.0', '1.1']), + (64 * 'b', ['1.0', '1.2']), + (64 * 'c', ['1.0', '1.1', '1.2']), +]) +def test_filter_unallowed_hashes(hex_digest, expected_versions): + candidates = [ + make_mock_candidate('1.0'), + make_mock_candidate('1.1', hex_digest=(64 * 'a')), + make_mock_candidate('1.2', hex_digest=(64 * 'b')), + ] + hashes_data = { + 'sha256': [hex_digest], + } + hashes = Hashes(hashes_data) + actual = filter_unallowed_hashes(candidates, hashes=hashes) + + actual_versions = [str(candidate.version) for candidate in actual] + assert actual_versions == expected_versions + # Check that the return value is always different from the given value. + assert actual is not candidates + + class TestCandidateEvaluator: @pytest.mark.parametrize('allow_all_prereleases, prefer_binary', [ @@ -198,18 +235,11 @@ def test_create__target_python_none(self): expected_tags = get_supported() assert evaluator._supported_tags == expected_tags - def make_mock_candidate(self, version, yanked_reason=None): - url = 'https://example.com/pkg-{}.tar.gz'.format(version) - link = Link(url, yanked_reason=yanked_reason) - candidate = InstallationCandidate('mypackage', version, link) - - return candidate - def test_get_applicable_candidates(self): specifier = SpecifierSet('<= 1.11') versions = ['1.10', '1.11', '1.12'] candidates = [ - self.make_mock_candidate(version) for version in versions + make_mock_candidate(version) for version in versions ] evaluator = CandidateEvaluator.create() actual = evaluator.get_applicable_candidates( @@ -226,7 +256,7 @@ def test_make_found_candidates(self): specifier = SpecifierSet('<= 1.11') versions = ['1.10', '1.11', '1.12'] candidates = [ - self.make_mock_candidate(version) for version in versions + make_mock_candidate(version) for version in versions ] evaluator = CandidateEvaluator.create() found_candidates = evaluator.make_found_candidates( @@ -275,10 +305,10 @@ def test_get_best_candidate__all_yanked(self, caplog): Test all candidates yanked. """ candidates = [ - self.make_mock_candidate('1.0', yanked_reason='bad metadata #1'), + make_mock_candidate('1.0', yanked_reason='bad metadata #1'), # Put the best candidate in the middle, to test sorting. - self.make_mock_candidate('3.0', yanked_reason='bad metadata #3'), - self.make_mock_candidate('2.0', yanked_reason='bad metadata #2'), + make_mock_candidate('3.0', yanked_reason='bad metadata #3'), + make_mock_candidate('2.0', yanked_reason='bad metadata #2'), ] expected_best = candidates[1] evaluator = CandidateEvaluator.create() @@ -310,7 +340,7 @@ def test_get_best_candidate__yanked_reason( Test the log message with various reason strings. """ candidates = [ - self.make_mock_candidate('1.0', yanked_reason=yanked_reason), + make_mock_candidate('1.0', yanked_reason=yanked_reason), ] evaluator = CandidateEvaluator.create() actual = evaluator.get_best_candidate(candidates) @@ -332,11 +362,11 @@ def test_get_best_candidate__best_yanked_but_not_all(self, caplog): Test the best candidates being yanked, but not all. """ candidates = [ - self.make_mock_candidate('4.0', yanked_reason='bad metadata #4'), + make_mock_candidate('4.0', yanked_reason='bad metadata #4'), # Put the best candidate in the middle, to test sorting. - self.make_mock_candidate('2.0'), - self.make_mock_candidate('3.0', yanked_reason='bad metadata #3'), - self.make_mock_candidate('1.0'), + make_mock_candidate('2.0'), + make_mock_candidate('3.0', yanked_reason='bad metadata #3'), + make_mock_candidate('1.0'), ] expected_best = candidates[1] evaluator = CandidateEvaluator.create() From 9ddc89a21d5b7c4878fddb84857bf7bb9eda89c0 Mon Sep 17 00:00:00 2001 From: Chris Jerdonek Date: Wed, 10 Jul 2019 20:35:30 -0700 Subject: [PATCH 4/5] Pass the hashes when creating the CandidateEvaluator. --- src/pip/_internal/index.py | 23 ++++++++++++++++++----- tests/unit/test_index.py | 35 ++++++++++++++++++++++++++++++++++- 2 files changed, 52 insertions(+), 6 deletions(-) diff --git a/src/pip/_internal/index.py b/src/pip/_internal/index.py index e626ab73b67..267277f0bbb 100644 --- a/src/pip/_internal/index.py +++ b/src/pip/_internal/index.py @@ -513,6 +513,7 @@ def create( target_python=None, # type: Optional[TargetPython] prefer_binary=False, # type: bool allow_all_prereleases=False, # type: bool + hashes=None, # type: Optional[Hashes] ): # type: (...) -> CandidateEvaluator """Create a CandidateEvaluator object. @@ -520,6 +521,7 @@ def create( :param target_python: The target Python interpreter to use when checking compatibility. If None (the default), a TargetPython object will be constructed from the running Python. + :param hashes: An optional collection of allowed hashes. """ if target_python is None: target_python = TargetPython() @@ -530,6 +532,7 @@ def create( supported_tags=supported_tags, prefer_binary=prefer_binary, allow_all_prereleases=allow_all_prereleases, + hashes=hashes, ) def __init__( @@ -537,6 +540,7 @@ def __init__( supported_tags, # type: List[Pep425Tag] prefer_binary=False, # type: bool allow_all_prereleases=False, # type: bool + hashes=None, # type: Optional[Hashes] ): # type: (...) -> None """ @@ -544,6 +548,7 @@ def __init__( Python in order of preference (most preferred first). """ self._allow_all_prereleases = allow_all_prereleases + self._hashes = hashes self._prefer_binary = prefer_binary self._supported_tags = supported_tags @@ -576,7 +581,10 @@ def get_applicable_candidates( applicable_candidates = [ c for c in candidates if str(c.version) in versions ] - return applicable_candidates + + return filter_unallowed_hashes( + candidates=applicable_candidates, hashes=self._hashes, + ) def make_found_candidates( self, @@ -1089,8 +1097,8 @@ def find_all_candidates(self, project_name): # This is an intentional priority ordering return file_versions + find_links_versions + page_versions - def make_candidate_evaluator(self): - # type: (...) -> CandidateEvaluator + def make_candidate_evaluator(self, hashes=None): + # type: (Optional[Hashes]) -> CandidateEvaluator """Create a CandidateEvaluator object to use. """ candidate_prefs = self._candidate_prefs @@ -1098,12 +1106,14 @@ def make_candidate_evaluator(self): target_python=self._target_python, prefer_binary=candidate_prefs.prefer_binary, allow_all_prereleases=candidate_prefs.allow_all_prereleases, + hashes=hashes, ) def find_candidates( self, project_name, # type: str specifier=None, # type: Optional[specifiers.BaseSpecifier] + hashes=None, # type: Optional[Hashes] ): # type: (...) -> FoundCandidates """Find matches for the given project and specifier. @@ -1115,7 +1125,7 @@ def find_candidates( :return: A `FoundCandidates` instance. """ candidates = self.find_all_candidates(project_name) - candidate_evaluator = self.make_candidate_evaluator() + candidate_evaluator = self.make_candidate_evaluator(hashes=hashes) return candidate_evaluator.make_found_candidates( candidates, specifier=specifier, ) @@ -1128,7 +1138,10 @@ def find_requirement(self, req, upgrade): Returns a Link if found, Raises DistributionNotFound or BestVersionAlreadyInstalled otherwise """ - candidates = self.find_candidates(req.name, req.specifier) + hashes = req.hashes(trust_internet=False) + candidates = self.find_candidates( + req.name, specifier=req.specifier, hashes=hashes, + ) best_candidate = candidates.get_best() installed_version = None # type: Optional[_BaseVersion] diff --git a/tests/unit/test_index.py b/tests/unit/test_index.py index 271c6bf8236..82bebdd408d 100644 --- a/tests/unit/test_index.py +++ b/tests/unit/test_index.py @@ -252,6 +252,36 @@ def test_get_applicable_candidates(self): ] assert actual == expected_applicable + @pytest.mark.parametrize('specifier, expected_versions', [ + # Test no version constraint. + (SpecifierSet(), ['1.0', '1.2']), + # Test a version constraint that excludes the candidate whose + # hash matches. Then the non-allowed hash is a candidate. + (SpecifierSet('<= 1.1'), ['1.0', '1.1']), + ]) + def test_get_applicable_candidates__hashes( + self, specifier, expected_versions, + ): + """ + Test a non-None hashes value. + """ + # specifier = SpecifierSet('<= 1.1') + candidates = [ + make_mock_candidate('1.0'), + make_mock_candidate('1.1', hex_digest=(64 * 'a')), + make_mock_candidate('1.2', hex_digest=(64 * 'b')), + ] + hashes_data = { + 'sha256': [64 * 'b'], + } + hashes = Hashes(hashes_data) + evaluator = CandidateEvaluator.create(hashes=hashes) + actual = evaluator.get_applicable_candidates( + candidates, specifier=specifier, + ) + actual_versions = [str(c.version) for c in actual] + assert actual_versions == expected_versions + def test_make_found_candidates(self): specifier = SpecifierSet('<= 1.11') versions = ['1.10', '1.11', '1.12'] @@ -644,8 +674,11 @@ def test_make_candidate_evaluator( candidate_prefs=candidate_prefs, ) - evaluator = finder.make_candidate_evaluator() + # Pass hashes to check that _hashes is set. + hashes = Hashes({'sha256': [64 * 'a']}) + evaluator = finder.make_candidate_evaluator(hashes=hashes) assert evaluator._allow_all_prereleases == allow_all_prereleases + assert evaluator._hashes == hashes assert evaluator._prefer_binary == prefer_binary assert evaluator._supported_tags == [('py36', 'none', 'any')] From 74504fff6c688b2792c0f5748926ab4a30cd870f Mon Sep 17 00:00:00 2001 From: Chris Jerdonek Date: Thu, 11 Jul 2019 01:13:17 -0700 Subject: [PATCH 5/5] Prefer candidates with allowed hashes when sorting. --- news/5874.feature | 2 ++ src/pip/_internal/index.py | 16 ++++++++++++---- tests/unit/test_index.py | 32 ++++++++++++++++++++++++++------ 3 files changed, 40 insertions(+), 10 deletions(-) create mode 100644 news/5874.feature diff --git a/news/5874.feature b/news/5874.feature new file mode 100644 index 00000000000..844e3790fb3 --- /dev/null +++ b/news/5874.feature @@ -0,0 +1,2 @@ +When choosing candidates to install, prefer candidates with a hash matching +one of the user-provided hashes. diff --git a/src/pip/_internal/index.py b/src/pip/_internal/index.py index 267277f0bbb..ad634e1c24c 100644 --- a/src/pip/_internal/index.py +++ b/src/pip/_internal/index.py @@ -54,7 +54,7 @@ BuildTag = Tuple[Any, ...] # either empty tuple or Tuple[int, str] CandidateSortingKey = ( - Tuple[int, int, _BaseVersion, BuildTag, Optional[int]] + Tuple[int, int, int, _BaseVersion, BuildTag, Optional[int]] ) HTMLElement = xml.etree.ElementTree.Element SecureOrigin = Tuple[str, str, Optional[str]] @@ -624,8 +624,14 @@ def _sort_key(self, candidate): The preference is as follows: - First and foremost, yanked candidates (in the sense of PEP 592) are - always less preferred than candidates that haven't been yanked. Then: + First and foremost, candidates with allowed (matching) hashes are + always preferred over candidates without matching hashes. This is + because e.g. if the only candidate with an allowed hash is yanked, + we still want to use that candidate. + + Second, excepting hash considerations, candidates that have been + yanked (in the sense of PEP 592) are always less preferred than + candidates that haven't been yanked. Then: If not finding wheels, they are sorted by version only. If finding wheels, then the sort order is by version, then: @@ -660,9 +666,11 @@ def _sort_key(self, candidate): build_tag = (int(build_tag_groups[0]), build_tag_groups[1]) else: # sdist pri = -(support_num) + has_allowed_hash = int(link.is_hash_allowed(self._hashes)) yank_value = -1 * int(link.is_yanked) # -1 for yanked. return ( - yank_value, binary_preference, candidate.version, build_tag, pri, + has_allowed_hash, yank_value, binary_preference, candidate.version, + build_tag, pri, ) def get_best_candidate( diff --git a/tests/unit/test_index.py b/tests/unit/test_index.py index 82bebdd408d..a8a63396c74 100644 --- a/tests/unit/test_index.py +++ b/tests/unit/test_index.py @@ -302,6 +302,29 @@ def test_make_found_candidates(self): ] assert found_candidates._applicable_candidates == expected_applicable + @pytest.mark.parametrize('hex_digest, expected', [ + # Test a link with no hash. + (None, 0), + # Test a link with an allowed hash. + (64 * 'a', 1), + # Test a link with a hash that isn't allowed. + (64 * 'b', 0), + ]) + def test_sort_key__hash(self, hex_digest, expected): + """ + Test the effect of the link's hash on _sort_key()'s return value. + """ + candidate = make_mock_candidate('1.0', hex_digest=hex_digest) + hashes_data = { + 'sha256': [64 * 'a'], + } + hashes = Hashes(hashes_data) + evaluator = CandidateEvaluator.create(hashes=hashes) + sort_value = evaluator._sort_key(candidate) + # The hash is reflected in the first element of the tuple. + actual = sort_value[0] + assert actual == expected + @pytest.mark.parametrize('yanked_reason, expected', [ # Test a non-yanked file. (None, 0), @@ -312,14 +335,11 @@ def test_sort_key__is_yanked(self, yanked_reason, expected): """ Test the effect of is_yanked on _sort_key()'s return value. """ - url = 'https://example.com/mypackage.tar.gz' - link = Link(url, yanked_reason=yanked_reason) - candidate = InstallationCandidate('mypackage', '1.0', link) - + candidate = make_mock_candidate('1.0', yanked_reason=yanked_reason) evaluator = CandidateEvaluator.create() sort_value = evaluator._sort_key(candidate) - # Yanked / non-yanked is reflected in the first element of the tuple. - actual = sort_value[0] + # Yanked / non-yanked is reflected in the second element of the tuple. + actual = sort_value[1] assert actual == expected def test_get_best_candidate__no_candidates(self):