Commit 1e41f01
Add checks against requirements-file-dwelling hashes for most kinds of packages. Close #1175.
* Add --require-hashes option. This is handy in deployment scripts to force
  application authors to hash their requirements. It is also a convenient way
  to get pip to show computed hashes for a virgin, unhashed requirements file.
  Eventually, additions to `pip freeze` should fill a superset of this use case.
* In --require-hashes mode, at least one hash is required to match for each
  requirement.
* Option-based requirements (--sha256=...) turn on --require-hashes mode
  implicitly.
* Internet-derived URL-based hashes are "necessary but not sufficient": they do
  not satisfy --require-hashes mode when they match, but they are still used to
  guard against transmission errors.
* Other URL-based requirements (#md5=...) are treated just like flag-based
  ones, except they don't turn on --require-hashes.
* Complain informatively, with the most devastating errors first so you don't
  chase your tail all day only to run up against a brick wall at the end. This
  also means we don't complain that a hash is missing, only for the user to
  find, after fixing it, that we have no idea how to even compute a hash for
  that type of requirement.
* Complain about unpinned requirements when hash-checking mode is on, lest they
  cause the user surprise later.
* Complain about missing hashes.
* Complain about requirement types we don't know how to hash (like VCS ones and
  local dirs).
* Have InstallRequirement keep its original Link around (original_link) so we
  can differentiate between URL hashes from requirements files and ones
  downloaded from the (untrustworthy) internet.
* Remove test_download_hashes, which is obsolete. Similar coverage is provided
  in test_utils.TestHashes and the various hash cases in test_req.py.
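
For illustration, a requirements file using the option-based hashes described above might look like this; the project name and digest are placeholders, not real values:

# requirements.txt -- every requirement is pinned and carries a hash.
# SomePackage and the digest are hypothetical placeholders.
SomePackage==1.2.0 --sha256=<64-hex-digit digest of the expected archive>

# Any hash flag implies --require-hashes, but deployment scripts can
# also force the mode explicitly:
#     pip install --require-hashes -r requirements.txt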
1 parent 3303be0 · commit 1e41f01

14 files changed: +777 -378 lines

pip/commands/install.py

Lines changed: 10 additions & 0 deletions
@@ -159,6 +159,15 @@ def __init__(self, *args, **kw):

         cmd_opts.add_option(cmdoptions.no_clean())

+        cmd_opts.add_option(
+            '--require-hashes',
+            dest='require_hashes',
+            action='store_true',
+            help='Perform a provably repeatable installation by requiring a '
+                 'hash to check each package against. Implied by the presence '
+                 'of a hash flag, like --sha256, on any individual '
+                 'requirement')
+
         index_opts = cmdoptions.make_option_group(
             cmdoptions.index_group,
             self.parser,
@@ -266,6 +275,7 @@ def run(self, options, args):
             pycompile=options.compile,
             isolated=options.isolated_mode,
             wheel_cache=wheel_cache,
+            require_hashes=options.require_hashes,
         )

         self.populate_requirement_set(
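
As a standalone illustration of the machinery behind cmd_opts (pip's option groups are optparse-based), the new flag boils down to the following sketch; the parser here is a plain OptionParser, not pip's actual setup:

# Hypothetical standalone sketch, not pip code.
from optparse import OptionParser

parser = OptionParser()
parser.add_option(
    '--require-hashes',
    dest='require_hashes',
    action='store_true',
    default=False,
    help='Perform a provably repeatable installation by requiring a '
         'hash to check each package against.')

options, _ = parser.parse_args(['--require-hashes'])
assert options.require_hashes is True  # an absent flag would leave it False

run() then threads options.require_hashes into the RequirementSet constructor, as the second hunk shows.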

pip/download.py

Lines changed: 57 additions & 71 deletions
@@ -29,7 +29,7 @@
 from pip.models import PyPI
 from pip.utils import (splitext, rmtree, format_size, display_path,
                        backup_dir, ask_path_exists, unpack_file,
-                       call_subprocess, ARCHIVE_EXTENSIONS)
+                       call_subprocess, ARCHIVE_EXTENSIONS, consume)
 from pip.utils.filesystem import check_path_owner
 from pip.utils.logging import indent_log
 from pip.utils.ui import DownloadProgressBar, DownloadProgressSpinner
@@ -485,57 +485,22 @@ def is_file_url(link):
     return link.url.lower().startswith('file:')


-def _check_hash(download_hash, link):
-    if download_hash.digest_size != hashlib.new(link.hash_name).digest_size:
-        logger.critical(
-            "Hash digest size of the package %d (%s) doesn't match the "
-            "expected hash name %s!",
-            download_hash.digest_size, link, link.hash_name,
-        )
-        raise HashMismatch('Hash name mismatch for package %s' % link)
-    if download_hash.hexdigest() != link.hash:
-        logger.critical(
-            "Hash of the package %s (%s) doesn't match the expected hash %s!",
-            link, download_hash.hexdigest(), link.hash,
-        )
-        raise HashMismatch(
-            'Bad %s hash for package %s' % (link.hash_name, link)
-        )
+def is_dir_url(link):
+    """Return whether a file:// Link points to a directory.

+    ``link`` must not have any other scheme but file://. Call is_file_url()
+    first.

-def _get_hash_from_file(target_file, link):
-    try:
-        download_hash = hashlib.new(link.hash_name)
-    except (ValueError, TypeError):
-        logger.warning(
-            "Unsupported hash name %s for package %s", link.hash_name, link,
-        )
-        return None
-
-    with open(target_file, 'rb') as fp:
-        while True:
-            chunk = fp.read(4096)
-            if not chunk:
-                break
-            download_hash.update(chunk)
-    return download_hash
+    """
+    link_path = url_to_path(link.url_without_fragment)
+    return os.path.isdir(link_path)


 def _progress_indicator(iterable, *args, **kwargs):
     return iterable


-def _download_url(resp, link, content_file):
-    download_hash = None
-    if link.hash and link.hash_name:
-        try:
-            download_hash = hashlib.new(link.hash_name)
-        except ValueError:
-            logger.warning(
-                "Unsupported hash name %s for package %s",
-                link.hash_name, link,
-            )
-
+def _download_url(resp, link, content_file, hashes):
     try:
         total_length = int(resp.headers['content-length'])
     except (ValueError, KeyError, TypeError):
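
The new is_dir_url() helper just resolves the file:// URL to a local path and tests it. A self-contained Python 3 sketch with a minimal stand-in for pip's Link object (the real url_to_path lives in pip.download and handles more edge cases, such as Windows UNC paths):

# Hypothetical stand-in; pip's Link and url_to_path are richer than this.
import os
from urllib.parse import urlparse
from urllib.request import url2pathname

class FakeLink:
    def __init__(self, url):
        # pip's Link exposes url_without_fragment; emulate just that.
        self.url_without_fragment = url.split('#', 1)[0]

def is_dir_url(link):
    link_path = url2pathname(urlparse(link.url_without_fragment).path)
    return os.path.isdir(link_path)

print(is_dir_url(FakeLink('file:///tmp')))  # True on most Unix systems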
@@ -593,6 +558,11 @@ def resp_read(chunk_size):
                     break
                 yield chunk

+    def written_chunks(chunks):
+        for chunk in chunks:
+            content_file.write(chunk)
+            yield chunk
+
     progress_indicator = _progress_indicator

     if link.netloc == PyPI.netloc:
@@ -614,13 +584,12 @@ def resp_read(chunk_size):

     logger.debug('Downloading from URL %s', link)

-    for chunk in progress_indicator(resp_read(4096), 4096):
-        if download_hash is not None:
-            download_hash.update(chunk)
-        content_file.write(chunk)
-    if link.hash and link.hash_name:
-        _check_hash(download_hash, link)
-    return download_hash
+    downloaded_chunks = written_chunks(progress_indicator(resp_read(4096),
+                                                          4096))
+    if hashes:
+        hashes.check_against_chunks(downloaded_chunks)
+    else:
+        consume(downloaded_chunks)


 def _copy_file(filename, location, content_type, link):
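
The rewritten download loop is a generator pipeline: resp_read() yields chunks off the wire, written_chunks() writes each one to content_file as it passes through, and the pipeline is drained either by the hash check or, when no hashes are expected, by consume() (added to pip.utils in this commit). A standalone sketch of the pattern, with a plain sha256 check standing in for the Hashes API:

# Illustrative sketch of the write-through-generator pattern above.
import hashlib
import tempfile
from collections import deque

def consume(iterator):
    # Drain an iterator completely; same idea as pip.utils.consume.
    deque(iterator, maxlen=0)

def written_chunks(chunks, content_file):
    # Write each chunk through to disk, re-yielding it downstream.
    for chunk in chunks:
        content_file.write(chunk)
        yield chunk

def check_sha256(chunks, expected_hexdigest):
    # Stand-in for Hashes.check_against_chunks with one sha256 digest.
    state = hashlib.sha256()
    for chunk in chunks:
        state.update(chunk)
    if state.hexdigest() != expected_hexdigest:
        raise ValueError('hash mismatch')

data = [b'hello ', b'world']
expected = hashlib.sha256(b'hello world').hexdigest()
with tempfile.TemporaryFile() as f:
    # One pass both persists the chunks and verifies the digest.
    check_sha256(written_chunks(iter(data), f), expected)
with tempfile.TemporaryFile() as f:
    # With no expected hashes, the chain is simply drained.
    consume(written_chunks(iter(data), f))

Because generators are lazy, nothing is read, written, or hashed until the terminal consumer pulls on the chain, which is why the diff needs the explicit consume() in the no-hashes branch.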
@@ -648,7 +617,11 @@ def _copy_file(filename, location, content_type, link):
         logger.info('Saved %s', display_path(download_location))


-def unpack_http_url(link, location, download_dir=None, session=None):
+def unpack_http_url(link,
+                    location,
+                    download_dir=None,
+                    session=None,
+                    hashes=None):
     if session is None:
         raise TypeError(
             "unpack_http_url() missing 1 required keyword argument: 'session'"
@@ -659,14 +632,19 @@ def unpack_http_url(link, location, download_dir=None, session=None):
     # If a download dir is specified, is the file already downloaded there?
     already_downloaded_path = None
     if download_dir:
-        already_downloaded_path = _check_download_dir(link, download_dir)
+        already_downloaded_path = _check_download_dir(link,
+                                                      download_dir,
+                                                      hashes)

     if already_downloaded_path:
         from_path = already_downloaded_path
         content_type = mimetypes.guess_type(from_path)[0]
     else:
         # let's download to a tmp dir
-        from_path, content_type = _download_http_url(link, session, temp_dir)
+        from_path, content_type = _download_http_url(link,
+                                                     session,
+                                                     temp_dir,
+                                                     hashes)

     # unpack the archive to the build dir location. even when only downloading
     # archives, they have to be unpacked to parse dependencies
@@ -681,31 +659,34 @@
         rmtree(temp_dir)


-def unpack_file_url(link, location, download_dir=None):
+def unpack_file_url(link, location, download_dir=None, hashes=None):
     """Unpack link into location.
-    If download_dir is provided and link points to a file, make a copy
-    of the link file inside download_dir."""

+    If download_dir is provided and link points to a file, make a copy
+    of the link file inside download_dir.
+    """
     link_path = url_to_path(link.url_without_fragment)

     # If it's a url to a local directory
-    if os.path.isdir(link_path):
+    if is_dir_url(link):
         if os.path.isdir(location):
             rmtree(location)
         shutil.copytree(link_path, location, symlinks=True)
         if download_dir:
             logger.info('Link is a directory, ignoring download_dir')
         return

-    # if link has a hash, let's confirm it matches
-    if link.hash:
-        link_path_hash = _get_hash_from_file(link_path, link)
-        _check_hash(link_path_hash, link)
+    # If --require-hashes is off, `hashes` is either empty, the link hash, or
+    # MissingHashes, and it's required to match. If --require-hashes is on, we
+    # are satisfied by any hash in `hashes` matching: a URL-based or an
+    # option-based one; no internet-sourced hash will be in `hashes`.
+    if hashes:
+        hashes.check_against_path(link_path)

     # If a download dir is specified, is the file already there and valid?
     already_downloaded_path = None
     if download_dir:
-        already_downloaded_path = _check_download_dir(link, download_dir)
+        already_downloaded_path = _check_download_dir(link, download_dir, hashes)

     if already_downloaded_path:
         from_path = already_downloaded_path
@@ -752,7 +733,7 @@ def request(self, host, handler, request_body, verbose=False):


 def unpack_url(link, location, download_dir=None,
-               only_download=False, session=None):
+               only_download=False, session=None, hashes=None):
     """Unpack link.
     If link is a VCS link:
       if only_download, export into download_dir and ignore location
@@ -761,14 +742,19 @@
       - unpack into location
       - if download_dir, copy the file into download_dir
       - if only_download, mark location for deletion
+
+    :param hashes: A Hashes object, one of whose embedded hashes must match,
+        or I'll raise HashMismatch. If the Hashes is empty, no matches are
+        required, and unhashable types of requirements (like VCS ones, which
+        would ordinarily raise HashUnsupported) are allowed.
     """
     # non-editable vcs urls
     if is_vcs_url(link):
         unpack_vcs_link(link, location)

     # file urls
     elif is_file_url(link):
-        unpack_file_url(link, location, download_dir)
+        unpack_file_url(link, location, download_dir, hashes=hashes)

     # http urls
     else:
@@ -780,12 +766,13 @@
             location,
             download_dir,
             session,
+            hashes=hashes
         )
     if only_download:
         write_delete_marker_file(location)


-def _download_http_url(link, session, temp_dir):
+def _download_http_url(link, session, temp_dir, hashes):
     """Download link url into temp_dir using provided session"""
     target_url = link.url.split('#', 1)[0]
     try:
@@ -840,22 +827,21 @@
         filename += ext
     file_path = os.path.join(temp_dir, filename)
     with open(file_path, 'wb') as content_file:
-        _download_url(resp, link, content_file)
+        _download_url(resp, link, content_file, hashes)
     return file_path, content_type


-def _check_download_dir(link, download_dir):
+def _check_download_dir(link, download_dir, hashes):
     """ Check download_dir for previously downloaded file with correct hash
     If a correct file is found return its path else None
     """
     download_path = os.path.join(download_dir, link.filename)
     if os.path.exists(download_path):
         # If already downloaded, does its hash match?
         logger.info('File was already downloaded %s', download_path)
-        if link.hash:
-            download_hash = _get_hash_from_file(download_path, link)
+        if hashes:
             try:
-                _check_hash(download_hash, link)
+                hashes.check_against_path(download_path)
             except HashMismatch:
                 logger.warning(
                     'Previously-downloaded file %s has bad hash. '
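
The Hashes object threaded through all of these signatures is defined elsewhere in this commit (pip/utils/hashes.py, which this diff does not show). A rough sketch of the interface the calls above rely on (truthiness, check_against_chunks, check_against_path); the internals here are assumptions, not pip's actual implementation:

# Hypothetical sketch of the Hashes interface used in pip/download.py above.
import hashlib

class HashMismatch(Exception):
    pass

class Hashes:
    def __init__(self, hashes=None):
        # Maps a hash name to a list of allowed hex digests, e.g.
        # {'sha256': ['<hexdigest>', ...]}.
        self._allowed = hashes or {}

    def __bool__(self):
        # An empty Hashes demands nothing, so `if hashes:` gates the checks.
        return bool(self._allowed)

    def check_against_chunks(self, chunks):
        # Feed every chunk to one hasher per allowed algorithm, then pass
        # if any computed digest is among the allowed ones.
        gots = {name: hashlib.new(name) for name in self._allowed}
        for chunk in chunks:
            for state in gots.values():
                state.update(chunk)
        for name, digests in self._allowed.items():
            if gots[name].hexdigest() in digests:
                return
        raise HashMismatch()

    def check_against_path(self, path):
        with open(path, 'rb') as f:
            return self.check_against_chunks(iter(lambda: f.read(4096), b''))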
