 from pip.models import PyPI
 from pip.utils import (splitext, rmtree, format_size, display_path,
                        backup_dir, ask_path_exists, unpack_file,
-                       call_subprocess, ARCHIVE_EXTENSIONS)
+                       call_subprocess, ARCHIVE_EXTENSIONS, consume)
 from pip.utils.filesystem import check_path_owner
 from pip.utils.logging import indent_log
 from pip.utils.ui import DownloadProgressBar, DownloadProgressSpinner
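The newly imported `consume` is a small pip.utils helper for draining a generator purely for its side effects. Its exact definition is outside this diff; a minimal sketch of the standard recipe it follows:

```python
from collections import deque

def consume(iterator):
    # Exhaust an iterator, discarding every item; deque with maxlen=0
    # drains it at C speed without buffering the results.
    deque(iterator, maxlen=0)
```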
@@ -485,57 +485,22 @@ def is_file_url(link):
     return link.url.lower().startswith('file:')
 
 
-def _check_hash(download_hash, link):
-    if download_hash.digest_size != hashlib.new(link.hash_name).digest_size:
-        logger.critical(
-            "Hash digest size of the package %d (%s) doesn't match the "
-            "expected hash name %s!",
-            download_hash.digest_size, link, link.hash_name,
-        )
-        raise HashMismatch('Hash name mismatch for package %s' % link)
-    if download_hash.hexdigest() != link.hash:
-        logger.critical(
-            "Hash of the package %s (%s) doesn't match the expected hash %s!",
-            link, download_hash.hexdigest(), link.hash,
-        )
-        raise HashMismatch(
-            'Bad %s hash for package %s' % (link.hash_name, link)
-        )
+def is_dir_url(link):
+    """Return whether a file:// Link points to a directory.
+
+    ``link`` must not have any other scheme but file://. Call is_file_url()
+    first.
+
-
-def _get_hash_from_file(target_file, link):
-    try:
-        download_hash = hashlib.new(link.hash_name)
-    except (ValueError, TypeError):
-        logger.warning(
-            "Unsupported hash name %s for package %s", link.hash_name, link,
-        )
-        return None
-
-    with open(target_file, 'rb') as fp:
-        while True:
-            chunk = fp.read(4096)
-            if not chunk:
-                break
-            download_hash.update(chunk)
-    return download_hash
+    """
+    link_path = url_to_path(link.url_without_fragment)
+    return os.path.isdir(link_path)
 
 
 def _progress_indicator(iterable, *args, **kwargs):
     return iterable
 
 
-def _download_url(resp, link, content_file):
-    download_hash = None
-    if link.hash and link.hash_name:
-        try:
-            download_hash = hashlib.new(link.hash_name)
-        except ValueError:
-            logger.warning(
-                "Unsupported hash name %s for package %s",
-                link.hash_name, link,
-            )
-
+def _download_url(resp, link, content_file, hashes):
     try:
         total_length = int(resp.headers['content-length'])
     except (ValueError, KeyError, TypeError):
@@ -593,6 +558,11 @@ def resp_read(chunk_size):
                     break
                 yield chunk
 
+    def written_chunks(chunks):
+        for chunk in chunks:
+            content_file.write(chunk)
+            yield chunk
+
     progress_indicator = _progress_indicator
 
     if link.netloc == PyPI.netloc:
@@ -614,13 +584,12 @@ def resp_read(chunk_size):
 
     logger.debug('Downloading from URL %s', link)
 
-    for chunk in progress_indicator(resp_read(4096), 4096):
-        if download_hash is not None:
-            download_hash.update(chunk)
-        content_file.write(chunk)
-    if link.hash and link.hash_name:
-        _check_hash(download_hash, link)
-    return download_hash
+    downloaded_chunks = written_chunks(progress_indicator(resp_read(4096),
+                                                          4096))
+    if hashes:
+        hashes.check_against_chunks(downloaded_chunks)
+    else:
+        consume(downloaded_chunks)
 
 
 def _copy_file(filename, location, content_type, link):
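The download loop above is now a lazy generator pipeline: `resp_read` yields chunks off the wire, `written_chunks` writes each chunk to `content_file` as it re-yields it, and the pipeline is driven either by `hashes.check_against_chunks` or, when no hashes are expected, by `consume`. A self-contained sketch of the same write-through pattern (names and the sha256 choice are illustrative, not pip's API):

```python
import hashlib

def written_chunks(chunks, fileobj):
    # Write-through generator: persist each chunk, then re-yield it so a
    # downstream consumer (e.g. a hash check) sees the same bytes.
    for chunk in chunks:
        fileobj.write(chunk)
        yield chunk

def check_chunks_sha256(chunks, expected_hexdigest):
    # Driving the iterator here is what actually performs the writes.
    hasher = hashlib.sha256()
    for chunk in chunks:
        hasher.update(chunk)
    if hasher.hexdigest() != expected_hexdigest:
        raise ValueError('downloaded file failed the hash check')
```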
@@ -648,7 +617,11 @@ def _copy_file(filename, location, content_type, link):
         logger.info('Saved %s', display_path(download_location))
 
 
-def unpack_http_url(link, location, download_dir=None, session=None):
+def unpack_http_url(link,
+                    location,
+                    download_dir=None,
+                    session=None,
+                    hashes=None):
     if session is None:
         raise TypeError(
             "unpack_http_url() missing 1 required keyword argument: 'session'"
@@ -659,14 +632,19 @@ def unpack_http_url(link, location, download_dir=None, session=None):
     # If a download dir is specified, is the file already downloaded there?
     already_downloaded_path = None
     if download_dir:
-        already_downloaded_path = _check_download_dir(link, download_dir)
+        already_downloaded_path = _check_download_dir(link,
+                                                      download_dir,
+                                                      hashes)
 
     if already_downloaded_path:
         from_path = already_downloaded_path
         content_type = mimetypes.guess_type(from_path)[0]
     else:
         # let's download to a tmp dir
-        from_path, content_type = _download_http_url(link, session, temp_dir)
+        from_path, content_type = _download_http_url(link,
+                                                     session,
+                                                     temp_dir,
+                                                     hashes)
 
     # unpack the archive to the build dir location. even when only downloading
     # archives, they have to be unpacked to parse dependencies
@@ -681,31 +659,34 @@ def unpack_http_url(link, location, download_dir=None, session=None):
         rmtree(temp_dir)
 
 
-def unpack_file_url(link, location, download_dir=None):
+def unpack_file_url(link, location, download_dir=None, hashes=None):
     """Unpack link into location.
-    If download_dir is provided and link points to a file, make a copy
-    of the link file inside download_dir."""
 
+    If download_dir is provided and link points to a file, make a copy
+    of the link file inside download_dir.
+    """
     link_path = url_to_path(link.url_without_fragment)
 
     # If it's a url to a local directory
-    if os.path.isdir(link_path):
+    if is_dir_url(link):
         if os.path.isdir(location):
             rmtree(location)
         shutil.copytree(link_path, location, symlinks=True)
         if download_dir:
             logger.info('Link is a directory, ignoring download_dir')
         return
 
-    # if link has a hash, let's confirm it matches
-    if link.hash:
-        link_path_hash = _get_hash_from_file(link_path, link)
-        _check_hash(link_path_hash, link)
+    # If --require-hashes is off, `hashes` is either empty, the link hash, or
+    # MissingHashes, and it's required to match. If --require-hashes is on, we
+    # are satisfied by any hash in `hashes` matching: a URL-based or an
+    # option-based one; no internet-sourced hash will be in `hashes`.
+    if hashes:
+        hashes.check_against_path(link_path)
 
     # If a download dir is specified, is the file already there and valid?
     already_downloaded_path = None
     if download_dir:
-        already_downloaded_path = _check_download_dir(link, download_dir)
+        already_downloaded_path = _check_download_dir(link, download_dir,
+                                                      hashes)
 
     if already_downloaded_path:
         from_path = already_downloaded_path
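The comment introduced above is the heart of the hash-checking behavior: with `--require-hashes` on, `hashes` holds only digests the user supplied, via a `#sha256=...` URL fragment or a requirements-file option such as `FooProject==1.2 --hash=sha256:<hexdigest>` (illustrative name and digest), never one fetched from an index, so a match proves the archive is the one the user pinned.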
@@ -752,7 +733,7 @@ def request(self, host, handler, request_body, verbose=False):
 
 
 def unpack_url(link, location, download_dir=None,
-               only_download=False, session=None):
+               only_download=False, session=None, hashes=None):
     """Unpack link.
     If link is a VCS link:
       if only_download, export into download_dir and ignore location
@@ -761,14 +742,19 @@ def unpack_url(link, location, download_dir=None,
         - unpack into location
         - if download_dir, copy the file into download_dir
         - if only_download, mark location for deletion
+
+    :param hashes: A Hashes object, one of whose embedded hashes must match,
+        or HashMismatch will be raised. If the Hashes is empty, no matches
+        are required, and unhashable types of requirements (like VCS ones,
+        which would ordinarily raise HashUnsupported) are allowed.
     """
     # non-editable vcs urls
     if is_vcs_url(link):
         unpack_vcs_link(link, location)
 
     # file urls
     elif is_file_url(link):
-        unpack_file_url(link, location, download_dir)
+        unpack_file_url(link, location, download_dir, hashes=hashes)
 
     # http urls
     else:
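A hypothetical call exercising the new `hashes` keyword end to end; a rough sketch only, since the `Hashes` constructor and the surrounding setup are assumptions not shown in this diff:

```python
from pip.download import PipSession, unpack_url
from pip.index import Link
from pip.utils.hashes import Hashes

session = PipSession()
link = Link('https://example.com/FooProject-1.2.tar.gz')  # illustrative URL
# Assumed constructor: a dict mapping hash names to lists of allowed hex
# digests. Per the docstring above, any one embedded digest matching
# satisfies the check, and an empty Hashes() requires no match at all.
hashes = Hashes({'sha256': [
    # sha256 of b'hello', purely illustrative:
    '2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824',
]})
unpack_url(link, location='/tmp/build',
           only_download=False, session=session, hashes=hashes)
```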
@@ -780,12 +766,13 @@ def unpack_url(link, location, download_dir=None,
             location,
             download_dir,
             session,
+            hashes=hashes
         )
     if only_download:
         write_delete_marker_file(location)
 
 
-def _download_http_url(link, session, temp_dir):
+def _download_http_url(link, session, temp_dir, hashes):
     """Download link url into temp_dir using provided session"""
     target_url = link.url.split('#', 1)[0]
     try:
@@ -840,22 +827,21 @@ def _download_http_url(link, session, temp_dir):
             filename += ext
     file_path = os.path.join(temp_dir, filename)
     with open(file_path, 'wb') as content_file:
-        _download_url(resp, link, content_file)
+        _download_url(resp, link, content_file, hashes)
     return file_path, content_type
 
 
-def _check_download_dir(link, download_dir):
+def _check_download_dir(link, download_dir, hashes):
     """ Check download_dir for previously downloaded file with correct hash
     If a correct file is found return its path else None
     """
     download_path = os.path.join(download_dir, link.filename)
     if os.path.exists(download_path):
         # If already downloaded, does its hash match?
         logger.info('File was already downloaded %s', download_path)
-        if link.hash:
-            download_hash = _get_hash_from_file(download_path, link)
+        if hashes:
             try:
-                _check_hash(download_hash, link)
+                hashes.check_against_path(download_path)
             except HashMismatch:
                 logger.warning(
                     'Previously-downloaded file %s has bad hash. '
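The `Hashes` class that `check_against_path` and `check_against_chunks` belong to is not defined in this diff; for orientation, a minimal sketch of the interface these hunks rely on, under the assumption that any single matching embedded digest counts as success:

```python
import hashlib

class HashMismatch(Exception):
    pass

class Hashes(object):
    """Sketch: allowed hex digests keyed by hash name ('sha256', ...)."""

    def __init__(self, hashes=None):
        self._allowed = hashes or {}

    def __bool__(self):
        # `if hashes:` in the hunks above only runs checks when digests exist.
        return bool(self._allowed)
    __nonzero__ = __bool__  # Python 2 spelling

    def check_against_chunks(self, chunks):
        # Feed every chunk to one hasher per allowed algorithm; driving the
        # iterator is also what triggers written_chunks' writes upstream.
        gots = dict((name, hashlib.new(name)) for name in self._allowed)
        for chunk in chunks:
            for hasher in gots.values():
                hasher.update(chunk)
        for name, hasher in gots.items():
            if hasher.hexdigest() in self._allowed[name]:
                return
        raise HashMismatch('none of the allowed hashes matched')

    def check_against_path(self, path):
        with open(path, 'rb') as f:
            return self.check_against_chunks(iter(lambda: f.read(4096), b''))
```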