@@ -513,6 +513,8 @@ def resp_read(chunk_size):
513
513
finally :
514
514
if show_progress :
515
515
logger .end_progress ('%s downloaded' % format_size (downloaded ))
516
+ if link .hash and link .hash_name :
517
+ _check_hash (download_hash , link )
516
518
return download_hash
517
519
518
520
@@ -547,102 +549,28 @@ def unpack_http_url(link, location, download_dir=None, session=None):
547
549
)
548
550
549
551
temp_dir = tempfile .mkdtemp ('-unpack' , 'pip-' )
550
- from_path = None
551
- target_url = link .url .split ('#' , 1 )[0 ]
552
-
553
- download_hash = None
554
552
555
553
# If a download dir is specified, is the file already downloaded there?
556
- already_downloaded = False
554
+ already_downloaded_path = None
557
555
if download_dir :
558
- download_path = os .path .join (download_dir , link .filename )
559
- if os .path .exists (download_path ):
560
- # If already downloaded, does its hash match?
561
- content_type = mimetypes .guess_type (download_path )[0 ]
562
- logger .notify ('File was already downloaded %s' % download_path )
563
- if link .hash :
564
- download_hash = _get_hash_from_file (download_path , link )
565
- try :
566
- _check_hash (download_hash , link )
567
- already_downloaded = True
568
- except HashMismatch :
569
- logger .warn (
570
- 'Previously-downloaded file %s has bad hash, '
571
- 're-downloading.' % download_path
572
- )
573
- os .unlink (download_path )
574
- already_downloaded = False
575
- else :
576
- already_downloaded = True
556
+ already_downloaded_path = _check_download_dir (link , download_dir )
577
557
578
- if already_downloaded :
579
- from_path = download_path
558
+ if already_downloaded_path :
559
+ from_path = already_downloaded_path
580
560
content_type = mimetypes .guess_type (from_path )[0 ]
581
561
else :
582
562
# let's download to a tmp dir
583
- try :
584
- resp = session .get (
585
- target_url ,
586
- # We use Accept-Encoding: identity here because requests
587
- # defaults to accepting compressed responses. This breaks in
588
- # a variety of ways depending on how the server is configured.
589
- # - Some servers will notice that the file isn't a compressible
590
- # file and will leave the file alone and with an empty
591
- # Content-Encoding
592
- # - Some servers will notice that the file is already
593
- # compressed and will leave the file alone and will add a
594
- # Content-Encoding: gzip header
595
- # - Some servers won't notice anything at all and will take
596
- # a file that's already been compressed and compress it again
597
- # and set the Content-Encoding: gzip header
598
- # By setting this to request only the identity encoding We're
599
- # hoping to eliminate the third case. Hopefully there does not
600
- # exist a server which when given a file will notice it is
601
- # already compressed and that you're not asking for a
602
- # compressed file and will then decompress it before sending
603
- # because if that's the case I don't think it'll ever be
604
- # possible to make this work.
605
- headers = {"Accept-Encoding" : "identity" },
606
- stream = True ,
607
- )
608
- resp .raise_for_status ()
609
- except requests .HTTPError as exc :
610
- logger .fatal ("HTTP error %s while getting %s" %
611
- (exc .response .status_code , link ))
612
- raise
613
-
614
- content_type = resp .headers .get ('content-type' , '' )
615
- filename = link .filename # fallback
616
- # Have a look at the Content-Disposition header for a better guess
617
- content_disposition = resp .headers .get ('content-disposition' )
618
- if content_disposition :
619
- type , params = cgi .parse_header (content_disposition )
620
- # We use ``or`` here because we don't want to use an "empty" value
621
- # from the filename param.
622
- filename = params .get ('filename' ) or filename
623
- ext = splitext (filename )[1 ]
624
- if not ext :
625
- ext = mimetypes .guess_extension (content_type )
626
- if ext :
627
- filename += ext
628
- if not ext and link .url != resp .url :
629
- ext = os .path .splitext (resp .url )[1 ]
630
- if ext :
631
- filename += ext
632
- from_path = os .path .join (temp_dir , filename )
633
- download_hash = _download_url (resp , link , from_path )
634
- if link .hash and link .hash_name :
635
- _check_hash (download_hash , link )
563
+ from_path , content_type = _download_http_url (link , session , temp_dir )
636
564
637
565
# unpack the archive to the build dir location. even when only downloading
638
566
# archives, they have to be unpacked to parse dependencies
639
567
unpack_file (from_path , location , content_type , link )
640
568
641
569
# a download dir is specified; let's copy the archive there
642
- if download_dir and not already_downloaded :
570
+ if download_dir and not already_downloaded_path :
643
571
_copy_file (from_path , download_dir , content_type , link )
644
572
645
- if not already_downloaded :
573
+ if not already_downloaded_path :
646
574
os .unlink (from_path )
647
575
os .rmdir (temp_dir )
648
576
@@ -653,7 +581,6 @@ def unpack_file_url(link, location, download_dir=None):
653
581
of the link file inside download_dir."""
654
582
655
583
link_path = url_to_path (link .url_without_fragment )
656
- already_downloaded = False
657
584
658
585
# If it's a url to a local directory
659
586
if os .path .isdir (link_path ):
@@ -670,27 +597,12 @@ def unpack_file_url(link, location, download_dir=None):
670
597
_check_hash (link_path_hash , link )
671
598
672
599
# If a download dir is specified, is the file already there and valid?
600
+ already_downloaded_path = None
673
601
if download_dir :
674
- download_path = os .path .join (download_dir , link .filename )
675
- if os .path .exists (download_path ):
676
- content_type = mimetypes .guess_type (download_path )[0 ]
677
- logger .notify ('File was already downloaded %s' % download_path )
678
- if link .hash :
679
- download_hash = _get_hash_from_file (download_path , link )
680
- try :
681
- _check_hash (download_hash , link )
682
- already_downloaded = True
683
- except HashMismatch :
684
- logger .warn (
685
- 'Previously-downloaded file %s has bad hash, '
686
- 're-downloading.' % link_path
687
- )
688
- os .unlink (download_path )
689
- else :
690
- already_downloaded = True
602
+ already_downloaded_path = _check_download_dir (link , download_dir )
691
603
692
- if already_downloaded :
693
- from_path = download_path
604
+ if already_downloaded_path :
605
+ from_path = already_downloaded_path
694
606
else :
695
607
from_path = link_path
696
608
@@ -701,7 +613,7 @@ def unpack_file_url(link, location, download_dir=None):
701
613
unpack_file (from_path , location , content_type , link )
702
614
703
615
# a download dir is specified and not already downloaded
704
- if download_dir and not already_downloaded :
616
+ if download_dir and not already_downloaded_path :
705
617
_copy_file (from_path , download_dir , content_type , link )
706
618
707
619
@@ -765,3 +677,83 @@ def unpack_url(link, location, download_dir=None,
765
677
)
766
678
if only_download :
767
679
write_delete_marker_file (location )
680
+
681
+
682
def _download_http_url(link, session, temp_dir):
    """Fetch the file behind ``link`` over HTTP and place it in ``temp_dir``.

    The URL fragment (everything from the first ``#``) is stripped before
    the request is issued.  The local file name starts out as
    ``link.filename``; a non-empty ``filename`` parameter in the response's
    Content-Disposition header takes precedence.  If the resulting name has
    no extension, one is guessed from the Content-Type header, or failing
    that taken from the final response URL when it differs from
    ``link.url``.

    The response is then handed to ``_download_url`` together with the
    destination path (that helper is expected to write the body to disk).

    Returns a ``(file_path, content_type)`` tuple.  A
    ``requests.HTTPError`` is logged and re-raised.
    """
    url_without_fragment = link.url.split('#', 1)[0]

    # Ask for the identity encoding explicitly: requests accepts compressed
    # responses by default, and servers disagree on how to treat archives:
    #   - some leave a non-compressible file alone and send an empty
    #     Content-Encoding;
    #   - some notice the file is already compressed, leave it alone, and
    #     still add "Content-Encoding: gzip";
    #   - some notice nothing and compress an already-compressed file a
    #     second time, setting "Content-Encoding: gzip".
    # Requesting identity is meant to rule out the last case.  A server
    # that decompresses an already-compressed file because we did not ask
    # for compression would still break this, and there is probably no way
    # to make that work.
    request_headers = {"Accept-Encoding": "identity"}

    try:
        resp = session.get(
            url_without_fragment,
            headers=request_headers,
            stream=True,
        )
        resp.raise_for_status()
    except requests.HTTPError as exc:
        logger.fatal("HTTP error %s while getting %s" %
                     (exc.response.status_code, link))
        raise

    content_type = resp.headers.get('content-type', '')
    filename = link.filename  # used unless the server suggests a name

    # Content-Disposition may carry a better file name.
    content_disposition = resp.headers.get('content-disposition')
    if content_disposition:
        _, params = cgi.parse_header(content_disposition)
        # ``or`` keeps the fallback when the header's filename is empty.
        filename = params.get('filename') or filename

    if not splitext(filename)[1]:
        # No extension yet: try the content type first, then the URL we
        # were actually served from (only if it is not the link's own URL).
        extension = mimetypes.guess_extension(content_type)
        if not extension and link.url != resp.url:
            extension = os.path.splitext(resp.url)[1]
        if extension:
            filename += extension

    file_path = os.path.join(temp_dir, filename)
    _download_url(resp, link, file_path)
    return file_path, content_type
737
+
738
+
739
+ def _check_download_dir (link , download_dir ):
740
+ """ Check download_dir for previously downloaded file with correct hash
741
+ If a correct file is found return its path else None
742
+ """
743
+ download_path = os .path .join (download_dir , link .filename )
744
+ if os .path .exists (download_path ):
745
+ # If already downloaded, does its hash match?
746
+ logger .notify ('File was already downloaded %s' % download_path )
747
+ if link .hash :
748
+ download_hash = _get_hash_from_file (download_path , link )
749
+ try :
750
+ _check_hash (download_hash , link )
751
+ except HashMismatch :
752
+ logger .warn (
753
+ 'Previously-downloaded file %s has bad hash, '
754
+ 're-downloading.' % download_path
755
+ )
756
+ os .unlink (download_path )
757
+ return None
758
+ return download_path
759
+ return None
0 commit comments