|
1 |
| -""" |
2 |
| -<Program Name> |
3 |
| - fetcher.py |
4 |
| -
|
5 |
| -<Author> |
6 |
| - Teodora Sechkova <[email protected]> |
7 |
| -
|
8 |
| -<Started> |
9 |
| - December 14, 2020 |
10 |
| -
|
11 |
| -<Copyright> |
12 |
| - See LICENSE-MIT OR LICENSE for licensing information. |
| 1 | +# Copyright 2021, New York University and the TUF contributors |
| 2 | +# SPDX-License-Identifier: MIT OR Apache-2.0 |
13 | 3 |
|
14 |
| -<Purpose> |
15 |
| - Provides an implementation of FetcherInterface using the requests HTTP |
| 4 | +"""Provides an implementation of FetcherInterface using the Requests HTTP |
16 | 5 | library.
|
17 | 6 | """
|
18 | 7 |
|
| 8 | +# Imports |
19 | 9 | import requests
|
20 | 10 | import six
|
21 | 11 | import logging
|
22 | 12 | import time
|
23 | 13 |
|
24 | 14 | import urllib3.exceptions
|
| 15 | + |
25 | 16 | import tuf.exceptions
|
26 | 17 | import tuf.settings
|
27 |
| -import tuf.client.fetcher |
28 | 18 |
|
29 |
| -logger = logging.getLogger(__name__) |
| 19 | +from tuf.client.fetcher import FetcherInterface |
30 | 20 |
|
| 21 | +# Globals |
| 22 | +logger = logging.getLogger(__name__) |
31 | 23 |
|
32 |
| -class RequestsFetcher(tuf.client.fetcher.FetcherInterface): |
33 |
| - """ |
34 |
| - <Purpose> |
35 |
| - A concrete implementation of FetcherInterface based on the Requests |
| 24 | +# Classes |
| 25 | +class RequestsFetcher(FetcherInterface): |
| 26 | + """A concrete implementation of FetcherInterface based on the Requests |
36 | 27 | library.
|
| 28 | +
|
| 29 | + Attributes: |
| 30 | + _sessions: A dictionary of Requests.Session objects storing a separate |
| 31 | + session per scheme+hostname combination. |
37 | 32 | """
|
38 | 33 |
|
39 | 34 | def __init__(self):
|
40 | 35 | # From http://docs.python-requests.org/en/master/user/advanced/#session-objects:
|
41 | 36 | #
|
42 |
| - # "The Session object allows you to persist certain parameters across requests. |
43 |
| - # It also persists cookies across all requests made from the Session instance, |
44 |
| - # and will use urllib3's connection pooling. So if you're making several |
45 |
| - # requests to the same host, the underlying TCP connection will be reused, |
46 |
| - # which can result in a significant performance increase (see HTTP persistent |
47 |
| - # connection)." |
| 37 | + # "The Session object allows you to persist certain parameters across |
| 38 | + # requests. It also persists cookies across all requests made from the |
| 39 | + # Session instance, and will use urllib3's connection pooling. So if you're |
| 40 | + # making several requests to the same host, the underlying TCP connection |
| 41 | + # will be reused, which can result in a significant performance increase |
| 42 | + # (see HTTP persistent connection)." |
48 | 43 | #
|
49 |
| - # NOTE: We use a separate requests.Session per scheme+hostname combination, in |
50 |
| - # order to reuse connections to the same hostname to improve efficiency, but |
51 |
| - # avoiding sharing state between different hosts-scheme combinations to |
| 44 | + # NOTE: We use a separate requests.Session per scheme+hostname combination, |
| 45 | + # in order to reuse connections to the same hostname to improve efficiency, |
| 46 | + # but avoiding sharing state between different host-scheme combinations to |
52 | 47 | # minimize subtle security issues. Some cookies may not be HTTP-safe.
|
53 | 48 | self._sessions = {}
|
54 | 49 |
|
55 | 50 |
|
56 | 51 | def fetch(self, url, required_length):
|
| 52 | + """Fetches the contents of HTTP/HTTPS url from a remote server. |
| 53 | +
|
| 54 | + Ensures the length of the downloaded data is up to 'required_length'. |
| 55 | +
|
| 56 | + Arguments: |
| 57 | + url: A URL string that represents a file location. |
| 58 | + required_length: An integer value representing the file length in bytes. |
| 59 | +
|
| 60 | + Raises: |
| 61 | + tuf.exceptions.SlowRetrievalError: A timeout occurs while receiving data. |
| 62 | + tuf.exceptions.FetcherHTTPError: An HTTP error code is received. |
| 63 | +
|
| 64 | + Returns: |
| 65 | + A bytes iterator |
| 66 | + """ |
57 | 67 | # Get a customized session for each new scheme+hostname combination.
|
58 | 68 | session = self._get_session(url)
|
59 | 69 |
|
@@ -81,10 +91,10 @@ def chunks():
|
81 | 91 | bytes_received = 0
|
82 | 92 | while True:
|
83 | 93 | # We download a fixed chunk of data in every round. This is so that we
|
84 |
| - # can defend against slow retrieval attacks. Furthermore, we do not wish |
85 |
| - # to download an extremely large file in one shot. |
86 |
| - # Before beginning the round, sleep (if set) for a short amount of time |
87 |
| - # so that the CPU is not hogged in the while loop. |
| 94 | + # can defend against slow retrieval attacks. Furthermore, we do not |
| 95 | + # wish to download an extremely large file in one shot. |
| 96 | + # Before beginning the round, sleep (if set) for a short amount of |
| 97 | + # time so that the CPU is not hogged in the while loop. |
88 | 98 | if tuf.settings.SLEEP_BEFORE_ROUND:
|
89 | 99 | time.sleep(tuf.settings.SLEEP_BEFORE_ROUND)
|
90 | 100 |
|
@@ -121,8 +131,7 @@ def chunks():
|
121 | 131 |
|
122 | 132 |
|
123 | 133 | def _get_session(self, url):
|
124 |
| - """ |
125 |
| - Returns a different customized requests.Session per schema+hostname |
| 134 | + """Returns a different customized requests.Session per scheme+hostname |
126 | 135 | combination.
|
127 | 136 | """
|
128 | 137 | # Use a different requests.Session per scheme+hostname combination, to
|
|
0 commit comments