From 84957c87b2aa6b032c06421dc920e0c2b1ab517f Mon Sep 17 00:00:00 2001 From: Teodora Sechkova Date: Mon, 14 Dec 2020 13:49:16 +0200 Subject: [PATCH 01/15] Add FetcherInterface class The new class FetcherInterface defines an interface for abstract network download which can be implemented for a variety of network libraries and configurations. Signed-off-by: Teodora Sechkova --- tuf/fetcher.py | 51 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) create mode 100644 tuf/fetcher.py diff --git a/tuf/fetcher.py b/tuf/fetcher.py new file mode 100644 index 0000000000..5b07a464fe --- /dev/null +++ b/tuf/fetcher.py @@ -0,0 +1,51 @@ +""" + + fetcher.py + + + Teodora Sechkova + + + December 14, 2020 + + + See LICENSE-MIT OR LICENSE for licensing information. + + + Provides an interface for network IO abstraction. +""" + +import abc + + +class FetcherInterface(): + """ + + Defines an interface for abstract network download which can be implemented + for a variety of network libraries and configurations. + """ + __metaclass__ = abc.ABCMeta + + + @abc.abstractmethod + def fetch(self, url, required_length): + """ + + Fetches the contents of HTTP/HTTPS url from a remote server up to + required_length and returns a bytes iterator. + + + url: + A URL string that represents the location of the file. + + required_length: + An integer value representing the length of the file in bytes. + + + tuf.exceptions.SlowRetrievalError, if a timeout occurs while receiving + data from a server + + + A bytes iterator + """ + raise NotImplementedError # pragma: no cover From 41ffe7aab1ac97dcee2d7668acc457ac83c8e45b Mon Sep 17 00:00:00 2001 From: Teodora Sechkova Date: Tue, 22 Dec 2020 19:20:40 +0200 Subject: [PATCH 02/15] Implement RequestsFetcher class A concrete implementation of FetcherInterface based on the Requests library. 
Signed-off-by: Teodora Sechkova --- tuf/fetcher.py | 133 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 133 insertions(+) diff --git a/tuf/fetcher.py b/tuf/fetcher.py index 5b07a464fe..9ef09c2b7c 100644 --- a/tuf/fetcher.py +++ b/tuf/fetcher.py @@ -16,6 +16,16 @@ """ import abc +import requests +import six +import logging +import time + +import urllib3.exceptions +import tuf.exceptions +import tuf.settings + +logger = logging.getLogger(__name__) class FetcherInterface(): @@ -49,3 +59,126 @@ def fetch(self, url, required_length): A bytes iterator """ raise NotImplementedError # pragma: no cover + + + +class RequestsFetcher(FetcherInterface): + """ + + A concrete implementation of FetcherInterface based on the Requests + library. + """ + + def __init__(self): + # From http://docs.python-requests.org/en/master/user/advanced/#session-objects: + # + # "The Session object allows you to persist certain parameters across requests. + # It also persists cookies across all requests made from the Session instance, + # and will use urllib3's connection pooling. So if you're making several + # requests to the same host, the underlying TCP connection will be reused, + # which can result in a significant performance increase (see HTTP persistent + # connection)." + # + # NOTE: We use a separate requests.Session per scheme+hostname combination, in + # order to reuse connections to the same hostname to improve efficiency, but + # avoiding sharing state between different hosts-scheme combinations to + # minimize subtle security issues. Some cookies may not be HTTP-safe. + self._sessions = {} + + + def fetch(self, url, required_length): + # Get a customized session for each new schema+hostname combination. + session = self._get_session(url) + + # Get the requests.Response object for this URL. + # + # Defer downloading the response body with stream=True. + # Always set the timeout. 
This timeout value is interpreted by requests as: + # - connect timeout (max delay before first byte is received) + # - read (gap) timeout (max delay between bytes received) + with session.get(url, stream=True, + timeout=tuf.settings.SOCKET_TIMEOUT) as response: + + # Check response status. + response.raise_for_status() + + try: + bytes_received = 0 + while True: + # We download a fixed chunk of data in every round. This is so that we + # can defend against slow retrieval attacks. Furthermore, we do not wish + # to download an extremely large file in one shot. + # Before beginning the round, sleep (if set) for a short amount of time + # so that the CPU is not hogged in the while loop. + if tuf.settings.SLEEP_BEFORE_ROUND: + time.sleep(tuf.settings.SLEEP_BEFORE_ROUND) + + read_amount = min( + tuf.settings.CHUNK_SIZE, required_length - bytes_received) + + # NOTE: This may not handle some servers adding a Content-Encoding + # header, which may cause urllib3 to misbehave: + # https://github.com/pypa/pip/blob/404838abcca467648180b358598c597b74d568c9/src/pip/_internal/download.py#L547-L582 + data = response.raw.read(read_amount) + bytes_received += len(data) + + yield data + + if bytes_received == required_length: + break + + # We might have no more data to read. Check number of bytes downloaded. + if not data: + logger.debug('Downloaded ' + repr(bytes_received) + '/' + + repr(required_length) + ' bytes.') + + # Finally, we signal that the download is complete. + break + + except urllib3.exceptions.ReadTimeoutError as e: + raise tuf.exceptions.SlowRetrievalError(str(e)) + + + + + def _get_session(self, url): + """ + Returns a different customized requests.Session per schema+hostname + combination. + """ + # Use a different requests.Session per schema+hostname combination, to + # reuse connections while minimizing subtle security issues. 
+ parsed_url = six.moves.urllib.parse.urlparse(url) + + if not parsed_url.scheme or not parsed_url.hostname: + raise tuf.exceptions.URLParsingError( + 'Could not get scheme and hostname from URL: ' + url) + + session_index = parsed_url.scheme + '+' + parsed_url.hostname + + logger.debug('url: ' + url) + logger.debug('session index: ' + session_index) + + session = self._sessions.get(session_index) + + if not session: + session = requests.Session() + self._sessions[session_index] = session + + # Attach some default headers to every Session. + requests_user_agent = session.headers['User-Agent'] + # Follows the RFC: https://tools.ietf.org/html/rfc7231#section-5.5.3 + tuf_user_agent = 'tuf/' + tuf.__version__ + ' ' + requests_user_agent + session.headers.update({ + # Tell the server not to compress or modify anything. + # https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Encoding#Directives + 'Accept-Encoding': 'identity', + # The TUF user agent. + 'User-Agent': tuf_user_agent}) + + logger.debug('Made new session for ' + session_index) + + else: + logger.debug('Reusing session for ' + session_index) + + return session From 815fe24f00747e70dfe8d712aaa5b87eb0da1df3 Mon Sep 17 00:00:00 2001 From: Teodora Sechkova Date: Mon, 14 Dec 2020 13:55:32 +0200 Subject: [PATCH 03/15] Move network IO logic to RequestsFetcher Abstract the network IO. Move the network operations from tuf.download to the RequestsFetcher class which is TUF's implementation of the abstract FetcherInterface. 
Signed-off-by: Teodora Sechkova --- tuf/download.py | 213 ++++++++---------------------------------------- 1 file changed, 34 insertions(+), 179 deletions(-) diff --git a/tuf/download.py b/tuf/download.py index b45e842d7c..c830f48a1f 100755 --- a/tuf/download.py +++ b/tuf/download.py @@ -32,42 +32,23 @@ from __future__ import unicode_literals import logging -import time import timeit import tempfile -import tuf -import requests - import securesystemslib import securesystemslib.util import six +import tuf +import tuf.fetcher import tuf.exceptions import tuf.formats -import urllib3.exceptions - # See 'log.py' to learn how logging is handled in TUF. logger = logging.getLogger(__name__) -# From http://docs.python-requests.org/en/master/user/advanced/#session-objects: -# -# "The Session object allows you to persist certain parameters across requests. -# It also persists cookies across all requests made from the Session instance, -# and will use urllib3's connection pooling. So if you're making several -# requests to the same host, the underlying TCP connection will be reused, -# which can result in a significant performance increase (see HTTP persistent -# connection)." -# -# NOTE: We use a separate requests.Session per scheme+hostname combination, in -# order to reuse connections to the same hostname to improve efficiency, but -# avoiding sharing state between different hosts-scheme combinations to -# minimize subtle security issues. Some cookies may not be HTTP-safe. -_sessions = {} - - -def safe_download(url, required_length): + +def safe_download(url, required_length, fetcher): """ Given the 'url' and 'required_length' of the desired file, open a connection @@ -84,6 +65,10 @@ def safe_download(url, required_length): An integer value representing the length of the file. This is an exact limit. + fetcher: + An object implementing FetcherInterface that performs the network IO + operations. + A file object is created on disk to store the contents of 'url'. 
@@ -105,13 +90,13 @@ def safe_download(url, required_length): securesystemslib.formats.URL_SCHEMA.check_match(url) tuf.formats.LENGTH_SCHEMA.check_match(required_length) - return _download_file(url, required_length, STRICT_REQUIRED_LENGTH=True) + return _download_file(url, required_length, fetcher, STRICT_REQUIRED_LENGTH=True) -def unsafe_download(url, required_length): +def unsafe_download(url, required_length, fetcher): """ Given the 'url' and 'required_length' of the desired file, open a connection @@ -128,6 +113,10 @@ def unsafe_download(url, required_length): An integer value representing the length of the file. This is an upper limit. + fetcher: + An object implementing FetcherInterface that performs the network IO + operations. + A file object is created on disk to store the contents of 'url'. @@ -149,13 +138,13 @@ def unsafe_download(url, required_length): securesystemslib.formats.URL_SCHEMA.check_match(url) tuf.formats.LENGTH_SCHEMA.check_match(required_length) - return _download_file(url, required_length, STRICT_REQUIRED_LENGTH=False) + return _download_file(url, required_length, fetcher, STRICT_REQUIRED_LENGTH=False) -def _download_file(url, required_length, STRICT_REQUIRED_LENGTH=True): +def _download_file(url, required_length, fetcher, STRICT_REQUIRED_LENGTH=True): """ Given the url and length of the desired file, this function opens a @@ -192,12 +181,6 @@ def _download_file(url, required_length, STRICT_REQUIRED_LENGTH=True): A file object that points to the contents of 'url'. """ - - # Do all of the arguments have the appropriate format? - # Raise 'securesystemslib.exceptions.FormatError' if there is a mismatch. - securesystemslib.formats.URL_SCHEMA.check_match(url) - tuf.formats.LENGTH_SCHEMA.check_match(required_length) - # 'url.replace('\\', '/')' is needed for compatibility with Windows-based # systems, because they might use back-slashes in place of forward-slashes. # This converts it to the common format. 
unquote() replaces %xx escapes in a @@ -209,152 +192,19 @@ def _download_file(url, required_length, STRICT_REQUIRED_LENGTH=True): # This is the temporary file that we will return to contain the contents of # the downloaded file. temp_file = tempfile.TemporaryFile() + start_time = timeit.default_timer() - try: - # Use a different requests.Session per schema+hostname combination, to - # reuse connections while minimizing subtle security issues. - parsed_url = six.moves.urllib.parse.urlparse(url) - - if not parsed_url.scheme or not parsed_url.hostname: - raise tuf.exceptions.URLParsingError( - 'Could not get scheme and hostname from URL: ' + url) - - session_index = parsed_url.scheme + '+' + parsed_url.hostname - - logger.debug('url: ' + url) - logger.debug('session index: ' + session_index) - - session = _sessions.get(session_index) - - if not session: - session = requests.Session() - _sessions[session_index] = session - - # Attach some default headers to every Session. - requests_user_agent = session.headers['User-Agent'] - # Follows the RFC: https://tools.ietf.org/html/rfc7231#section-5.5.3 - tuf_user_agent = 'tuf/' + tuf.__version__ + ' ' + requests_user_agent - session.headers.update({ - # Tell the server not to compress or modify anything. - # https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Encoding#Directives - 'Accept-Encoding': 'identity', - # The TUF user agent. - 'User-Agent': tuf_user_agent}) - - logger.debug('Made new session for ' + session_index) - - else: - logger.debug('Reusing session for ' + session_index) - - # Get the requests.Response object for this URL. - # - # Defer downloading the response body with stream=True. - # Always set the timeout. This timeout value is interpreted by requests as: - # - connect timeout (max delay before first byte is received) - # - read (gap) timeout (max delay between bytes received) - with session.get(url, stream=True, - timeout=tuf.settings.SOCKET_TIMEOUT) as response: - - # Check response status. 
- response.raise_for_status() - - # Download the contents of the URL, up to the required length, to a - # temporary file, and get the total number of downloaded bytes. - total_downloaded, average_download_speed = \ - _download_fixed_amount_of_data(response, temp_file, required_length) - - # Does the total number of downloaded bytes match the required length? - _check_downloaded_length(total_downloaded, required_length, - STRICT_REQUIRED_LENGTH=STRICT_REQUIRED_LENGTH, - average_download_speed=average_download_speed) - - except Exception: - # Close 'temp_file'. Any written data is lost. - temp_file.close() - logger.debug('Could not download URL: ' + repr(url)) - raise - - else: - return temp_file - - - - - -def _download_fixed_amount_of_data(response, temp_file, required_length): - """ - - This is a helper function, where the download really happens. While-block - reads data from response a fixed chunk of data at a time, or less, until - 'required_length' is reached. - - - response: - The object for communicating with the server about the contents of a URL. - - temp_file: - A temporary file where the contents at the URL specified by the - 'response' object will be stored. - - required_length: - The number of bytes that we must download for the file. This is almost - always specified by the TUF metadata for the data file in question - (except in the case of timestamp metadata, in which case we would fix a - reasonable upper bound). - - - Data from the server will be written to 'temp_file'. - - - tuf.exceptions.SlowRetrievalError - will be raised if urllib3.exceptions.ReadTimeoutError is caught (if the - download times out). - - Otherwise, runtime or network exceptions will be raised without question. - - - A (total_downloaded, average_download_speed) tuple, where - 'total_downloaded' is the total number of bytes downloaded for the desired - file and the 'average_download_speed' calculated for the download - attempt. - """ - - # Keep track of total bytes downloaded. 
- number_of_bytes_received = 0 average_download_speed = 0 - - start_time = timeit.default_timer() + number_of_bytes_received = 0 try: - while True: - # We download a fixed chunk of data in every round. This is so that we - # can defend against slow retrieval attacks. Furthermore, we do not wish - # to download an extremely large file in one shot. - # Before beginning the round, sleep (if set) for a short amount of time - # so that the CPU is not hogged in the while loop. - if tuf.settings.SLEEP_BEFORE_ROUND: - time.sleep(tuf.settings.SLEEP_BEFORE_ROUND) - - read_amount = min( - tuf.settings.CHUNK_SIZE, required_length - number_of_bytes_received) - # NOTE: This may not handle some servers adding a Content-Encoding - # header, which may cause urllib3 to misbehave: - # https://github.com/pypa/pip/blob/404838abcca467648180b358598c597b74d568c9/src/pip/_internal/download.py#L547-L582 - data = response.raw.read(read_amount) - - number_of_bytes_received = number_of_bytes_received + len(data) - - # Data successfully read from the response. Store it. - temp_file.write(data) - - if number_of_bytes_received == required_length: - break + for chunk in fetcher.fetch(url, required_length): stop_time = timeit.default_timer() seconds_spent_receiving = stop_time - start_time - # Measure the average download speed. + number_of_bytes_received += len(chunk) average_download_speed = number_of_bytes_received / seconds_spent_receiving if average_download_speed < tuf.settings.MIN_AVERAGE_DOWNLOAD_SPEED: @@ -366,18 +216,23 @@ def _download_fixed_amount_of_data(response, temp_file, required_length): logger.debug('The average download speed has not dipped below the' ' minimum average download speed set in tuf.settings.py.') - # We might have no more data to read. Check number of bytes downloaded. - if not data: - logger.debug('Downloaded ' + repr(number_of_bytes_received) + '/' + - repr(required_length) + ' bytes.') - # Finally, we signal that the download is complete. 
- break + temp_file.write(chunk) - except urllib3.exceptions.ReadTimeoutError as e: - raise tuf.exceptions.SlowRetrievalError(str(e)) + # Does the total number of downloaded bytes match the required length? + _check_downloaded_length(number_of_bytes_received, required_length, + STRICT_REQUIRED_LENGTH=STRICT_REQUIRED_LENGTH, + average_download_speed=average_download_speed) + + except Exception: + # Close 'temp_file'. Any written data is lost. + temp_file.close() + logger.debug('Could not download URL: ' + repr(url)) + raise + + else: + return temp_file - return number_of_bytes_received, average_download_speed From 33b89a8b99cc11a063d5f1932d85a441985fc71e Mon Sep 17 00:00:00 2001 From: Teodora Sechkova Date: Mon, 14 Dec 2020 13:57:35 +0200 Subject: [PATCH 04/15] Add fetcher as parameter to Updater class Initialize Updater with an external implementation of FetcherInterface. If not provided, tuf.fetcher.RequestsFetcher is used. Signed-off-by: Teodora Sechkova --- tuf/client/updater.py | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/tuf/client/updater.py b/tuf/client/updater.py index 9ada0974e2..25890d031f 100755 --- a/tuf/client/updater.py +++ b/tuf/client/updater.py @@ -131,6 +131,7 @@ import tuf import tuf.download +import tuf.fetcher import tuf.formats import tuf.settings import tuf.keydb @@ -619,7 +620,7 @@ class Updater(object): http://www.python.org/dev/peps/pep-0008/#method-names-and-instance-variables """ - def __init__(self, repository_name, repository_mirrors): + def __init__(self, repository_name, repository_mirrors, fetcher=None): """ Constructor. Instantiating an updater object causes all the metadata @@ -659,6 +660,11 @@ def __init__(self, repository_name, repository_mirrors): 'targets_path': 'targets', 'confined_target_dirs': ['']}} + fetcher: + A concrete 'FetcherInterface' implementation. Performs the network + related download operations. 
If an external implementation is not + provided, tuf.fetcher.RequestsFetcher is used. + securesystemslib.exceptions.FormatError: If the arguments are improperly formatted. @@ -688,6 +694,13 @@ def __init__(self, repository_name, repository_mirrors): self.repository_name = repository_name self.mirrors = repository_mirrors + # Initialize Updater with an externally provided 'fetcher' implementing + # the network download. By default tuf.fetcher.RequestsFetcher is used. + if fetcher is None: + self.fetcher = tuf.fetcher.RequestsFetcher() + else: + self.fetcher = fetcher + # Store the trusted metadata read from disk. self.metadata = {} @@ -1311,7 +1324,8 @@ def _get_target_file(self, target_filepath, file_length, file_hashes, for file_mirror in file_mirrors: try: - file_object = tuf.download.safe_download(file_mirror, file_length) + file_object = tuf.download.safe_download(file_mirror, + file_length, self.fetcher) # Verify 'file_object' against the expected length and hashes. self._check_file_length(file_object, file_length) @@ -1509,7 +1523,7 @@ def _get_metadata_file(self, metadata_role, remote_filename, for file_mirror in file_mirrors: try: file_object = tuf.download.unsafe_download(file_mirror, - upperbound_filelength) + upperbound_filelength, self.fetcher) file_object.seek(0) # Verify 'file_object' according to the callable function. From 6c497927762c87b3c1fdf0c560a09b68322a2a01 Mon Sep 17 00:00:00 2001 From: Teodora Sechkova Date: Tue, 22 Dec 2020 16:43:41 +0200 Subject: [PATCH 05/15] Update tests importing tuf.download Pass RequestsFetcher object to tuf.download functions. 
Signed-off-by: Teodora Sechkova --- tests/test_download.py | 63 +++++++++++++++++++++-------------------- tests/test_proxy_use.py | 29 ++++++++++--------- 2 files changed, 49 insertions(+), 43 deletions(-) diff --git a/tests/test_download.py b/tests/test_download.py index c6933fcb1b..9a4eace6d1 100755 --- a/tests/test_download.py +++ b/tests/test_download.py @@ -43,6 +43,7 @@ import tuf import tuf.download as download +import tuf.fetcher import tuf.log import tuf.unittest_toolbox as unittest_toolbox import tuf.exceptions @@ -85,6 +86,10 @@ def setUp(self): digest = m.hexdigest() self.target_hash = {'md5':digest} + # Initialize the default fetcher for the download + self.fetcher = tuf.fetcher.RequestsFetcher() + + # Stop server process and perform clean up. def tearDown(self): @@ -100,7 +105,7 @@ def tearDown(self): def test_download_url_to_tempfileobj(self): download_file = download.safe_download - with download_file(self.url, self.target_data_length) as temp_fileobj: + with download_file(self.url, self.target_data_length, self.fetcher) as temp_fileobj: temp_fileobj.seek(0) temp_file_data = temp_fileobj.read().decode('utf-8') self.assertEqual(self.target_data, temp_file_data) @@ -118,18 +123,18 @@ def test_download_url_to_tempfileobj_and_lengths(self): # the server-reported length of the file does not match the # required_length. 'updater.py' *does* verify the hashes of downloaded # content. - download.safe_download(self.url, self.target_data_length - 4).close() - download.unsafe_download(self.url, self.target_data_length - 4).close() + download.safe_download(self.url, self.target_data_length - 4, self.fetcher).close() + download.unsafe_download(self.url, self.target_data_length - 4, self.fetcher).close() # We catch 'tuf.exceptions.DownloadLengthMismatchError' for safe_download() # because it will not download more bytes than requested (in this case, a # length greater than the size of the target file). 
self.assertRaises(tuf.exceptions.DownloadLengthMismatchError, - download.safe_download, self.url, self.target_data_length + 1) + download.safe_download, self.url, self.target_data_length + 1, self.fetcher) # Calling unsafe_download() with a mismatched length should not raise an # exception. - download.unsafe_download(self.url, self.target_data_length + 1).close() + download.unsafe_download(self.url, self.target_data_length + 1, self.fetcher).close() @@ -165,31 +170,29 @@ def test_download_url_to_tempfileobj_and_urls(self): unsafe_download_file = download.unsafe_download self.assertRaises(securesystemslib.exceptions.FormatError, - download_file, None, self.target_data_length) - - self.assertRaises(tuf.exceptions.URLParsingError, - download_file, - self.random_string(), self.target_data_length) + download_file, None, self.target_data_length, self.fetcher) url = 'http://localhost:' \ + str(self.server_process_handler.port) + '/' + self.random_string() self.assertRaises(requests.exceptions.HTTPError, download_file, url, - self.target_data_length) + self.target_data_length, + self.fetcher) url1 = 'http://localhost:' \ + str(self.server_process_handler.port + 1) + '/' + self.random_string() self.assertRaises(requests.exceptions.ConnectionError, download_file, url1, - self.target_data_length) + self.target_data_length, + self.fetcher) # Specify an unsupported URI scheme. url_with_unsupported_uri = self.url.replace('http', 'file') self.assertRaises(requests.exceptions.InvalidSchema, download_file, url_with_unsupported_uri, - self.target_data_length) + self.target_data_length, self.fetcher) self.assertRaises(requests.exceptions.InvalidSchema, unsafe_download_file, - url_with_unsupported_uri, self.target_data_length) + url_with_unsupported_uri, self.target_data_length, self.fetcher) @@ -290,7 +293,7 @@ def test_https_connection(self): os.environ['REQUESTS_CA_BUNDLE'] = bad_cert_fname # Clear sessions to ensure that the certificate we just specified is used. 
# TODO: Confirm necessity of this session clearing and lay out mechanics. - tuf.download._sessions = {} + self.fetcher._sessions = {} # Try connecting to the server process with the bad cert while trusting # the bad cert. Expect failure because even though we trust it, the @@ -302,9 +305,9 @@ def test_https_connection(self): category=urllib3.exceptions.SubjectAltNameWarning) with self.assertRaises(requests.exceptions.SSLError): - download.safe_download(bad_https_url, target_data_length) + download.safe_download(bad_https_url, target_data_length, self.fetcher) with self.assertRaises(requests.exceptions.SSLError): - download.unsafe_download(bad_https_url, target_data_length) + download.unsafe_download(bad_https_url, target_data_length, self.fetcher) # Try connecting to the server processes with the good certs while not # trusting the good certs (trusting the bad cert instead). Expect failure @@ -312,31 +315,31 @@ def test_https_connection(self): # trust it. logger.info('Trying HTTPS download of target file: ' + good_https_url) with self.assertRaises(requests.exceptions.SSLError): - download.safe_download(good_https_url, target_data_length) + download.safe_download(good_https_url, target_data_length, self.fetcher) with self.assertRaises(requests.exceptions.SSLError): - download.unsafe_download(good_https_url, target_data_length) + download.unsafe_download(good_https_url, target_data_length, self.fetcher) logger.info('Trying HTTPS download of target file: ' + good2_https_url) with self.assertRaises(requests.exceptions.SSLError): - download.safe_download(good2_https_url, target_data_length) + download.safe_download(good2_https_url, target_data_length, self.fetcher) with self.assertRaises(requests.exceptions.SSLError): - download.unsafe_download(good2_https_url, target_data_length) + download.unsafe_download(good2_https_url, target_data_length, self.fetcher) # Configure environment to now trust the certfile that is expired. 
os.environ['REQUESTS_CA_BUNDLE'] = expired_cert_fname # Clear sessions to ensure that the certificate we just specified is used. # TODO: Confirm necessity of this session clearing and lay out mechanics. - tuf.download._sessions = {} + self.fetcher._sessions = {} # Try connecting to the server process with the expired cert while # trusting the expired cert. Expect failure because even though we trust # it, it is expired. logger.info('Trying HTTPS download of target file: ' + expired_https_url) with self.assertRaises(requests.exceptions.SSLError): - download.safe_download(expired_https_url, target_data_length) + download.safe_download(expired_https_url, target_data_length, self.fetcher) with self.assertRaises(requests.exceptions.SSLError): - download.unsafe_download(expired_https_url, target_data_length) + download.unsafe_download(expired_https_url, target_data_length, self.fetcher) # Try connecting to the server processes with the good certs while @@ -346,18 +349,18 @@ def test_https_connection(self): os.environ['REQUESTS_CA_BUNDLE'] = good_cert_fname # Clear sessions to ensure that the certificate we just specified is used. # TODO: Confirm necessity of this session clearing and lay out mechanics. - tuf.download._sessions = {} + self.fetcher._sessions = {} logger.info('Trying HTTPS download of target file: ' + good_https_url) - download.safe_download(good_https_url, target_data_length).close() - download.unsafe_download(good_https_url, target_data_length).close() + download.safe_download(good_https_url, target_data_length, self.fetcher).close() + download.unsafe_download(good_https_url, target_data_length,self.fetcher).close() os.environ['REQUESTS_CA_BUNDLE'] = good2_cert_fname # Clear sessions to ensure that the certificate we just specified is used. # TODO: Confirm necessity of this session clearing and lay out mechanics. 
- tuf.download._sessions = {} + self.fetcher._sessions = {} logger.info('Trying HTTPS download of target file: ' + good2_https_url) - download.safe_download(good2_https_url, target_data_length).close() - download.unsafe_download(good2_https_url, target_data_length).close() + download.safe_download(good2_https_url, target_data_length, self.fetcher).close() + download.unsafe_download(good2_https_url, target_data_length, self.fetcher).close() finally: for proc_handler in [ diff --git a/tests/test_proxy_use.py b/tests/test_proxy_use.py index b3305d97e2..1f02785589 100755 --- a/tests/test_proxy_use.py +++ b/tests/test_proxy_use.py @@ -121,6 +121,9 @@ def setUpClass(cls): # the type of connection used with the target server. cls.https_proxy_addr = 'https://localhost:' + str(cls.https_proxy_handler.port) + # Initialize the default fetcher for the download + cls.fetcher = tuf.fetcher.RequestsFetcher() + @classmethod @@ -202,8 +205,8 @@ def test_baseline_no_proxy(self): """ logger.info('Trying HTTP download with no proxy: ' + self.url) - download.safe_download(self.url, self.target_data_length) - download.unsafe_download(self.url, self.target_data_length) + download.safe_download(self.url, self.target_data_length, self.fetcher) + download.unsafe_download(self.url, self.target_data_length, self.fetcher) @@ -218,8 +221,8 @@ def test_http_dl_via_smart_http_proxy(self): self.set_env_value('HTTP_PROXY', self.http_proxy_addr) logger.info('Trying HTTP download via HTTP proxy: ' + self.url) - download.safe_download(self.url, self.target_data_length) - download.unsafe_download(self.url, self.target_data_length) + download.safe_download(self.url, self.target_data_length, self.fetcher) + download.unsafe_download(self.url, self.target_data_length, self.fetcher) @@ -243,11 +246,11 @@ def test_https_dl_via_smart_http_proxy(self): os.path.join('ssl_certs', 'ssl_cert.crt')) # Clear sessions to ensure that the certificate we just specified is used. 
# TODO: Confirm necessity of this session clearing and lay out mechanics. - tuf.download._sessions = {} + self.fetcher._sessions = {} logger.info('Trying HTTPS download via HTTP proxy: ' + self.url_https) - download.safe_download(self.url_https, self.target_data_length) - download.unsafe_download(self.url_https, self.target_data_length) + download.safe_download(self.url_https, self.target_data_length, self.fetcher) + download.unsafe_download(self.url_https, self.target_data_length, self.fetcher) @@ -267,11 +270,11 @@ def test_http_dl_via_https_proxy(self): os.path.join('ssl_certs', 'proxy_ca.crt')) # Clear sessions to ensure that the certificate we just specified is used. # TODO: Confirm necessity of this session clearing and lay out mechanics. - tuf.download._sessions = {} + self.fetcher._sessions = {} logger.info('Trying HTTP download via HTTPS proxy: ' + self.url_https) - download.safe_download(self.url, self.target_data_length) - download.unsafe_download(self.url, self.target_data_length) + download.safe_download(self.url, self.target_data_length, self.fetcher) + download.unsafe_download(self.url, self.target_data_length, self.fetcher) @@ -293,11 +296,11 @@ def test_https_dl_via_https_proxy(self): os.path.join('ssl_certs', 'proxy_ca.crt')) # Clear sessions to ensure that the certificate we just specified is used. # TODO: Confirm necessity of this session clearing and lay out mechanics. 
- tuf.download._sessions = {} + self.fetcher._sessions = {} logger.info('Trying HTTPS download via HTTPS proxy: ' + self.url_https) - download.safe_download(self.url_https, self.target_data_length) - download.unsafe_download(self.url_https, self.target_data_length) + download.safe_download(self.url_https, self.target_data_length, self.fetcher) + download.unsafe_download(self.url_https, self.target_data_length, self.fetcher) From 33b89a8b99cc11a063d5f1932d85a441985fc71e Mon Sep 17 00:00:00 2001 From: Teodora Sechkova Date: Tue, 22 Dec 2020 16:44:16 +0200 Subject: [PATCH 06/15] Add test_fetcher Add unit test for requests_fetcher.py Signed-off-by: Teodora Sechkova --- tests/test_fetcher.py | 119 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 119 insertions(+) create mode 100644 tests/test_fetcher.py diff --git a/tests/test_fetcher.py b/tests/test_fetcher.py new file mode 100644 index 0000000000..df1930f551 --- /dev/null +++ b/tests/test_fetcher.py @@ -0,0 +1,119 @@ +#!/usr/bin/env python + +""" + + test_fetcher.py + + + Teodora Sechkova tsechkova@vmware.com + + + December 18, 2020. + + + See LICENSE-MIT OR LICENSE for licensing information. + + + Unit test for RequestsFetcher. +""" + +import logging +import os +import io +import sys +import unittest +import tempfile + +import tuf +import tuf.exceptions +import tuf.fetcher +import tuf.unittest_toolbox as unittest_toolbox + +from tests import utils + + +logger = logging.getLogger(__name__) + + +class TestFetcher(unittest_toolbox.Modified_TestCase): + def setUp(self): + """ + Create a temporary file and launch a simple server in the + current working directory. + """ + + unittest_toolbox.Modified_TestCase.setUp(self) + + # Making a temporary file. 
+ current_dir = os.getcwd() + target_filepath = self.make_temp_data_file(directory=current_dir) + self.target_fileobj = open(target_filepath, 'r') + self.file_contents = self.target_fileobj.read() + self.file_length = len(self.file_contents) + + # Launch a SimpleHTTPServer (serves files in the current dir). + self.server_process_handler = utils.TestServerProcess(log=logger) + + rel_target_filepath = os.path.basename(target_filepath) + self.url = 'http://localhost:' \ + + str(self.server_process_handler.port) + '/' + rel_target_filepath + + # Create a temporary file where the target file chunks are written + # during fetching + self.temp_file = tempfile.TemporaryFile() + self.fetcher = tuf.fetcher.RequestsFetcher() + + + # Stop server process and perform clean up. + def tearDown(self): + unittest_toolbox.Modified_TestCase.tearDown(self) + + # Cleans the resources and flush the logged lines (if any). + self.server_process_handler.clean() + + self.target_fileobj.close() + self.temp_file.close() + + + # Test: Normal case. 
+ def test_fetch(self): + + for chunk in self.fetcher.fetch(self.url, self.file_length): + self.temp_file.write(chunk) + + self.temp_file.seek(0) + temp_file_data = self.temp_file.read().decode('utf-8') + self.assertEqual(self.file_contents, temp_file_data) + self.assertEqual(self.file_length, len(temp_file_data)) + + + # Test if fetcher downloads file up to a required length + def test_fetch_restricted_length(self): + for chunk in self.fetcher.fetch(self.url, self.file_length-4): + self.temp_file.write(chunk) + + self.temp_file.seek(0, io.SEEK_END) + self.assertEqual(self.temp_file.tell(), self.file_length-4) + + + # Test if fetcher does not downlad more than actual file length + def test_fetch_upper_length(self): + for chunk in self.fetcher.fetch(self.url, self.file_length+4): + self.temp_file.write(chunk) + + self.temp_file.seek(0, io.SEEK_END) + self.assertEqual(self.temp_file.tell(), self.file_length) + + + # Test incorrect URL parsing + def test_url_parsing(self): + with self.assertRaises(tuf.exceptions.URLParsingError) as cm: + for chunk in self.fetcher.fetch(self.random_string(), self.file_length): + self.temp_file.write(chunk) + + + +# Run unit test. +if __name__ == '__main__': + utils.configure_test_logging(sys.argv) + unittest.main() From aaddbd3f64c09473a64d9668c612c3cceb554a31 Mon Sep 17 00:00:00 2001 From: Teodora Sechkova Date: Thu, 7 Jan 2021 13:38:01 +0200 Subject: [PATCH 07/15] Take out connection time from download speed calculation - Update RequestsFetcher.fetch to return a generator object. - Update _download_file to skip connection time when calculating average download speed. - Write chunk to temp file before exiting the fetcher loop on error. 
Signed-off-by: Teodora Sechkova --- tuf/download.py | 16 ++++++++-------- tuf/fetcher.py | 24 ++++++++++++++---------- 2 files changed, 22 insertions(+), 18 deletions(-) diff --git a/tuf/download.py b/tuf/download.py index c830f48a1f..a6d86e1c9d 100755 --- a/tuf/download.py +++ b/tuf/download.py @@ -192,33 +192,33 @@ def _download_file(url, required_length, fetcher, STRICT_REQUIRED_LENGTH=True): # This is the temporary file that we will return to contain the contents of # the downloaded file. temp_file = tempfile.TemporaryFile() - start_time = timeit.default_timer() average_download_speed = 0 number_of_bytes_received = 0 try: - - for chunk in fetcher.fetch(url, required_length): + chunks = fetcher.fetch(url, required_length) + start_time = timeit.default_timer() + for chunk in chunks: stop_time = timeit.default_timer() - seconds_spent_receiving = stop_time - start_time + temp_file.write(chunk) + # Measure the average download speed. number_of_bytes_received += len(chunk) + seconds_spent_receiving = stop_time - start_time average_download_speed = number_of_bytes_received / seconds_spent_receiving if average_download_speed < tuf.settings.MIN_AVERAGE_DOWNLOAD_SPEED: logger.debug('The average download speed dropped below the minimum' - ' average download speed set in tuf.settings.py.') + ' average download speed set in tuf.settings.py. Stopping the' + ' download!') break else: logger.debug('The average download speed has not dipped below the' ' minimum average download speed set in tuf.settings.py.') - - temp_file.write(chunk) - # Does the total number of downloaded bytes match the required length? _check_downloaded_length(number_of_bytes_received, required_length, STRICT_REQUIRED_LENGTH=STRICT_REQUIRED_LENGTH, diff --git a/tuf/fetcher.py b/tuf/fetcher.py index 9ef09c2b7c..46a64d4f98 100644 --- a/tuf/fetcher.py +++ b/tuf/fetcher.py @@ -96,12 +96,12 @@ def fetch(self, url, required_length): # Always set the timeout. 
This timeout value is interpreted by requests as: # - connect timeout (max delay before first byte is received) # - read (gap) timeout (max delay between bytes received) - with session.get(url, stream=True, - timeout=tuf.settings.SOCKET_TIMEOUT) as response: - - # Check response status. - response.raise_for_status() + response = session.get(url, stream=True, + timeout=tuf.settings.SOCKET_TIMEOUT) + # Check response status. + response.raise_for_status() + def chunks(): try: bytes_received = 0 while True: @@ -122,11 +122,6 @@ def fetch(self, url, required_length): data = response.raw.read(read_amount) bytes_received += len(data) - yield data - - if bytes_received == required_length: - break - # We might have no more data to read. Check number of bytes downloaded. if not data: logger.debug('Downloaded ' + repr(bytes_received) + '/' + @@ -135,9 +130,18 @@ def fetch(self, url, required_length): # Finally, we signal that the download is complete. break + yield data + + if bytes_received == required_length: + break + except urllib3.exceptions.ReadTimeoutError as e: raise tuf.exceptions.SlowRetrievalError(str(e)) + finally: + response.close() + + return chunks() From 4f02e1ee4c11b1ed5c62bb5500acc34d7730d6af Mon Sep 17 00:00:00 2001 From: Teodora Sechkova Date: Mon, 18 Jan 2021 14:31:14 +0200 Subject: [PATCH 08/15] Avoid 'localhost' lookup in tests On Windows (Github Actions) the lookup for 'localhost' takes 1 second. This is because: - Windows retries connect() with a timeout - the machine has IPv6 and IPv4 but Testserver only binds the port on IPv4 - the test clients connect to 'localhost' Since socketserver.TCPServer does not seem to support IPv6 before 3.8, just replace 'localhost' with '127.0.0.1' in client-side URLs. 
See #1257 Signed-off-by: Teodora Sechkova --- tests/test_arbitrary_package_attack.py | 2 +- tests/test_download.py | 6 +++--- tests/test_endless_data_attack.py | 2 +- tests/test_extraneous_dependencies_attack.py | 2 +- tests/test_fetcher.py | 2 +- tests/test_indefinite_freeze_attack.py | 2 +- tests/test_key_revocation_integration.py | 2 +- tests/test_mix_and_match_attack.py | 2 +- tests/test_multiple_repositories_integration.py | 4 ++-- tests/test_proxy_use.py | 4 ++-- tests/test_replay_attack.py | 2 +- tests/test_slow_retrieval_attack.py | 2 +- tests/test_updater.py | 12 ++++++------ tests/test_updater_root_rotation_integration.py | 2 +- 14 files changed, 23 insertions(+), 23 deletions(-) diff --git a/tests/test_arbitrary_package_attack.py b/tests/test_arbitrary_package_attack.py index 2dd329ced5..44d49a623e 100755 --- a/tests/test_arbitrary_package_attack.py +++ b/tests/test_arbitrary_package_attack.py @@ -124,7 +124,7 @@ def setUp(self): # Set the url prefix required by the 'tuf/client/updater.py' updater. # 'path/to/tmp/repository' -> 'localhost:8001/tmp/repository'. repository_basepath = self.repository_directory[len(os.getcwd()):] - url_prefix = 'http://localhost:' \ + url_prefix = 'http://127.0.0.1:' \ + str(self.server_process_handler.port) + repository_basepath # Setting 'tuf.settings.repository_directory' with the temporary client diff --git a/tests/test_download.py b/tests/test_download.py index 9a4eace6d1..c69397ff14 100755 --- a/tests/test_download.py +++ b/tests/test_download.py @@ -77,7 +77,7 @@ def setUp(self): self.server_process_handler = utils.TestServerProcess(log=logger) rel_target_filepath = os.path.basename(target_filepath) - self.url = 'http://localhost:' \ + self.url = 'http://127.0.0.1:' \ + str(self.server_process_handler.port) + '/' + rel_target_filepath # Computing hash of target file data. 
@@ -172,14 +172,14 @@ def test_download_url_to_tempfileobj_and_urls(self): self.assertRaises(securesystemslib.exceptions.FormatError, download_file, None, self.target_data_length, self.fetcher) - url = 'http://localhost:' \ + url = 'http://127.0.0.1:' \ + str(self.server_process_handler.port) + '/' + self.random_string() self.assertRaises(requests.exceptions.HTTPError, download_file, url, self.target_data_length, self.fetcher) - url1 = 'http://localhost:' \ + url1 = 'http://127.0.0.1:' \ + str(self.server_process_handler.port + 1) + '/' + self.random_string() self.assertRaises(requests.exceptions.ConnectionError, download_file, diff --git a/tests/test_endless_data_attack.py b/tests/test_endless_data_attack.py index 759119ae9d..5566f5e5e4 100755 --- a/tests/test_endless_data_attack.py +++ b/tests/test_endless_data_attack.py @@ -126,7 +126,7 @@ def setUp(self): # Set the url prefix required by the 'tuf/client/updater.py' updater. # 'path/to/tmp/repository' -> 'localhost:8001/tmp/repository'. repository_basepath = self.repository_directory[len(os.getcwd()):] - url_prefix = 'http://localhost:' \ + url_prefix = 'http://127.0.0.1:' \ + str(self.server_process_handler.port) + repository_basepath # Setting 'tuf.settings.repository_directory' with the temporary client diff --git a/tests/test_extraneous_dependencies_attack.py b/tests/test_extraneous_dependencies_attack.py index 8eece9971d..953fb4f4bf 100755 --- a/tests/test_extraneous_dependencies_attack.py +++ b/tests/test_extraneous_dependencies_attack.py @@ -133,7 +133,7 @@ def setUp(self): # Set the url prefix required by the 'tuf/client/updater.py' updater. # 'path/to/tmp/repository' -> 'localhost:8001/tmp/repository'. 
repository_basepath = self.repository_directory[len(os.getcwd()):] - url_prefix = 'http://localhost:' \ + url_prefix = 'http://127.0.0.1:' \ + str(self.server_process_handler.port) + repository_basepath # Setting 'tuf.settings.repository_directory' with the temporary client diff --git a/tests/test_fetcher.py b/tests/test_fetcher.py index df1930f551..c90b7997c8 100644 --- a/tests/test_fetcher.py +++ b/tests/test_fetcher.py @@ -55,7 +55,7 @@ def setUp(self): self.server_process_handler = utils.TestServerProcess(log=logger) rel_target_filepath = os.path.basename(target_filepath) - self.url = 'http://localhost:' \ + self.url = 'http://127.0.0.1:' \ + str(self.server_process_handler.port) + '/' + rel_target_filepath # Create a temporary file where the target file chunks are written diff --git a/tests/test_indefinite_freeze_attack.py b/tests/test_indefinite_freeze_attack.py index e72299c456..8890efd084 100755 --- a/tests/test_indefinite_freeze_attack.py +++ b/tests/test_indefinite_freeze_attack.py @@ -146,7 +146,7 @@ def setUp(self): # Set the url prefix required by the 'tuf/client/updater.py' updater. # 'path/to/tmp/repository' -> 'localhost:8001/tmp/repository'. repository_basepath = self.repository_directory[len(os.getcwd()):] - url_prefix = 'http://localhost:' \ + url_prefix = 'http://127.0.0.1:' \ + str(self.server_process_handler.port) + repository_basepath # Setting 'tuf.settings.repository_directory' with the temporary client diff --git a/tests/test_key_revocation_integration.py b/tests/test_key_revocation_integration.py index ed0909b14f..6e1534b027 100755 --- a/tests/test_key_revocation_integration.py +++ b/tests/test_key_revocation_integration.py @@ -132,7 +132,7 @@ def setUp(self): # 'path/to/tmp/repository' -> 'localhost:8001/tmp/repository'. 
repository_basepath = self.repository_directory[len(os.getcwd()):] - url_prefix = 'http://localhost:' \ + url_prefix = 'http://127.0.0.1:' \ + str(self.server_process_handler.port) + repository_basepath # Setting 'tuf.settings.repository_directory' with the temporary client diff --git a/tests/test_mix_and_match_attack.py b/tests/test_mix_and_match_attack.py index 472d88b940..a02b598b7a 100755 --- a/tests/test_mix_and_match_attack.py +++ b/tests/test_mix_and_match_attack.py @@ -132,7 +132,7 @@ def setUp(self): # Set the url prefix required by the 'tuf/client/updater.py' updater. # 'path/to/tmp/repository' -> 'localhost:8001/tmp/repository'. repository_basepath = self.repository_directory[len(os.getcwd()):] - url_prefix = 'http://localhost:' \ + url_prefix = 'http://127.0.0.1:' \ + str(self.server_process_handler.port) + repository_basepath # Setting 'tuf.settings.repository_directory' with the temporary client diff --git a/tests/test_multiple_repositories_integration.py b/tests/test_multiple_repositories_integration.py index 24bd48e5bd..6c77517b03 100755 --- a/tests/test_multiple_repositories_integration.py +++ b/tests/test_multiple_repositories_integration.py @@ -136,8 +136,8 @@ def setUp(self): logger.debug('Server process 2 started.') - url_prefix = 'http://localhost:' + str(self.server_process_handler.port) - url_prefix2 = 'http://localhost:' + str(self.server_process_handler2.port) + url_prefix = 'http://127.0.0.1:' + str(self.server_process_handler.port) + url_prefix2 = 'http://127.0.0.1:' + str(self.server_process_handler2.port) self.repository_mirrors = {'mirror1': {'url_prefix': url_prefix, 'metadata_path': 'metadata', diff --git a/tests/test_proxy_use.py b/tests/test_proxy_use.py index 1f02785589..4f26b8de87 100755 --- a/tests/test_proxy_use.py +++ b/tests/test_proxy_use.py @@ -167,10 +167,10 @@ def setUp(self): suffix = '/' + os.path.basename(target_filepath) self.url = \ - 'http://localhost:' + str(self.http_server_handler.port) + suffix + 
'http://127.0.0.1:' + str(self.http_server_handler.port) + suffix self.url_https = \ - 'https://localhost:' + str(self.https_server_handler.port) + suffix + 'https://127.0.0.1:' + str(self.https_server_handler.port) + suffix diff --git a/tests/test_replay_attack.py b/tests/test_replay_attack.py index 3c5fa389f8..64b73229f0 100755 --- a/tests/test_replay_attack.py +++ b/tests/test_replay_attack.py @@ -132,7 +132,7 @@ def setUp(self): # Set the url prefix required by the 'tuf/client/updater.py' updater. # 'path/to/tmp/repository' -> 'localhost:8001/tmp/repository'. repository_basepath = self.repository_directory[len(os.getcwd()):] - url_prefix = 'http://localhost:' \ + url_prefix = 'http://127.0.0.1:' \ + str(self.server_process_handler.port) + repository_basepath # Setting 'tuf.settings.repository_directory' with the temporary client diff --git a/tests/test_slow_retrieval_attack.py b/tests/test_slow_retrieval_attack.py index 3c87e0d912..ca931f382f 100755 --- a/tests/test_slow_retrieval_attack.py +++ b/tests/test_slow_retrieval_attack.py @@ -171,7 +171,7 @@ def setUp(self): logger.info('Slow Retrieval Server process started.') - url_prefix = 'http://localhost:' \ + url_prefix = 'http://127.0.0.1:' \ + str(self.server_process_handler.port) + repository_basepath # Setting 'tuf.settings.repository_directory' with the temporary client diff --git a/tests/test_updater.py b/tests/test_updater.py index 69c67044ea..58790cc42e 100755 --- a/tests/test_updater.py +++ b/tests/test_updater.py @@ -167,7 +167,7 @@ def setUp(self): # 'path/to/tmp/repository' -> 'localhost:8001/tmp/repository'. 
repository_basepath = self.repository_directory[len(os.getcwd()):] - url_prefix = 'http://localhost:' \ + url_prefix = 'http://127.0.0.1:' \ + str(self.server_process_handler.port) + repository_basepath # Setting 'tuf.settings.repository_directory' with the temporary client @@ -1110,7 +1110,7 @@ def test_6_get_one_valid_targetinfo(self): # 'path/to/tmp/repository' -> 'localhost:8001/tmp/repository'. repository_basepath = self.repository_directory[len(os.getcwd()):] - url_prefix = 'http://localhost:' \ + url_prefix = 'http://127.0.0.1:' \ + str(self.server_process_handler.port) + repository_basepath self.repository_mirrors = {'mirror1': {'url_prefix': url_prefix, @@ -1406,7 +1406,7 @@ def test_7_updated_targets(self): # 'path/to/tmp/repository' -> 'localhost:8001/tmp/repository'. repository_basepath = self.repository_directory[len(os.getcwd()):] - url_prefix = 'http://localhost:' \ + url_prefix = 'http://127.0.0.1:' \ + str(self.server_process_handler.port) + repository_basepath # Setting 'tuf.settings.repository_directory' with the temporary client @@ -1533,7 +1533,7 @@ def test_8_remove_obsolete_targets(self): # 'path/to/tmp/repository' -> 'localhost:8001/tmp/repository'. repository_basepath = self.repository_directory[len(os.getcwd()):] - url_prefix = 'http://localhost:' \ + url_prefix = 'http://127.0.0.1:' \ + str(self.server_process_handler.port) + repository_basepath # Setting 'tuf.settings.repository_directory' with the temporary client @@ -1861,8 +1861,8 @@ def setUp(self): logger.debug('Server process 2 started.') - url_prefix = 'http://localhost:' + str(self.server_process_handler.port) - url_prefix2 = 'http://localhost:' + str(self.server_process_handler2.port) + url_prefix = 'http://127.0.0.1:' + str(self.server_process_handler.port) + url_prefix2 = 'http://127.0.0.1:' + str(self.server_process_handler2.port) # We have all of the necessary information for two repository mirrors # in map.json, except for url prefixes. 
diff --git a/tests/test_updater_root_rotation_integration.py b/tests/test_updater_root_rotation_integration.py index a59e12de89..24df3b38d2 100755 --- a/tests/test_updater_root_rotation_integration.py +++ b/tests/test_updater_root_rotation_integration.py @@ -140,7 +140,7 @@ def setUp(self): # 'path/to/tmp/repository' -> 'localhost:8001/tmp/repository'. repository_basepath = self.repository_directory[len(os.getcwd()):] - url_prefix = 'http://localhost:' \ + url_prefix = 'http://127.0.0.1:' \ + str(self.server_process_handler.port) + repository_basepath # Setting 'tuf.settings.repository_directory' with the temporary client From 29e3419c7a62ac49d37214af3adbfd523d5663ce Mon Sep 17 00:00:00 2001 From: Teodora Sechkova Date: Wed, 27 Jan 2021 13:54:07 +0200 Subject: [PATCH 09/15] Apply a defensive data length check in fetch() Use '>=' as the defensive version of the equality check. Add a comment describing the need of a chunks() generator. Signed-off-by: Teodora Sechkova --- tuf/fetcher.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tuf/fetcher.py b/tuf/fetcher.py index 46a64d4f98..fe0154b7af 100644 --- a/tuf/fetcher.py +++ b/tuf/fetcher.py @@ -101,6 +101,9 @@ def fetch(self, url, required_length): # Check response status. response.raise_for_status() + # Define a generator function to be returned by fetch. This way the caller + # of fetch can differentiate between connection and actual data download + # and measure download times accordingly. 
def chunks(): try: bytes_received = 0 @@ -132,7 +135,7 @@ def chunks(): yield data - if bytes_received == required_length: + if bytes_received >= required_length: break except urllib3.exceptions.ReadTimeoutError as e: From 50b3b193925d62ee52bd32a33b22f59677d2917a Mon Sep 17 00:00:00 2001 From: Teodora Sechkova Date: Wed, 27 Jan 2021 14:09:34 +0200 Subject: [PATCH 10/15] Test downloading data in more than one chunk Add test cases to test_fetcher and test_download that decrease default chunk size and download data in more than one chunk. Small code-style improvements. Signed-off-by: Teodora Sechkova --- tests/test_download.py | 23 +++++++++++++++++++++++ tests/test_fetcher.py | 41 +++++++++++++++++++++++++++++++++-------- 2 files changed, 56 insertions(+), 8 deletions(-) diff --git a/tests/test_download.py b/tests/test_download.py index c69397ff14..6805120e05 100755 --- a/tests/test_download.py +++ b/tests/test_download.py @@ -112,6 +112,29 @@ def test_download_url_to_tempfileobj(self): self.assertEqual(self.target_data_length, len(temp_file_data)) + # Test: Download url in more than one chunk. 
+ def test_download_url_in_chunks(self): + + # Set smaller chunk size to ensure that the file will be downloaded + # in more than one chunk + default_chunk_size = tuf.settings.CHUNK_SIZE + tuf.settings.CHUNK_SIZE = 4 + # We don't have access to chunks from download_file() + # so we just confirm that the expectation of more than one chunk is + # correct and verify that no errors are raised during download + chunks_count = self.target_data_length/tuf.settings.CHUNK_SIZE + self.assertGreater(chunks_count, 1) + + download_file = download.safe_download + with download_file(self.url, self.target_data_length, self.fetcher) as temp_fileobj: + temp_fileobj.seek(0) + temp_file_data = temp_fileobj.read().decode('utf-8') + self.assertEqual(self.target_data, temp_file_data) + self.assertEqual(self.target_data_length, len(temp_file_data)) + + # Restore default settings + tuf.settings.CHUNK_SIZE = default_chunk_size + # Test: Incorrect lengths. def test_download_url_to_tempfileobj_and_lengths(self): diff --git a/tests/test_fetcher.py b/tests/test_fetcher.py index c90b7997c8..c2d3d60740 100644 --- a/tests/test_fetcher.py +++ b/tests/test_fetcher.py @@ -16,6 +16,9 @@ Unit test for RequestsFetcher. """ +# Help with Python 2 compatibility, where the '/' operator performs +# integer division. +from __future__ import division import logging import os @@ -23,6 +26,7 @@ import sys import unittest import tempfile +import math import tuf import tuf.exceptions @@ -31,7 +35,6 @@ from tests import utils - logger = logging.getLogger(__name__) @@ -77,15 +80,12 @@ def tearDown(self): # Test: Normal case. 
def test_fetch(self): - for chunk in self.fetcher.fetch(self.url, self.file_length): self.temp_file.write(chunk) self.temp_file.seek(0) temp_file_data = self.temp_file.read().decode('utf-8') self.assertEqual(self.file_contents, temp_file_data) - self.assertEqual(self.file_length, len(temp_file_data)) - # Test if fetcher downloads file up to a required length def test_fetch_restricted_length(self): @@ -96,7 +96,7 @@ def test_fetch_restricted_length(self): self.assertEqual(self.temp_file.tell(), self.file_length-4) - # Test if fetcher does not downlad more than actual file length + # Test that fetcher does not download more than actual file length def test_fetch_upper_length(self): for chunk in self.fetcher.fetch(self.url, self.file_length+4): self.temp_file.write(chunk) @@ -107,9 +107,34 @@ def test_fetch_upper_length(self): # Test incorrect URL parsing def test_url_parsing(self): - with self.assertRaises(tuf.exceptions.URLParsingError) as cm: - for chunk in self.fetcher.fetch(self.random_string(), self.file_length): - self.temp_file.write(chunk) + with self.assertRaises(tuf.exceptions.URLParsingError): + self.fetcher.fetch(self.random_string(), self.file_length) + + + # Test: Normal case with url data downloaded in more than one chunk + def test_fetch_in_chunks(self): + # Set smaller chunk size to ensure that the file will be downloaded + # in more than one chunk + default_chunk_size = tuf.settings.CHUNK_SIZE + tuf.settings.CHUNK_SIZE = 4 + + # expected_chunks_count: 3 + expected_chunks_count = math.ceil(self.file_length/tuf.settings.CHUNK_SIZE) + self.assertEqual(expected_chunks_count, 3) + + chunks_count = 0 + for chunk in self.fetcher.fetch(self.url, self.file_length): + self.temp_file.write(chunk) + chunks_count+=1 + + self.temp_file.seek(0) + temp_file_data = self.temp_file.read().decode('utf-8') + self.assertEqual(self.file_contents, temp_file_data) + # Check that we calculate chunks as expected + self.assertEqual(chunks_count, expected_chunks_count) + + # 
Restore default settings + tuf.settings.CHUNK_SIZE = default_chunk_size From 055280b2afc2f868a02a606f53300426b083fffd Mon Sep 17 00:00:00 2001 From: Teodora Sechkova Date: Wed, 10 Feb 2021 17:51:51 +0200 Subject: [PATCH 11/15] Close temp file in test_proxy_use.py Calls to safe_download and unsafe_download leave a temporary file unclosed. Signed-off-by: Teodora Sechkova --- tests/test_proxy_use.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/tests/test_proxy_use.py b/tests/test_proxy_use.py index 4f26b8de87..56c81e2767 100755 --- a/tests/test_proxy_use.py +++ b/tests/test_proxy_use.py @@ -205,8 +205,8 @@ def test_baseline_no_proxy(self): """ logger.info('Trying HTTP download with no proxy: ' + self.url) - download.safe_download(self.url, self.target_data_length, self.fetcher) - download.unsafe_download(self.url, self.target_data_length, self.fetcher) + download.safe_download(self.url, self.target_data_length, self.fetcher).close() + download.unsafe_download(self.url, self.target_data_length, self.fetcher).close() @@ -221,8 +221,8 @@ def test_http_dl_via_smart_http_proxy(self): self.set_env_value('HTTP_PROXY', self.http_proxy_addr) logger.info('Trying HTTP download via HTTP proxy: ' + self.url) - download.safe_download(self.url, self.target_data_length, self.fetcher) - download.unsafe_download(self.url, self.target_data_length, self.fetcher) + download.safe_download(self.url, self.target_data_length, self.fetcher).close() + download.unsafe_download(self.url, self.target_data_length, self.fetcher).close() @@ -249,8 +249,8 @@ def test_https_dl_via_smart_http_proxy(self): self.fetcher._sessions = {} logger.info('Trying HTTPS download via HTTP proxy: ' + self.url_https) - download.safe_download(self.url_https, self.target_data_length, self.fetcher) - download.unsafe_download(self.url_https, self.target_data_length, self.fetcher) + download.safe_download(self.url_https, self.target_data_length, self.fetcher).close() + 
download.unsafe_download(self.url_https, self.target_data_length, self.fetcher).close() @@ -273,8 +273,8 @@ def test_http_dl_via_https_proxy(self): self.fetcher._sessions = {} logger.info('Trying HTTP download via HTTPS proxy: ' + self.url_https) - download.safe_download(self.url, self.target_data_length, self.fetcher) - download.unsafe_download(self.url, self.target_data_length, self.fetcher) + download.safe_download(self.url, self.target_data_length, self.fetcher).close() + download.unsafe_download(self.url, self.target_data_length, self.fetcher).close() @@ -299,8 +299,8 @@ def test_https_dl_via_https_proxy(self): self.fetcher._sessions = {} logger.info('Trying HTTPS download via HTTPS proxy: ' + self.url_https) - download.safe_download(self.url_https, self.target_data_length, self.fetcher) - download.unsafe_download(self.url_https, self.target_data_length, self.fetcher) + download.safe_download(self.url_https, self.target_data_length, self.fetcher).close() + download.unsafe_download(self.url_https, self.target_data_length, self.fetcher).close() From e9b294b57cf4cb9af02307506c9af126371da522 Mon Sep 17 00:00:00 2001 From: Jussi Kukkonen Date: Mon, 1 Feb 2021 14:36:27 +0200 Subject: [PATCH 12/15] Add an HTTP error for Fetcher interface A custom error is required so that updater is able to special case 403 & 404 status codes. Rewrite the test case a bit to be more readable. 
Signed-off-by: Jussi Kukkonen --- tests/test_download.py | 20 ++++++++------------ tuf/client/updater.py | 5 ++--- tuf/exceptions.py | 11 +++++++++++ tuf/fetcher.py | 8 +++++++- 4 files changed, 28 insertions(+), 16 deletions(-) diff --git a/tests/test_download.py b/tests/test_download.py index 6805120e05..90e9cb9406 100755 --- a/tests/test_download.py +++ b/tests/test_download.py @@ -192,23 +192,19 @@ def test_download_url_to_tempfileobj_and_urls(self): download_file = download.safe_download unsafe_download_file = download.unsafe_download - self.assertRaises(securesystemslib.exceptions.FormatError, - download_file, None, self.target_data_length, self.fetcher) + with self.assertRaises(securesystemslib.exceptions.FormatError): + download_file(None, self.target_data_length, self.fetcher) url = 'http://127.0.0.1:' \ + str(self.server_process_handler.port) + '/' + self.random_string() - self.assertRaises(requests.exceptions.HTTPError, - download_file, - url, - self.target_data_length, - self.fetcher) + with self.assertRaises(tuf.exceptions.FetcherHTTPError) as cm: + download_file(url, self.target_data_length, self.fetcher) + self.assertEqual(cm.exception.status_code, 404) + url1 = 'http://127.0.0.1:' \ + str(self.server_process_handler.port + 1) + '/' + self.random_string() - self.assertRaises(requests.exceptions.ConnectionError, - download_file, - url1, - self.target_data_length, - self.fetcher) + with self.assertRaises(requests.exceptions.ConnectionError): + download_file(url1, self.target_data_length, self.fetcher) # Specify an unsupported URI scheme. 
url_with_unsupported_uri = self.url.replace('http', 'file') diff --git a/tuf/client/updater.py b/tuf/client/updater.py index 25890d031f..ea6d5bb938 100755 --- a/tuf/client/updater.py +++ b/tuf/client/updater.py @@ -146,7 +146,6 @@ import securesystemslib.keys import securesystemslib.util import six -import requests.exceptions # The Timestamp role does not have signed metadata about it; otherwise we # would need an infinite regress of metadata. Therefore, we use some @@ -1125,8 +1124,8 @@ def _update_root_metadata(self, current_root_metadata): """ def neither_403_nor_404(mirror_error): - if isinstance(mirror_error, requests.exceptions.HTTPError): - if mirror_error.response.status_code in {403, 404}: + if isinstance(mirror_error, tuf.exceptions.FetcherHTTPError): + if mirror_error.status_code in {403, 404}: return False return True diff --git a/tuf/exceptions.py b/tuf/exceptions.py index 177e7a2981..5b2a345c9c 100755 --- a/tuf/exceptions.py +++ b/tuf/exceptions.py @@ -335,3 +335,14 @@ class URLParsingError(Error): class InvalidConfigurationError(Error): """If a configuration object does not match the expected format.""" +class FetcherHTTPError(Exception): + """ + Returned by FetcherInterface implementations for HTTP errors. + + Args: + message (str): The HTTP error messsage + status_code (int): The HTTP status code + """ + def __init__(self, message, status_code): + super(FetcherHTTPError, self).__init__(message) + self.status_code = status_code diff --git a/tuf/fetcher.py b/tuf/fetcher.py index fe0154b7af..a55aff224c 100644 --- a/tuf/fetcher.py +++ b/tuf/fetcher.py @@ -55,6 +55,7 @@ def fetch(self, url, required_length): tuf.exceptions.SlowRetrievalError, if a timeout occurs while receiving data from a server + tuf.exceptions.FetcherHTTPError, if an HTTP error code was received A bytes iterator """ @@ -99,7 +100,12 @@ def fetch(self, url, required_length): response = session.get(url, stream=True, timeout=tuf.settings.SOCKET_TIMEOUT) # Check response status. 
- response.raise_for_status() + try: + response.raise_for_status() + except requests.HTTPError as e: + status = e.response.status_code + raise tuf.exceptions.FetcherHTTPError(str(e), status) + # Define a generator function to be returned by fetch. This way the caller # of fetch can differentiate between connection and actual data download From 1677ce0bf8d5986c9e2c3add6d1b510851ef8603 Mon Sep 17 00:00:00 2001 From: Jussi Kukkonen Date: Wed, 3 Feb 2021 11:40:11 +0200 Subject: [PATCH 13/15] Move fetcher components to make API boundary clearer * Move FetcherInterface to tuf/client/ directory: This way everything inside that directory is clearly part of client API, and everything outside _may_ be more of an implementation detail (settings is still an unfortunate exception) * Keep RequestsFetcher in tuf/ for same reasons: it's just the default implementation, not explicitly part of client API An even clearer division would be if we moved all the client specific implementation details (download.py, mirrors.py, requests_fetcher.py) to tuf/client/_internal/ but that's a larger change... 
Signed-off-by: Jussi Kukkonen --- tests/test_download.py | 4 +- tests/test_fetcher.py | 4 +- tuf/client/fetcher.py | 53 +++++++++++++++++++++++++ tuf/client/updater.py | 4 +- tuf/download.py | 1 - tuf/{fetcher.py => requests_fetcher.py} | 42 ++------------------ 6 files changed, 63 insertions(+), 45 deletions(-) create mode 100644 tuf/client/fetcher.py rename tuf/{fetcher.py => requests_fetcher.py} (84%) diff --git a/tests/test_download.py b/tests/test_download.py index 90e9cb9406..f60e74b5f3 100755 --- a/tests/test_download.py +++ b/tests/test_download.py @@ -43,7 +43,7 @@ import tuf import tuf.download as download -import tuf.fetcher +import tuf.requests_fetcher import tuf.log import tuf.unittest_toolbox as unittest_toolbox import tuf.exceptions @@ -87,7 +87,7 @@ def setUp(self): self.target_hash = {'md5':digest} # Initialize the default fetcher for the download - self.fetcher = tuf.fetcher.RequestsFetcher() + self.fetcher = tuf.requests_fetcher.RequestsFetcher() diff --git a/tests/test_fetcher.py b/tests/test_fetcher.py index c2d3d60740..e05f4d7125 100644 --- a/tests/test_fetcher.py +++ b/tests/test_fetcher.py @@ -30,7 +30,7 @@ import tuf import tuf.exceptions -import tuf.fetcher +import tuf.requests_fetcher import tuf.unittest_toolbox as unittest_toolbox from tests import utils @@ -64,7 +64,7 @@ def setUp(self): # Create a temporary file where the target file chunks are written # during fetching self.temp_file = tempfile.TemporaryFile() - self.fetcher = tuf.fetcher.RequestsFetcher() + self.fetcher = tuf.requests_fetcher.RequestsFetcher() # Stop server process and perform clean up. diff --git a/tuf/client/fetcher.py b/tuf/client/fetcher.py new file mode 100644 index 0000000000..c7ce99ee0b --- /dev/null +++ b/tuf/client/fetcher.py @@ -0,0 +1,53 @@ +""" + + fetcher.py + + + Teodora Sechkova + + + December 14, 2020 + + + See LICENSE-MIT OR LICENSE for licensing information. + + + Provides an interface for network IO abstraction. 
+""" + +import abc + + +class FetcherInterface(): + """ + + Defines an interface for abstract network download which can be implemented + for a variety of network libraries and configurations. + """ + __metaclass__ = abc.ABCMeta + + + @abc.abstractmethod + def fetch(self, url, required_length): + """ + + Fetches the contents of HTTP/HTTPS url from a remote server up to + required_length and returns a bytes iterator. + + + url: + A URL string that represents the location of the file. + + required_length: + An integer value representing the length of the file in bytes. + + + tuf.exceptions.SlowRetrievalError, if a timeout occurs while receiving + data from a server + + tuf.exceptions.FetcherHTTPError, if an HTTP error code was received + + A bytes iterator + """ + raise NotImplementedError # pragma: no cover + diff --git a/tuf/client/updater.py b/tuf/client/updater.py index ea6d5bb938..3496889089 100755 --- a/tuf/client/updater.py +++ b/tuf/client/updater.py @@ -131,7 +131,7 @@ import tuf import tuf.download -import tuf.fetcher +import tuf.requests_fetcher import tuf.formats import tuf.settings import tuf.keydb @@ -696,7 +696,7 @@ def __init__(self, repository_name, repository_mirrors, fetcher=None): # Initialize Updater with an externally provided 'fetcher' implementing # the network download. By default tuf.fetcher.RequestsFetcher is used. 
if fetcher is None: - self.fetcher = tuf.fetcher.RequestsFetcher() + self.fetcher = tuf.requests_fetcher.RequestsFetcher() else: self.fetcher = fetcher diff --git a/tuf/download.py b/tuf/download.py index a6d86e1c9d..2d946ef891 100755 --- a/tuf/download.py +++ b/tuf/download.py @@ -40,7 +40,6 @@ import six import tuf -import tuf.fetcher import tuf.exceptions import tuf.formats diff --git a/tuf/fetcher.py b/tuf/requests_fetcher.py similarity index 84% rename from tuf/fetcher.py rename to tuf/requests_fetcher.py index a55aff224c..8bc70eaeb7 100644 --- a/tuf/fetcher.py +++ b/tuf/requests_fetcher.py @@ -12,10 +12,10 @@ See LICENSE-MIT OR LICENSE for licensing information. - Provides an interface for network IO abstraction. + Provides an implementation of FetcherInterface using the requests HTTP + library. """ -import abc import requests import six import logging @@ -24,46 +24,12 @@ import urllib3.exceptions import tuf.exceptions import tuf.settings +import tuf.client.fetcher logger = logging.getLogger(__name__) -class FetcherInterface(): - """ - - Defines an interface for abstract network download which can be implemented - for a variety of network libraries and configurations. - """ - __metaclass__ = abc.ABCMeta - - - @abc.abstractmethod - def fetch(self, url, required_length): - """ - - Fetches the contents of HTTP/HTTPS url from a remote server up to - required_length and returns a bytes iterator. - - - url: - A URL string that represents the location of the file. - - required_length: - An integer value representing the length of the file in bytes. 
- - - tuf.exceptions.SlowRetrievalError, if a timeout occurs while receiving - data from a server - - tuf.exceptions.FetcherHTTPError, if an HTTP error code was received - - A bytes iterator - """ - raise NotImplementedError # pragma: no cover - - - -class RequestsFetcher(FetcherInterface): +class RequestsFetcher(tuf.client.fetcher.FetcherInterface): """ A concrete implementation of FetcherInterface based on the Requests From 2af63cfd8d6fda0c17dd30b70fe9d2b25d50cc97 Mon Sep 17 00:00:00 2001 From: Teodora Sechkova Date: Wed, 10 Feb 2021 19:03:51 +0200 Subject: [PATCH 14/15] Add host address as a test level constant Use a common test level constant for defining the host address forming the download URL on the client side. Signed-off-by: Teodora Sechkova --- tests/test_arbitrary_package_attack.py | 2 +- tests/test_download.py | 6 +++--- tests/test_endless_data_attack.py | 2 +- tests/test_extraneous_dependencies_attack.py | 2 +- tests/test_fetcher.py | 2 +- tests/test_indefinite_freeze_attack.py | 2 +- tests/test_key_revocation_integration.py | 2 +- tests/test_mix_and_match_attack.py | 2 +- tests/test_multiple_repositories_integration.py | 8 ++++++-- tests/test_proxy_use.py | 9 ++++++--- tests/test_replay_attack.py | 2 +- tests/test_slow_retrieval_attack.py | 2 +- tests/test_updater.py | 16 ++++++++++------ tests/test_updater_root_rotation_integration.py | 2 +- tests/utils.py | 3 +++ 15 files changed, 38 insertions(+), 24 deletions(-) diff --git a/tests/test_arbitrary_package_attack.py b/tests/test_arbitrary_package_attack.py index 44d49a623e..21096fb4bf 100755 --- a/tests/test_arbitrary_package_attack.py +++ b/tests/test_arbitrary_package_attack.py @@ -124,7 +124,7 @@ def setUp(self): # Set the url prefix required by the 'tuf/client/updater.py' updater. # 'path/to/tmp/repository' -> 'localhost:8001/tmp/repository'. 
repository_basepath = self.repository_directory[len(os.getcwd()):] - url_prefix = 'http://127.0.0.1:' \ + url_prefix = 'http://' + utils.TEST_HOST_ADDRESS + ':' \ + str(self.server_process_handler.port) + repository_basepath # Setting 'tuf.settings.repository_directory' with the temporary client diff --git a/tests/test_download.py b/tests/test_download.py index f60e74b5f3..6ae0e4c4e4 100755 --- a/tests/test_download.py +++ b/tests/test_download.py @@ -77,7 +77,7 @@ def setUp(self): self.server_process_handler = utils.TestServerProcess(log=logger) rel_target_filepath = os.path.basename(target_filepath) - self.url = 'http://127.0.0.1:' \ + self.url = 'http://' + utils.TEST_HOST_ADDRESS + ':' \ + str(self.server_process_handler.port) + '/' + rel_target_filepath # Computing hash of target file data. @@ -195,13 +195,13 @@ def test_download_url_to_tempfileobj_and_urls(self): with self.assertRaises(securesystemslib.exceptions.FormatError): download_file(None, self.target_data_length, self.fetcher) - url = 'http://127.0.0.1:' \ + url = 'http://' + utils.TEST_HOST_ADDRESS + ':' \ + str(self.server_process_handler.port) + '/' + self.random_string() with self.assertRaises(tuf.exceptions.FetcherHTTPError) as cm: download_file(url, self.target_data_length, self.fetcher) self.assertEqual(cm.exception.status_code, 404) - url1 = 'http://127.0.0.1:' \ + url1 = 'http://' + utils.TEST_HOST_ADDRESS + ':' \ + str(self.server_process_handler.port + 1) + '/' + self.random_string() with self.assertRaises(requests.exceptions.ConnectionError): download_file(url1, self.target_data_length, self.fetcher) diff --git a/tests/test_endless_data_attack.py b/tests/test_endless_data_attack.py index 5566f5e5e4..01701d9830 100755 --- a/tests/test_endless_data_attack.py +++ b/tests/test_endless_data_attack.py @@ -126,7 +126,7 @@ def setUp(self): # Set the url prefix required by the 'tuf/client/updater.py' updater. # 'path/to/tmp/repository' -> 'localhost:8001/tmp/repository'. 
repository_basepath = self.repository_directory[len(os.getcwd()):] - url_prefix = 'http://127.0.0.1:' \ + url_prefix = 'http://' + utils.TEST_HOST_ADDRESS + ':' \ + str(self.server_process_handler.port) + repository_basepath # Setting 'tuf.settings.repository_directory' with the temporary client diff --git a/tests/test_extraneous_dependencies_attack.py b/tests/test_extraneous_dependencies_attack.py index 953fb4f4bf..2e71ee800b 100755 --- a/tests/test_extraneous_dependencies_attack.py +++ b/tests/test_extraneous_dependencies_attack.py @@ -133,7 +133,7 @@ def setUp(self): # Set the url prefix required by the 'tuf/client/updater.py' updater. # 'path/to/tmp/repository' -> 'localhost:8001/tmp/repository'. repository_basepath = self.repository_directory[len(os.getcwd()):] - url_prefix = 'http://127.0.0.1:' \ + url_prefix = 'http://' + utils.TEST_HOST_ADDRESS + ':' \ + str(self.server_process_handler.port) + repository_basepath # Setting 'tuf.settings.repository_directory' with the temporary client diff --git a/tests/test_fetcher.py b/tests/test_fetcher.py index e05f4d7125..3bfb578464 100644 --- a/tests/test_fetcher.py +++ b/tests/test_fetcher.py @@ -58,7 +58,7 @@ def setUp(self): self.server_process_handler = utils.TestServerProcess(log=logger) rel_target_filepath = os.path.basename(target_filepath) - self.url = 'http://127.0.0.1:' \ + self.url = 'http://' + utils.TEST_HOST_ADDRESS + ':' \ + str(self.server_process_handler.port) + '/' + rel_target_filepath # Create a temporary file where the target file chunks are written diff --git a/tests/test_indefinite_freeze_attack.py b/tests/test_indefinite_freeze_attack.py index 8890efd084..6e10f3a075 100755 --- a/tests/test_indefinite_freeze_attack.py +++ b/tests/test_indefinite_freeze_attack.py @@ -146,7 +146,7 @@ def setUp(self): # Set the url prefix required by the 'tuf/client/updater.py' updater. # 'path/to/tmp/repository' -> 'localhost:8001/tmp/repository'. 
repository_basepath = self.repository_directory[len(os.getcwd()):] - url_prefix = 'http://127.0.0.1:' \ + url_prefix = 'http://' + utils.TEST_HOST_ADDRESS + ':' \ + str(self.server_process_handler.port) + repository_basepath # Setting 'tuf.settings.repository_directory' with the temporary client diff --git a/tests/test_key_revocation_integration.py b/tests/test_key_revocation_integration.py index 6e1534b027..b235760e4a 100755 --- a/tests/test_key_revocation_integration.py +++ b/tests/test_key_revocation_integration.py @@ -132,7 +132,7 @@ def setUp(self): # 'path/to/tmp/repository' -> 'localhost:8001/tmp/repository'. repository_basepath = self.repository_directory[len(os.getcwd()):] - url_prefix = 'http://127.0.0.1:' \ + url_prefix = 'http://' + utils.TEST_HOST_ADDRESS + ':' \ + str(self.server_process_handler.port) + repository_basepath # Setting 'tuf.settings.repository_directory' with the temporary client diff --git a/tests/test_mix_and_match_attack.py b/tests/test_mix_and_match_attack.py index a02b598b7a..123c447325 100755 --- a/tests/test_mix_and_match_attack.py +++ b/tests/test_mix_and_match_attack.py @@ -132,7 +132,7 @@ def setUp(self): # Set the url prefix required by the 'tuf/client/updater.py' updater. # 'path/to/tmp/repository' -> 'localhost:8001/tmp/repository'. 
repository_basepath = self.repository_directory[len(os.getcwd()):] - url_prefix = 'http://127.0.0.1:' \ + url_prefix = 'http://' + utils.TEST_HOST_ADDRESS + ':' \ + str(self.server_process_handler.port) + repository_basepath # Setting 'tuf.settings.repository_directory' with the temporary client diff --git a/tests/test_multiple_repositories_integration.py b/tests/test_multiple_repositories_integration.py index 6c77517b03..de9920244d 100755 --- a/tests/test_multiple_repositories_integration.py +++ b/tests/test_multiple_repositories_integration.py @@ -136,8 +136,12 @@ def setUp(self): logger.debug('Server process 2 started.') - url_prefix = 'http://127.0.0.1:' + str(self.server_process_handler.port) - url_prefix2 = 'http://127.0.0.1:' + str(self.server_process_handler2.port) + url_prefix = \ + 'http://' + utils.TEST_HOST_ADDRESS + ':' + \ + str(self.server_process_handler.port) + url_prefix2 = \ + 'http://' + utils.TEST_HOST_ADDRESS + ':' + \ + str(self.server_process_handler2.port) self.repository_mirrors = {'mirror1': {'url_prefix': url_prefix, 'metadata_path': 'metadata', diff --git a/tests/test_proxy_use.py b/tests/test_proxy_use.py index 56c81e2767..8731b407a6 100755 --- a/tests/test_proxy_use.py +++ b/tests/test_proxy_use.py @@ -97,7 +97,8 @@ def setUpClass(cls): # Note that the HTTP proxy server's address uses http://, regardless of the # type of connection used with the target server. - cls.http_proxy_addr = 'http://127.0.0.1:' + str(cls.http_proxy_handler.port) + cls.http_proxy_addr = 'http://' + utils.TEST_HOST_ADDRESS + ':' + \ + str(cls.http_proxy_handler.port) # Launch an HTTPS proxy server, also derived from inaz2/proxy2. 
@@ -167,10 +168,12 @@ def setUp(self): suffix = '/' + os.path.basename(target_filepath) self.url = \ - 'http://127.0.0.1:' + str(self.http_server_handler.port) + suffix + 'http://' + utils.TEST_HOST_ADDRESS + ':' + \ + str(self.http_server_handler.port) + suffix self.url_https = \ - 'https://127.0.0.1:' + str(self.https_server_handler.port) + suffix + 'https://' + utils.TEST_HOST_ADDRESS + ':' + \ + str(self.https_server_handler.port) + suffix diff --git a/tests/test_replay_attack.py b/tests/test_replay_attack.py index 64b73229f0..1195d99b22 100755 --- a/tests/test_replay_attack.py +++ b/tests/test_replay_attack.py @@ -132,7 +132,7 @@ def setUp(self): # Set the url prefix required by the 'tuf/client/updater.py' updater. # 'path/to/tmp/repository' -> 'localhost:8001/tmp/repository'. repository_basepath = self.repository_directory[len(os.getcwd()):] - url_prefix = 'http://127.0.0.1:' \ + url_prefix = 'http://' + utils.TEST_HOST_ADDRESS + ':' \ + str(self.server_process_handler.port) + repository_basepath # Setting 'tuf.settings.repository_directory' with the temporary client diff --git a/tests/test_slow_retrieval_attack.py b/tests/test_slow_retrieval_attack.py index ca931f382f..8a56e483ba 100755 --- a/tests/test_slow_retrieval_attack.py +++ b/tests/test_slow_retrieval_attack.py @@ -171,7 +171,7 @@ def setUp(self): logger.info('Slow Retrieval Server process started.') - url_prefix = 'http://127.0.0.1:' \ + url_prefix = 'http://' + utils.TEST_HOST_ADDRESS + ':' \ + str(self.server_process_handler.port) + repository_basepath # Setting 'tuf.settings.repository_directory' with the temporary client diff --git a/tests/test_updater.py b/tests/test_updater.py index 58790cc42e..4654cfab23 100755 --- a/tests/test_updater.py +++ b/tests/test_updater.py @@ -167,7 +167,7 @@ def setUp(self): # 'path/to/tmp/repository' -> 'localhost:8001/tmp/repository'. 
repository_basepath = self.repository_directory[len(os.getcwd()):] - url_prefix = 'http://127.0.0.1:' \ + url_prefix = 'http://' + utils.TEST_HOST_ADDRESS + ':' \ + str(self.server_process_handler.port) + repository_basepath # Setting 'tuf.settings.repository_directory' with the temporary client @@ -1110,7 +1110,7 @@ def test_6_get_one_valid_targetinfo(self): # 'path/to/tmp/repository' -> 'localhost:8001/tmp/repository'. repository_basepath = self.repository_directory[len(os.getcwd()):] - url_prefix = 'http://127.0.0.1:' \ + url_prefix = 'http://' + utils.TEST_HOST_ADDRESS + ':' \ + str(self.server_process_handler.port) + repository_basepath self.repository_mirrors = {'mirror1': {'url_prefix': url_prefix, @@ -1406,7 +1406,7 @@ def test_7_updated_targets(self): # 'path/to/tmp/repository' -> 'localhost:8001/tmp/repository'. repository_basepath = self.repository_directory[len(os.getcwd()):] - url_prefix = 'http://127.0.0.1:' \ + url_prefix = 'http://' + utils.TEST_HOST_ADDRESS + ':' \ + str(self.server_process_handler.port) + repository_basepath # Setting 'tuf.settings.repository_directory' with the temporary client @@ -1533,7 +1533,7 @@ def test_8_remove_obsolete_targets(self): # 'path/to/tmp/repository' -> 'localhost:8001/tmp/repository'. 
repository_basepath = self.repository_directory[len(os.getcwd()):] - url_prefix = 'http://127.0.0.1:' \ + url_prefix = 'http://' + utils.TEST_HOST_ADDRESS + ':' \ + str(self.server_process_handler.port) + repository_basepath # Setting 'tuf.settings.repository_directory' with the temporary client @@ -1861,8 +1861,12 @@ def setUp(self): logger.debug('Server process 2 started.') - url_prefix = 'http://127.0.0.1:' + str(self.server_process_handler.port) - url_prefix2 = 'http://127.0.0.1:' + str(self.server_process_handler2.port) + url_prefix = \ + 'http://' + utils.TEST_HOST_ADDRESS + ':' + \ + str(self.server_process_handler.port) + url_prefix2 = \ + 'http://' + utils.TEST_HOST_ADDRESS + ':' + \ + str(self.server_process_handler2.port) # We have all of the necessary information for two repository mirrors # in map.json, except for url prefixes. diff --git a/tests/test_updater_root_rotation_integration.py b/tests/test_updater_root_rotation_integration.py index 24df3b38d2..9182aa6c1f 100755 --- a/tests/test_updater_root_rotation_integration.py +++ b/tests/test_updater_root_rotation_integration.py @@ -140,7 +140,7 @@ def setUp(self): # 'path/to/tmp/repository' -> 'localhost:8001/tmp/repository'. 
repository_basepath = self.repository_directory[len(os.getcwd()):] - url_prefix = 'http://127.0.0.1:' \ + url_prefix = 'http://' + utils.TEST_HOST_ADDRESS + ':' \ + str(self.server_process_handler.port) + repository_basepath # Setting 'tuf.settings.repository_directory' with the temporary client diff --git a/tests/utils.py b/tests/utils.py index 8e8c07bde0..10a12436ac 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -39,6 +39,9 @@ logger = logging.getLogger(__name__) +# Used when forming URLs on the client side +TEST_HOST_ADDRESS = '127.0.0.1' + try: # is defined in Python 3 TimeoutError From 93c657300853666a043c07a3adee1c0116627e30 Mon Sep 17 00:00:00 2001 From: Teodora Sechkova Date: Wed, 17 Feb 2021 13:28:22 +0200 Subject: [PATCH 15/15] Apply the new code style to fetcher docstrings Ensure that the newly added files' docstrings adhere to the recently adopted code style guideline (#1232). Small code style improvements in comments and imports. Signed-off-by: Teodora Sechkova --- tests/test_fetcher.py | 17 ++------- tuf/client/fetcher.py | 55 +++++++++++----------------- tuf/requests_fetcher.py | 79 +++++++++++++++++++++++------------------ 3 files changed, 67 insertions(+), 84 deletions(-) diff --git a/tests/test_fetcher.py b/tests/test_fetcher.py index 3bfb578464..312ef80959 100644 --- a/tests/test_fetcher.py +++ b/tests/test_fetcher.py @@ -1,20 +1,9 @@ #!/usr/bin/env python -""" - - test_download.py - - - Teodora Sechkova tsechkova@vmware.com - - - December 18, 2020. - - - See LICENSE-MIT OR LICENSE for licensing information. +# Copyright 2021, New York University and the TUF contributors +# SPDX-License-Identifier: MIT OR Apache-2.0 - - Unit test for RequestsFetcher. +"""Unit test for RequestsFetcher. """ # Help with Python 2 compatibility, where the '/' operator performs # integer division. 
diff --git a/tuf/client/fetcher.py b/tuf/client/fetcher.py index c7ce99ee0b..8768bdd4b9 100644 --- a/tuf/client/fetcher.py +++ b/tuf/client/fetcher.py @@ -1,53 +1,38 @@ -""" - - fetcher.py - - - Teodora Sechkova - - - December 14, 2020 +# Copyright 2021, New York University and the TUF contributors +# SPDX-License-Identifier: MIT OR Apache-2.0 - - See LICENSE-MIT OR LICENSE for licensing information. - - - Provides an interface for network IO abstraction. +"""Provides an interface for network IO abstraction. """ +# Imports import abc - +# Classes class FetcherInterface(): + """Defines an interface for abstract network download. + + By providing a concrete implementation of the abstract interface, + users of the framework can plug-in their preferred/customized + network stack. """ - - Defines an interface for abstract network download which can be implemented - for a variety of network libraries and configurations. - """ - __metaclass__ = abc.ABCMeta + __metaclass__ = abc.ABCMeta @abc.abstractmethod def fetch(self, url, required_length): - """ - - Fetches the contents of HTTP/HTTPS url from a remote server up to - required_length and returns a bytes iterator. + """Fetches the contents of HTTP/HTTPS url from a remote server. - - url: - A URL string that represents the location of the file. + Ensures the length of the downloaded data is up to 'required_length'. - required_length: - An integer value representing the length of the file in bytes. + Arguments: + url: A URL string that represents a file location. + required_length: An integer value representing the file length in bytes. - - tuf.exceptions.SlowRetrievalError, if a timeout occurs while receiving - data from a server + Raises: + tuf.exceptions.SlowRetrievalError: A timeout occurs while receiving data. + tuf.exceptions.FetcherHTTPError: An HTTP error code is received. 
-    tuf.exceptions.FetcherHTTPError, if an HTTP error code was received
-
+    Returns:
      A bytes iterator
    """
    raise NotImplementedError # pragma: no cover
-
diff --git a/tuf/requests_fetcher.py b/tuf/requests_fetcher.py
index 8bc70eaeb7..e7d0ef812d 100644
--- a/tuf/requests_fetcher.py
+++ b/tuf/requests_fetcher.py
@@ -1,59 +1,69 @@
-"""
-
-  fetcher.py
-
-
-  Teodora Sechkova
-
-
-  December 14, 2020
-
-
-  See LICENSE-MIT OR LICENSE for licensing information.
+# Copyright 2021, New York University and the TUF contributors
+# SPDX-License-Identifier: MIT OR Apache-2.0
-
-  Provides an implementation of FetcherInterface using the requests HTTP
+"""Provides an implementation of FetcherInterface using the Requests HTTP
   library.
 """
 
+# Imports
 import requests
 import six
 import logging
 import time
 
 import urllib3.exceptions
+
 import tuf.exceptions
 import tuf.settings
-import tuf.client.fetcher
 
-logger = logging.getLogger(__name__)
+from tuf.client.fetcher import FetcherInterface
 
+# Globals
+logger = logging.getLogger(__name__)
 
-class RequestsFetcher(tuf.client.fetcher.FetcherInterface):
-  """
-
-  A concrete implementation of FetcherInterface based on the Requests
+# Classes
+class RequestsFetcher(FetcherInterface):
+  """A concrete implementation of FetcherInterface based on the Requests
   library.
+
+  Attributes:
+    _sessions: A dictionary of Requests.Session objects storing a separate
+      session per scheme+hostname combination.
   """
 
   def __init__(self):
     # From http://docs.python-requests.org/en/master/user/advanced/#session-objects:
     #
-    # "The Session object allows you to persist certain parameters across requests.
-    # It also persists cookies across all requests made from the Session instance,
-    # and will use urllib3's connection pooling. So if you're making several
-    # requests to the same host, the underlying TCP connection will be reused,
-    # which can result in a significant performance increase (see HTTP persistent
-    # connection)."
+ # "The Session object allows you to persist certain parameters across + # requests. It also persists cookies across all requests made from the + # Session instance, and will use urllib3's connection pooling. So if you're + # making several requests to the same host, the underlying TCP connection + # will be reused, which can result in a significant performance increase + # (see HTTP persistent connection)." # - # NOTE: We use a separate requests.Session per scheme+hostname combination, in - # order to reuse connections to the same hostname to improve efficiency, but - # avoiding sharing state between different hosts-scheme combinations to + # NOTE: We use a separate requests.Session per scheme+hostname combination, + # in order to reuse connections to the same hostname to improve efficiency, + # but avoiding sharing state between different hosts-scheme combinations to # minimize subtle security issues. Some cookies may not be HTTP-safe. self._sessions = {} def fetch(self, url, required_length): + """Fetches the contents of HTTP/HTTPS url from a remote server. + + Ensures the length of the downloaded data is up to 'required_length'. + + Arguments: + url: A URL string that represents a file location. + required_length: An integer value representing the file length in bytes. + + Raises: + tuf.exceptions.SlowRetrievalError: A timeout occurs while receiving data. + tuf.exceptions.FetcherHTTPError: An HTTP error code is received. + + Returns: + A bytes iterator + """ # Get a customized session for each new schema+hostname combination. session = self._get_session(url) @@ -81,10 +91,10 @@ def chunks(): bytes_received = 0 while True: # We download a fixed chunk of data in every round. This is so that we - # can defend against slow retrieval attacks. Furthermore, we do not wish - # to download an extremely large file in one shot. - # Before beginning the round, sleep (if set) for a short amount of time - # so that the CPU is not hogged in the while loop. 
+          # can defend against slow retrieval attacks. Furthermore, we do not
+          # wish to download an extremely large file in one shot.
+          # Before beginning the round, sleep (if set) for a short amount of
+          # time so that the CPU is not hogged in the while loop.
           if tuf.settings.SLEEP_BEFORE_ROUND:
             time.sleep(tuf.settings.SLEEP_BEFORE_ROUND)
 
@@ -121,8 +131,7 @@ def chunks():
 
 
   def _get_session(self, url):
-    """
-    Returns a different customized requests.Session per schema+hostname
+    """Returns a different customized requests.Session per scheme+hostname
     combination.
     """
     # Use a different requests.Session per schema+hostname combination, to