From 66b43fc9d77c4dd5039fafbef35144e77ee5cdbb Mon Sep 17 00:00:00 2001 From: Teodora Sechkova Date: Thu, 25 Mar 2021 16:38:03 +0200 Subject: [PATCH 1/9] Move download modules inside client directory The modules performing network download are used only by the client side of TUF. Move them inside the client directory for the refactored client. Move the _mirror_*download functions from Updater to mirrors.py. Signed-off-by: Teodora Sechkova --- tuf/client_rework/download.py | 323 ++++++++++++++++++++++++++ tuf/client_rework/fetcher.py | 38 +++ tuf/client_rework/mirrors.py | 194 ++++++++++++++++ tuf/client_rework/requests_fetcher.py | 173 ++++++++++++++ tuf/client_rework/updater_rework.py | 102 +++----- 5 files changed, 759 insertions(+), 71 deletions(-) create mode 100644 tuf/client_rework/download.py create mode 100644 tuf/client_rework/fetcher.py create mode 100644 tuf/client_rework/mirrors.py create mode 100644 tuf/client_rework/requests_fetcher.py diff --git a/tuf/client_rework/download.py b/tuf/client_rework/download.py new file mode 100644 index 0000000000..2d946ef891 --- /dev/null +++ b/tuf/client_rework/download.py @@ -0,0 +1,323 @@ +#!/usr/bin/env python + +# Copyright 2012 - 2017, New York University and the TUF contributors +# SPDX-License-Identifier: MIT OR Apache-2.0 + +""" + + download.py + + + February 21, 2012. Based on previous version by Geremy Condra. + + + Konstantin Andrianov + Vladimir Diaz + + + See LICENSE-MIT OR LICENSE for licensing information. + + + Download metadata and target files and check their validity. The hash and + length of a downloaded file has to match the hash and length supplied by the + metadata of that file. +""" + +# Help with Python 3 compatibility, where the print statement is a function, an +# implicit relative import is invalid, and the '/' operator performs true +# division. Example: print 'hello world' raises a 'SyntaxError' exception. +from __future__ import print_function +from __future__ import absolute_import +from __future__ import division +from __future__ import unicode_literals + +import logging +import timeit +import tempfile + +import securesystemslib +import securesystemslib.util +import six + +import tuf +import tuf.exceptions +import tuf.formats + +# See 'log.py' to learn how logging is handled in TUF. +logger = logging.getLogger(__name__) + + +def safe_download(url, required_length, fetcher): + """ + + Given the 'url' and 'required_length' of the desired file, open a connection + to 'url', download it, and return the contents of the file. Also ensure + the length of the downloaded file matches 'required_length' exactly. + tuf.download.unsafe_download() may be called if an upper download limit is + preferred. + + + url: + A URL string that represents the location of the file. + + required_length: + An integer value representing the length of the file. This is an exact + limit. + + fetcher: + An object implementing FetcherInterface that performs the network IO + operations. + + + A file object is created on disk to store the contents of 'url'. + + + tuf.ssl_commons.exceptions.DownloadLengthMismatchError, if there was a + mismatch of observed vs expected lengths while downloading the file. + + securesystemslib.exceptions.FormatError, if any of the arguments are + improperly formatted. + + Any other unforeseen runtime exception. + + + A file object that points to the contents of 'url'. + """ + + # Do all of the arguments have the appropriate format? + # Raise 'securesystemslib.exceptions.FormatError' if there is a mismatch. 
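+  # Illustrative call site (a sketch): any FetcherInterface implementation
+  # may be passed here, e.g. the RequestsFetcher added by this patch:
+  #
+  #   temp_file = safe_download(url, trusted_length, RequestsFetcher())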
+  securesystemslib.formats.URL_SCHEMA.check_match(url)
+  tuf.formats.LENGTH_SCHEMA.check_match(required_length)
+
+  return _download_file(url, required_length, fetcher, STRICT_REQUIRED_LENGTH=True)
+
+
+
+
+
+def unsafe_download(url, required_length, fetcher):
+  """
+  <Purpose>
+    Given the 'url' and 'required_length' of the desired file, open a
+    connection to 'url', download it, and return the contents of the file.
+    Also ensure the length of the downloaded file is up to 'required_length',
+    and no larger.  tuf.download.safe_download() may be called if an exact
+    download limit is preferred.
+
+  <Arguments>
+    url:
+      A URL string that represents the location of the file.
+
+    required_length:
+      An integer value representing the length of the file.  This is an upper
+      limit.
+
+    fetcher:
+      An object implementing FetcherInterface that performs the network IO
+      operations.
+
+  <Side Effects>
+    A file object is created on disk to store the contents of 'url'.
+
+  <Exceptions>
+    tuf.exceptions.DownloadLengthMismatchError, if there was a mismatch of
+    observed vs expected lengths while downloading the file.
+
+    securesystemslib.exceptions.FormatError, if any of the arguments are
+    improperly formatted.
+
+    Any other unforeseen runtime exception.
+
+  <Returns>
+    A file object that points to the contents of 'url'.
+  """
+
+  # Do all of the arguments have the appropriate format?
+  # Raise 'securesystemslib.exceptions.FormatError' if there is a mismatch.
+  securesystemslib.formats.URL_SCHEMA.check_match(url)
+  tuf.formats.LENGTH_SCHEMA.check_match(required_length)
+
+  return _download_file(url, required_length, fetcher, STRICT_REQUIRED_LENGTH=False)
+
+
+
+
+
+def _download_file(url, required_length, fetcher, STRICT_REQUIRED_LENGTH=True):
+  """
+  <Purpose>
+    Given the url and length of the desired file, this function opens a
+    connection to 'url' and downloads the file while ensuring its length
+    matches 'required_length' if 'STRICT_REQUIRED_LENGTH' is True (if False,
+    the file's length is not checked and a slow retrieval exception is raised
+    if the download rate falls below the acceptable rate).
+
+  <Arguments>
+    url:
+      A URL string that represents the location of the file.
+
+    required_length:
+      An integer value representing the length of the file.
+
+    fetcher:
+      An object implementing FetcherInterface that performs the network IO
+      operations.
+
+    STRICT_REQUIRED_LENGTH:
+      A Boolean indicator used to signal whether we should perform strict
+      checking of required_length.  True by default.  We explicitly set this
+      to False when we know that we want to turn this off for downloading the
+      timestamp metadata, which has no signed required_length.
+
+  <Side Effects>
+    A file object is created on disk to store the contents of 'url'.
+
+  <Exceptions>
+    tuf.exceptions.DownloadLengthMismatchError, if there was a mismatch of
+    observed vs expected lengths while downloading the file.
+
+    securesystemslib.exceptions.FormatError, if any of the arguments are
+    improperly formatted.
+
+    Any other unforeseen runtime exception.
+
+  <Returns>
+    A file object that points to the contents of 'url'.
+  """
+  # 'url.replace('\\', '/')' is needed for compatibility with Windows-based
+  # systems, because they might use back-slashes in place of forward-slashes.
+  # This converts it to the common format.  unquote() replaces %xx escapes in
+  # a url with their single-character equivalent.  A back-slash may be
+  # encoded as %5c in the url, which should also be replaced with a forward
+  # slash.
+  url = six.moves.urllib.parse.unquote(url).replace('\\', '/')
+  logger.info('Downloading: ' + repr(url))
+
+  # This is the temporary file that we will return to contain the contents of
+  # the downloaded file.
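+  # Note: tempfile.TemporaryFile() is unnamed and is deleted automatically
+  # when closed, so an aborted download leaves nothing behind on disk.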
+ temp_file = tempfile.TemporaryFile() + + average_download_speed = 0 + number_of_bytes_received = 0 + + try: + chunks = fetcher.fetch(url, required_length) + start_time = timeit.default_timer() + for chunk in chunks: + + stop_time = timeit.default_timer() + temp_file.write(chunk) + + # Measure the average download speed. + number_of_bytes_received += len(chunk) + seconds_spent_receiving = stop_time - start_time + average_download_speed = number_of_bytes_received / seconds_spent_receiving + + if average_download_speed < tuf.settings.MIN_AVERAGE_DOWNLOAD_SPEED: + logger.debug('The average download speed dropped below the minimum' + ' average download speed set in tuf.settings.py. Stopping the' + ' download!') + break + + else: + logger.debug('The average download speed has not dipped below the' + ' minimum average download speed set in tuf.settings.py.') + + # Does the total number of downloaded bytes match the required length? + _check_downloaded_length(number_of_bytes_received, required_length, + STRICT_REQUIRED_LENGTH=STRICT_REQUIRED_LENGTH, + average_download_speed=average_download_speed) + + except Exception: + # Close 'temp_file'. Any written data is lost. + temp_file.close() + logger.debug('Could not download URL: ' + repr(url)) + raise + + else: + return temp_file + + + + +def _check_downloaded_length(total_downloaded, required_length, + STRICT_REQUIRED_LENGTH=True, + average_download_speed=None): + """ + + A helper function which checks whether the total number of downloaded bytes + matches our expectation. + + + total_downloaded: + The total number of bytes supposedly downloaded for the file in question. + + required_length: + The total number of bytes expected of the file as seen from its metadata. + The Timestamp role is always downloaded without a known file length, and + the Root role when the client cannot download any of the required + top-level roles. In both cases, 'required_length' is actually an upper + limit on the length of the downloaded file. + + STRICT_REQUIRED_LENGTH: + A Boolean indicator used to signal whether we should perform strict + checking of required_length. True by default. We explicitly set this to + False when we know that we want to turn this off for downloading the + timestamp metadata, which has no signed required_length. + + average_download_speed: + The average download speed for the downloaded file. + + + None. + + + securesystemslib.exceptions.DownloadLengthMismatchError, if + STRICT_REQUIRED_LENGTH is True and total_downloaded is not equal + required_length. + + tuf.exceptions.SlowRetrievalError, if the total downloaded was + done in less than the acceptable download speed (as set in + tuf.settings.py). + + + None. + """ + + if total_downloaded == required_length: + logger.info('Downloaded ' + str(total_downloaded) + ' bytes out of the' + ' expected ' + str(required_length) + ' bytes.') + + else: + difference_in_bytes = abs(total_downloaded - required_length) + + # What we downloaded is not equal to the required length, but did we ask + # for strict checking of required length? + if STRICT_REQUIRED_LENGTH: + logger.info('Downloaded ' + str(total_downloaded) + ' bytes, but' + ' expected ' + str(required_length) + ' bytes. There is a difference' + ' of ' + str(difference_in_bytes) + ' bytes.') + + # If the average download speed is below a certain threshold, we flag + # this as a possible slow-retrieval attack. 
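+      # Worked example, assuming tuf.settings.MIN_AVERAGE_DOWNLOAD_SPEED is
+      # 50 (bytes/second): 100 bytes received over 10 seconds averages
+      # 10 B/s and would raise the SlowRetrievalError below.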
+ logger.debug('Average download speed: ' + repr(average_download_speed)) + logger.debug('Minimum average download speed: ' + repr(tuf.settings.MIN_AVERAGE_DOWNLOAD_SPEED)) + + if average_download_speed < tuf.settings.MIN_AVERAGE_DOWNLOAD_SPEED: + raise tuf.exceptions.SlowRetrievalError(average_download_speed) + + else: + logger.debug('Good average download speed: ' + + repr(average_download_speed) + ' bytes per second') + + raise tuf.exceptions.DownloadLengthMismatchError(required_length, total_downloaded) + + else: + # We specifically disabled strict checking of required length, but we + # will log a warning anyway. This is useful when we wish to download the + # Timestamp or Root metadata, for which we have no signed metadata; so, + # we must guess a reasonable required_length for it. + if average_download_speed < tuf.settings.MIN_AVERAGE_DOWNLOAD_SPEED: + raise tuf.exceptions.SlowRetrievalError(average_download_speed) + + else: + logger.debug('Good average download speed: ' + + repr(average_download_speed) + ' bytes per second') + + logger.info('Downloaded ' + str(total_downloaded) + ' bytes out of an' + ' upper limit of ' + str(required_length) + ' bytes.') diff --git a/tuf/client_rework/fetcher.py b/tuf/client_rework/fetcher.py new file mode 100644 index 0000000000..8768bdd4b9 --- /dev/null +++ b/tuf/client_rework/fetcher.py @@ -0,0 +1,38 @@ +# Copyright 2021, New York University and the TUF contributors +# SPDX-License-Identifier: MIT OR Apache-2.0 + +"""Provides an interface for network IO abstraction. +""" + +# Imports +import abc + +# Classes +class FetcherInterface(): + """Defines an interface for abstract network download. + + By providing a concrete implementation of the abstract interface, + users of the framework can plug-in their preferred/customized + network stack. + """ + + __metaclass__ = abc.ABCMeta + + @abc.abstractmethod + def fetch(self, url, required_length): + """Fetches the contents of HTTP/HTTPS url from a remote server. + + Ensures the length of the downloaded data is up to 'required_length'. + + Arguments: + url: A URL string that represents a file location. + required_length: An integer value representing the file length in bytes. + + Raises: + tuf.exceptions.SlowRetrievalError: A timeout occurs while receiving data. + tuf.exceptions.FetcherHTTPError: An HTTP error code is received. + + Returns: + A bytes iterator + """ + raise NotImplementedError # pragma: no cover diff --git a/tuf/client_rework/mirrors.py b/tuf/client_rework/mirrors.py new file mode 100644 index 0000000000..a9e4dd266b --- /dev/null +++ b/tuf/client_rework/mirrors.py @@ -0,0 +1,194 @@ +#!/usr/bin/env python + +# Copyright 2012 - 2017, New York University and the TUF contributors +# SPDX-License-Identifier: MIT OR Apache-2.0 + +""" + + mirrors.py + + + Konstantin Andrianov. + Derived from original mirrors.py written by Geremy Condra. + + + March 12, 2012. + + + See LICENSE-MIT OR LICENSE for licensing information. + + + Extract a list of mirror urls corresponding to the file type and the location + of the file with respect to the base url. +""" + +# Help with Python 3 compatibility, where the print statement is a function, an +# implicit relative import is invalid, and the '/' operator performs true +# division. Example: print 'hello world' raises a 'SyntaxError' exception. 
+from __future__ import print_function +from __future__ import absolute_import +from __future__ import division +from __future__ import unicode_literals + +from typing import TextIO, BinaryIO, Dict + +import os + +import tuf +import tuf.formats +import tuf.client_rework.download as download + +import securesystemslib +import six + +# The type of file to be downloaded from a repository. The +# 'get_list_of_mirrors' function supports these file types. +_SUPPORTED_FILE_TYPES = ['meta', 'target'] + + +def get_list_of_mirrors(file_type, file_path, mirrors_dict): + """ + + Get a list of mirror urls from a mirrors dictionary, provided the type + and the path of the file with respect to the base url. + + + file_type: + Type of data needed for download, must correspond to one of the strings + in the list ['meta', 'target']. 'meta' for metadata file type or + 'target' for target file type. It should correspond to + NAME_SCHEMA format. + + file_path: + A relative path to the file that corresponds to RELPATH_SCHEMA format. + Ex: 'http://url_prefix/targets_path/file_path' + + mirrors_dict: + A mirrors_dict object that corresponds to MIRRORDICT_SCHEMA, where + keys are strings and values are MIRROR_SCHEMA. An example format + of MIRROR_SCHEMA: + + {'url_prefix': 'http://localhost:8001', + 'metadata_path': 'metadata/', + 'targets_path': 'targets/', + 'confined_target_dirs': ['targets/snapshot1/', ...], + 'custom': {...}} + + The 'custom' field is optional. + + + securesystemslib.exceptions.Error, on unsupported 'file_type'. + + securesystemslib.exceptions.FormatError, on bad argument. + + + List of mirror urls corresponding to the file_type and file_path. If no + match is found, empty list is returned. + """ + + # Checking if all the arguments have appropriate format. + tuf.formats.RELPATH_SCHEMA.check_match(file_path) + tuf.formats.MIRRORDICT_SCHEMA.check_match(mirrors_dict) + securesystemslib.formats.NAME_SCHEMA.check_match(file_type) + + # Verify 'file_type' is supported. + if file_type not in _SUPPORTED_FILE_TYPES: + raise securesystemslib.exceptions.Error('Invalid file_type argument.' + ' Supported file types: ' + repr(_SUPPORTED_FILE_TYPES)) + path_key = 'metadata_path' if file_type == 'meta' else 'targets_path' + + # Reference to 'securesystemslib.util.file_in_confined_directories()' (improve + # readability). This function checks whether a mirror should serve a file to + # the client. A client may be confined to certain paths on a repository + # mirror when fetching target files. This field may be set by the client + # when the repository mirror is added to the 'tuf.client.updater.Updater' + # object. + in_confined_directory = securesystemslib.util.file_in_confined_directories + + list_of_mirrors = [] + for junk, mirror_info in six.iteritems(mirrors_dict): + # Does mirror serve this file type at all? + path = mirror_info.get(path_key) + if path is None: + continue + + # for targets, ensure directory confinement + if path_key == 'targets_path': + full_filepath = os.path.join(path, file_path) + confined_target_dirs = mirror_info.get('confined_target_dirs') + # confined_target_dirs is an optional field + if confined_target_dirs and not in_confined_directory(full_filepath, + confined_target_dirs): + continue + + # urllib.quote(string) replaces special characters in string using the %xx + # escape. This is done to avoid parsing issues of the URL on the server + # side. Do *NOT* pass URLs with Unicode characters without first encoding + # the URL as UTF-8. We need a long-term solution with #61. 
+ # http://bugs.python.org/issue1712522 + file_path = six.moves.urllib.parse.quote(file_path) + url = os.path.join(mirror_info['url_prefix'], path, file_path) + + # The above os.path.join() result as well as input file_path may be + # invalid on windows (might contain both separator types), see #1077. + # Make sure the URL doesn't contain backward slashes on Windows. + list_of_mirrors.append(url.replace('\\', '/')) + + return list_of_mirrors + + +def _mirror_meta_download(filename: str, upper_length: int, + mirrors_config: Dict, + fetcher: "FetcherInterface") -> TextIO: + """ + Download metadata file from the list of metadata mirrors + """ + file_mirrors = get_list_of_mirrors('meta', filename, mirrors_config) + + file_mirror_errors = {} + for file_mirror in file_mirrors: + try: + temp_obj = download.unsafe_download( + file_mirror, + upper_length, + fetcher) + + temp_obj.seek(0) + yield temp_obj + + except Exception as exception: + file_mirror_errors[file_mirror] = exception + + finally: + if file_mirror_errors: + raise tuf.exceptions.NoWorkingMirrorError( + file_mirror_errors) + + +def _mirror_target_download(fileinfo: str, mirrors_config: Dict, + fetcher: "FetcherInterface") -> BinaryIO: + """ + Download target file from the list of target mirrors + """ + # full_filename = _get_full_name(filename) + file_mirrors = get_list_of_mirrors('target', fileinfo['filepath'], + mirrors_config) + + file_mirror_errors = {} + for file_mirror in file_mirrors: + try: + temp_obj = download.safe_download( + file_mirror, + fileinfo['fileinfo']['length'], + fetcher) + + temp_obj.seek(0) + yield temp_obj + + except Exception as exception: + file_mirror_errors[file_mirror] = exception + + finally: + if file_mirror_errors: + raise tuf.exceptions.NoWorkingMirrorError( + file_mirror_errors) diff --git a/tuf/client_rework/requests_fetcher.py b/tuf/client_rework/requests_fetcher.py new file mode 100644 index 0000000000..8074890d25 --- /dev/null +++ b/tuf/client_rework/requests_fetcher.py @@ -0,0 +1,173 @@ +# Copyright 2021, New York University and the TUF contributors +# SPDX-License-Identifier: MIT OR Apache-2.0 + +"""Provides an implementation of FetcherInterface using the Requests HTTP + library. +""" + +# Imports +import requests +import six +import logging +import time + +import urllib3.exceptions + +import tuf.exceptions +import tuf.settings + +from tuf.client_rework.fetcher import FetcherInterface + +# Globals +logger = logging.getLogger(__name__) + +# Classess +class RequestsFetcher(FetcherInterface): + """A concrete implementation of FetcherInterface based on the Requests + library. + + Attributes: + _sessions: A dictionary of Requests.Session objects storing a separate + session per scheme+hostname combination. + """ + + def __init__(self): + # From http://docs.python-requests.org/en/master/user/advanced/#session-objects: + # + # "The Session object allows you to persist certain parameters across + # requests. It also persists cookies across all requests made from the + # Session instance, and will use urllib3's connection pooling. So if you're + # making several requests to the same host, the underlying TCP connection + # will be reused, which can result in a significant performance increase + # (see HTTP persistent connection)." + # + # NOTE: We use a separate requests.Session per scheme+hostname combination, + # in order to reuse connections to the same hostname to improve efficiency, + # but avoiding sharing state between different hosts-scheme combinations to + # minimize subtle security issues. 
Some cookies may not be HTTP-safe. + self._sessions = {} + + + def fetch(self, url, required_length): + """Fetches the contents of HTTP/HTTPS url from a remote server. + + Ensures the length of the downloaded data is up to 'required_length'. + + Arguments: + url: A URL string that represents a file location. + required_length: An integer value representing the file length in bytes. + + Raises: + tuf.exceptions.SlowRetrievalError: A timeout occurs while receiving data. + tuf.exceptions.FetcherHTTPError: An HTTP error code is received. + + Returns: + A bytes iterator + """ + # Get a customized session for each new schema+hostname combination. + session = self._get_session(url) + + # Get the requests.Response object for this URL. + # + # Defer downloading the response body with stream=True. + # Always set the timeout. This timeout value is interpreted by requests as: + # - connect timeout (max delay before first byte is received) + # - read (gap) timeout (max delay between bytes received) + response = session.get(url, stream=True, + timeout=tuf.settings.SOCKET_TIMEOUT) + # Check response status. + try: + response.raise_for_status() + except requests.HTTPError as e: + response.close() + status = e.response.status_code + raise tuf.exceptions.FetcherHTTPError(str(e), status) + + + # Define a generator function to be returned by fetch. This way the caller + # of fetch can differentiate between connection and actual data download + # and measure download times accordingly. + def chunks(): + try: + bytes_received = 0 + while True: + # We download a fixed chunk of data in every round. This is so that we + # can defend against slow retrieval attacks. Furthermore, we do not + # wish to download an extremely large file in one shot. + # Before beginning the round, sleep (if set) for a short amount of + # time so that the CPU is not hogged in the while loop. + if tuf.settings.SLEEP_BEFORE_ROUND: + time.sleep(tuf.settings.SLEEP_BEFORE_ROUND) + + read_amount = min( + tuf.settings.CHUNK_SIZE, required_length - bytes_received) + + # NOTE: This may not handle some servers adding a Content-Encoding + # header, which may cause urllib3 to misbehave: + # https://github.com/pypa/pip/blob/404838abcca467648180b358598c597b74d568c9/src/pip/_internal/download.py#L547-L582 + data = response.raw.read(read_amount) + bytes_received += len(data) + + # We might have no more data to read. Check number of bytes downloaded. + if not data: + logger.debug('Downloaded ' + repr(bytes_received) + '/' + + repr(required_length) + ' bytes.') + + # Finally, we signal that the download is complete. + break + + yield data + + if bytes_received >= required_length: + break + + except urllib3.exceptions.ReadTimeoutError as e: + raise tuf.exceptions.SlowRetrievalError(str(e)) + + finally: + response.close() + + return chunks() + + + + def _get_session(self, url): + """Returns a different customized requests.Session per schema+hostname + combination. + """ + # Use a different requests.Session per schema+hostname combination, to + # reuse connections while minimizing subtle security issues. 
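+    # For example, 'https://example.org/1.root.json' yields the session
+    # index 'https+example.org'; 'http://example.org' uses a different
+    # scheme and so gets a separate session.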
+ parsed_url = six.moves.urllib.parse.urlparse(url) + + if not parsed_url.scheme or not parsed_url.hostname: + raise tuf.exceptions.URLParsingError( + 'Could not get scheme and hostname from URL: ' + url) + + session_index = parsed_url.scheme + '+' + parsed_url.hostname + + logger.debug('url: ' + url) + logger.debug('session index: ' + session_index) + + session = self._sessions.get(session_index) + + if not session: + session = requests.Session() + self._sessions[session_index] = session + + # Attach some default headers to every Session. + requests_user_agent = session.headers['User-Agent'] + # Follows the RFC: https://tools.ietf.org/html/rfc7231#section-5.5.3 + tuf_user_agent = 'tuf/' + tuf.__version__ + ' ' + requests_user_agent + session.headers.update({ + # Tell the server not to compress or modify anything. + # https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Encoding#Directives + 'Accept-Encoding': 'identity', + # The TUF user agent. + 'User-Agent': tuf_user_agent}) + + logger.debug('Made new session for ' + session_index) + + else: + logger.debug('Reusing session for ' + session_index) + + return session diff --git a/tuf/client_rework/updater_rework.py b/tuf/client_rework/updater_rework.py index 10fdcc415f..0346a34fe9 100644 --- a/tuf/client_rework/updater_rework.py +++ b/tuf/client_rework/updater_rework.py @@ -15,11 +15,11 @@ import securesystemslib.exceptions import securesystemslib.util -import tuf.download import tuf.exceptions import tuf.formats -import tuf.mirrors import tuf.settings + +from tuf.client_rework import mirrors from tuf.client.fetcher import FetcherInterface from tuf.requests_fetcher import RequestsFetcher @@ -30,6 +30,7 @@ TimestampWrapper, ) + # Globals logger = logging.getLogger(__name__) @@ -151,8 +152,11 @@ def download_target(self, target: Dict, destination_directory: str): The file is saved to the 'destination_directory' argument. """ - try: - for temp_obj in self._mirror_target_download(target): + for temp_obj in mirrors._mirror_target_download(target, + self._mirrors, + self._fetcher): + + try: self._verify_target_file(temp_obj, target) # break? should we break after first successful download? 
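One possible answer to the recurring "# break?" question above: the mirrors
generators yield every successful download, and their finally clause raises
NoWorkingMirrorError once any mirror has failed, so keeping only the first
verified file requires an explicit break. A hypothetical sketch, not what this
patch commits to:

    for temp_obj in mirrors._mirror_target_download(
            target, self._mirrors, self._fetcher):
        self._verify_target_file(temp_obj, target)
        filepath = os.path.join(destination_directory, target["filepath"])
        securesystemslib.util.persist_temp_file(temp_obj, filepath)
        break  # keep the first download that verifies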
@@ -160,66 +164,13 @@ def download_target(self, target: Dict, destination_directory: str): destination_directory, target["filepath"] ) securesystemslib.util.persist_temp_file(temp_obj, filepath) - # pylint: disable=try-except-raise - except Exception: - # TODO: do something with exceptions - raise - - def _mirror_meta_download(self, filename: str, upper_length: int) -> TextIO: - """ - Download metadata file from the list of metadata mirrors - """ - file_mirrors = tuf.mirrors.get_list_of_mirrors( - "meta", filename, self._mirrors - ) - - file_mirror_errors = {} - for file_mirror in file_mirrors: - try: - temp_obj = tuf.download.unsafe_download( - file_mirror, upper_length, self._fetcher - ) - - temp_obj.seek(0) - yield temp_obj - - # pylint: disable=broad-except - except Exception as exception: - file_mirror_errors[file_mirror] = exception - - finally: - if file_mirror_errors: - raise tuf.exceptions.NoWorkingMirrorError( - file_mirror_errors - ) - - def _mirror_target_download(self, fileinfo: str) -> BinaryIO: - """ - Download target file from the list of target mirrors - """ - # full_filename = _get_full_name(filename) - file_mirrors = tuf.mirrors.get_list_of_mirrors( - "target", fileinfo["filepath"], self._mirrors - ) + # pylint: disable=try-except-raise + except Exception: + # TODO: do something with exceptions + raise - file_mirror_errors = {} - for file_mirror in file_mirrors: - try: - temp_obj = tuf.download.safe_download( - file_mirror, fileinfo["fileinfo"]["length"], self._fetcher - ) - temp_obj.seek(0) - yield temp_obj - # pylint: disable=broad-except - except Exception as exception: - file_mirror_errors[file_mirror] = exception - finally: - if file_mirror_errors: - raise tuf.exceptions.NoWorkingMirrorError( - file_mirror_errors - ) def _get_full_meta_name( self, role: str, extension: str = ".json", version: int = None @@ -274,10 +225,11 @@ def _load_root(self) -> None: verified_root = None for next_version in range(lower_bound, upper_bound): try: - mirror_download = self._mirror_meta_download( - self._get_relative_meta_name("root", version=next_version), + mirror_download = mirrors._mirror_meta_download( + self._get_relative_meta_name('root', version=next_version), tuf.settings.DEFAULT_ROOT_REQUIRED_LENGTH, - ) + self._mirrors, + self._fetcher) for temp_obj in mirror_download: try: @@ -335,9 +287,11 @@ def _load_timestamp(self) -> None: TODO """ # TODO Check if timestamp exists locally - for temp_obj in self._mirror_meta_download( - "timestamp.json", tuf.settings.DEFAULT_TIMESTAMP_REQUIRED_LENGTH - ): + for temp_obj in mirrors._mirror_meta_download('timestamp.json', + tuf.settings.DEFAULT_TIMESTAMP_REQUIRED_LENGTH, + self._mirrors, + self._fetcher): + try: verified_tampstamp = self._verify_timestamp(temp_obj) # break? should we break after first successful download? @@ -372,7 +326,11 @@ def _load_snapshot(self) -> None: # Check if exists locally # self.loadLocal('snapshot', snapshotVerifier) - for temp_obj in self._mirror_meta_download("snapshot.json", length): + for temp_obj in mirrors._mirror_meta_download( + 'snapshot.json', length, + self._mirrors, + self._fetcher): + try: verified_snapshot = self._verify_snapshot(temp_obj) # break? should we break after first successful download? 
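A note on the lengths used in these metadata loops: _mirror_meta_download
wraps download.unsafe_download(), so values such as
tuf.settings.DEFAULT_TIMESTAMP_REQUIRED_LENGTH are upper bounds, not exact
lengths. Per mirror, the helper effectively does (sketch):

    temp_obj = download.unsafe_download(
        file_mirror, tuf.settings.DEFAULT_TIMESTAMP_REQUIRED_LENGTH, fetcher)
    temp_obj.seek(0)  # rewind before handing the file to the verifier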
@@ -408,9 +366,11 @@ def _load_targets(self, targets_role: str, parent_role: str) -> None: # Check if exists locally # self.loadLocal('snapshot', targetsVerifier) - for temp_obj in self._mirror_meta_download( - targets_role + ".json", length - ): + for temp_obj in mirrors._mirror_meta_download( + targets_role + '.json', length, + self._mirrors, + self._fetcher): + try: verified_targets = self._verify_targets( temp_obj, targets_role, parent_role From ec402cfab1abf6bb43fa53a8c98d23a88ae12a73 Mon Sep 17 00:00:00 2001 From: Teodora Sechkova Date: Thu, 25 Mar 2021 17:02:53 +0200 Subject: [PATCH 2/9] Convert helpers to static methods of Updater Make the helper methods used by the refactored Updater part of the class. Mark them as "staticmethod". Signed-off-by: Teodora Sechkova --- tuf/client_rework/updater_rework.py | 308 ++++++++++++++-------------- 1 file changed, 154 insertions(+), 154 deletions(-) diff --git a/tuf/client_rework/updater_rework.py b/tuf/client_rework/updater_rework.py index 0346a34fe9..4254425d10 100644 --- a/tuf/client_rework/updater_rework.py +++ b/tuf/client_rework/updater_rework.py @@ -240,7 +240,7 @@ def _load_root(self) -> None: except tuf.exceptions.NoWorkingMirrorError as exception: for mirror_error in exception.mirror_errors.values(): - if neither_403_nor_404(mirror_error): + if self.neither_403_nor_404(mirror_error): temp_obj.close() raise @@ -464,7 +464,7 @@ def _verify_snapshot(self, temp_obj: TextIO) -> SnapshotWrapper: # Check against timestamp metadata if self._metadata["timestamp"].snapshot.get("hash"): - _check_hashes( + self._check_hashes( temp_obj, self._metadata["timestamp"].snapshot.get("hash") ) @@ -506,7 +506,7 @@ def _verify_targets( # Check against timestamp metadata if self._metadata["snapshot"].role(filename).get("hash"): - _check_hashes( + self._check_hashes( temp_obj, self._metadata["snapshot"].targets.get("hash") ) @@ -529,14 +529,14 @@ def _verify_targets( return intermediate_targets - @staticmethod - def _verify_target_file(temp_obj: BinaryIO, targetinfo: Dict) -> None: + + def _verify_target_file(self, temp_obj: BinaryIO, targetinfo: Dict) -> None: """ TODO """ - _check_file_length(temp_obj, targetinfo["fileinfo"]["length"]) - _check_hashes(temp_obj, targetinfo["fileinfo"]["hashes"]) + self._check_file_length(temp_obj, targetinfo["fileinfo"]["length"]) + self._check_hashes(temp_obj, targetinfo["fileinfo"]["hashes"]) def _preorder_depth_first_walk(self, target_filepath) -> Dict: """ @@ -596,7 +596,7 @@ def _preorder_depth_first_walk(self, target_filepath) -> Dict: # NOTE: This may be a slow operation if there are many # delegated roles. for child_role in child_roles: - child_role_name = _visit_child_role( + child_role_name = self._visit_child_role( child_role, target_filepath ) @@ -651,167 +651,167 @@ def _preorder_depth_first_walk(self, target_filepath) -> Dict: return {"filepath": target_filepath, "fileinfo": target} + @staticmethod + def _visit_child_role(child_role: Dict, target_filepath: str) -> str: + """ + + Non-public method that determines whether the given 'target_filepath' + is an allowed path of 'child_role'. -def _visit_child_role(child_role: Dict, target_filepath: str) -> str: - """ - - Non-public method that determines whether the given 'target_filepath' - is an allowed path of 'child_role'. - - Ensure that we explore only delegated roles trusted with the target. 
The - metadata for 'child_role' should have been refreshed prior to this point, - however, the paths/targets that 'child_role' signs for have not been - verified (as intended). The paths/targets that 'child_role' is allowed - to specify in its metadata depends on the delegating role, and thus is - left to the caller to verify. We verify here that 'target_filepath' - is an allowed path according to the delegated 'child_role'. - - TODO: Should the TUF spec restrict the repository to one particular - algorithm? Should we allow the repository to specify in the role - dictionary the algorithm used for these generated hashed paths? - - - child_role: - The delegation targets role object of 'child_role', containing its - paths, path_hash_prefixes, keys, and so on. - - target_filepath: - The path to the target file on the repository. This will be relative to - the 'targets' (or equivalent) directory on a given mirror. - - - None. - - - None. - - - If 'child_role' has been delegated the target with the name - 'target_filepath', then we return the role name of 'child_role'. - - Otherwise, we return None. - """ + Ensure that we explore only delegated roles trusted with the target. The + metadata for 'child_role' should have been refreshed prior to this point, + however, the paths/targets that 'child_role' signs for have not been + verified (as intended). The paths/targets that 'child_role' is allowed + to specify in its metadata depends on the delegating role, and thus is + left to the caller to verify. We verify here that 'target_filepath' + is an allowed path according to the delegated 'child_role'. - child_role_name = child_role["name"] - child_role_paths = child_role.get("paths") - child_role_path_hash_prefixes = child_role.get("path_hash_prefixes") + TODO: Should the TUF spec restrict the repository to one particular + algorithm? Should we allow the repository to specify in the role + dictionary the algorithm used for these generated hashed paths? - if child_role_path_hash_prefixes is not None: - target_filepath_hash = _get_target_hash(target_filepath) - for child_role_path_hash_prefix in child_role_path_hash_prefixes: - if not target_filepath_hash.startswith(child_role_path_hash_prefix): - continue + + child_role: + The delegation targets role object of 'child_role', containing its + paths, path_hash_prefixes, keys, and so on. - return child_role_name - - elif child_role_paths is not None: - # Is 'child_role_name' allowed to sign for 'target_filepath'? - for child_role_path in child_role_paths: - # A child role path may be an explicit path or glob pattern (Unix - # shell-style wildcards). The child role 'child_role_name' is - # returned if 'target_filepath' is equal to or matches - # 'child_role_path'. Explicit filepaths are also considered - # matches. A repo maintainer might delegate a glob pattern with a - # leading path separator, while the client requests a matching - # target without a leading path separator - make sure to strip any - # leading path separators so that a match is made. - # Example: "foo.tgz" should match with "/*.tgz". - if fnmatch.fnmatch( - target_filepath.lstrip(os.sep), child_role_path.lstrip(os.sep) - ): - logger.debug( - "Child role " - + repr(child_role_name) - + " is allowed to sign for " - + repr(target_filepath) - ) + target_filepath: + The path to the target file on the repository. This will be relative to + the 'targets' (or equivalent) directory on a given mirror. - return child_role_name + + None. 
- logger.debug( - "The given target path " - + repr(target_filepath) - + " does not match the trusted path or glob pattern: " - + repr(child_role_path) - ) - continue - - else: - # 'role_name' should have been validated when it was downloaded. - # The 'paths' or 'path_hash_prefixes' fields should not be missing, - # so we raise a format error here in case they are both missing. - raise tuf.exceptions.FormatError( - repr(child_role_name) + " " - 'has neither a "paths" nor "path_hash_prefixes". At least' - " one of these attributes must be present." - ) + + None. - return None + + If 'child_role' has been delegated the target with the name + 'target_filepath', then we return the role name of 'child_role'. + Otherwise, we return None. + """ -def _check_file_length(file_object, trusted_file_length): - """ - TODO - """ - file_object.seek(0, 2) - observed_length = file_object.tell() - - # Return and log a message if the length 'file_object' is equal to - # 'trusted_file_length', otherwise raise an exception. A hard check - # ensures that a downloaded file strictly matches a known, or trusted, - # file length. - if observed_length != trusted_file_length: - raise tuf.exceptions.DownloadLengthMismatchError( - trusted_file_length, observed_length - ) + child_role_name = child_role["name"] + child_role_paths = child_role.get("paths") + child_role_path_hash_prefixes = child_role.get("path_hash_prefixes") + if child_role_path_hash_prefixes is not None: + target_filepath_hash = self._get_target_hash(target_filepath) + for child_role_path_hash_prefix in child_role_path_hash_prefixes: + if not target_filepath_hash.startswith(child_role_path_hash_prefix): + continue -def _check_hashes(file_object, trusted_hashes): - """ - TODO - """ - # Verify each trusted hash of 'trusted_hashes'. If all are valid, simply - # return. - for algorithm, trusted_hash in trusted_hashes.items(): - digest_object = securesystemslib.hash.digest(algorithm) - # Ensure we read from the beginning of the file object - # TODO: should we store file position (before the loop) and reset - # after we seek about? - file_object.seek(0) - digest_object.update(file_object.read()) - computed_hash = digest_object.hexdigest() - - # Raise an exception if any of the hashes are incorrect. - if trusted_hash != computed_hash: - raise securesystemslib.exceptions.BadHashError( - trusted_hash, computed_hash + return child_role_name + + elif child_role_paths is not None: + # Is 'child_role_name' allowed to sign for 'target_filepath'? + for child_role_path in child_role_paths: + # A child role path may be an explicit path or glob pattern (Unix + # shell-style wildcards). The child role 'child_role_name' is + # returned if 'target_filepath' is equal to or matches + # 'child_role_path'. Explicit filepaths are also considered + # matches. A repo maintainer might delegate a glob pattern with a + # leading path separator, while the client requests a matching + # target without a leading path separator - make sure to strip any + # leading path separators so that a match is made. + # Example: "foo.tgz" should match with "/*.tgz". 
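+                # e.g. fnmatch.fnmatch("foo.tgz", "*.tgz") is True once the
+                # leading path separator has been stripped from "/*.tgz".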
+ if fnmatch.fnmatch( + target_filepath.lstrip(os.sep), child_role_path.lstrip(os.sep) + ): + logger.debug( + "Child role " + + repr(child_role_name) + + " is allowed to sign for " + + repr(target_filepath) + ) + + return child_role_name + + logger.debug( + "The given target path " + + repr(target_filepath) + + " does not match the trusted path or glob pattern: " + + repr(child_role_path) + ) + continue + + else: + # 'role_name' should have been validated when it was downloaded. + # The 'paths' or 'path_hash_prefixes' fields should not be missing, + # so we raise a format error here in case they are both missing. + raise tuf.exceptions.FormatError( + repr(child_role_name) + " " + 'has neither a "paths" nor "path_hash_prefixes". At least' + " one of these attributes must be present." ) - logger.info( - "The file's " + algorithm + " hash is" " correct: " + trusted_hash - ) + return None + @staticmethod + def _check_file_length(file_object, trusted_file_length): + """ + TODO + """ + file_object.seek(0, 2) + observed_length = file_object.tell() -def _get_target_hash(target_filepath, hash_function="sha256"): - """ - TODO - """ - # Calculate the hash of the filepath to determine which bin to find the - # target. The client currently assumes the repository (i.e., repository - # tool) uses 'hash_function' to generate hashes and UTF-8. - digest_object = securesystemslib.hash.digest(hash_function) - encoded_target_filepath = target_filepath.encode("utf-8") - digest_object.update(encoded_target_filepath) - target_filepath_hash = digest_object.hexdigest() + # Return and log a message if the length 'file_object' is equal to + # 'trusted_file_length', otherwise raise an exception. A hard check + # ensures that a downloaded file strictly matches a known, or trusted, + # file length. + if observed_length != trusted_file_length: + raise tuf.exceptions.DownloadLengthMismatchError( + trusted_file_length, observed_length + ) - return target_filepath_hash + @staticmethod + def _check_hashes(file_object, trusted_hashes): + """ + TODO + """ + # Verify each trusted hash of 'trusted_hashes'. If all are valid, simply + # return. + for algorithm, trusted_hash in trusted_hashes.items(): + digest_object = securesystemslib.hash.digest(algorithm) + # Ensure we read from the beginning of the file object + # TODO: should we store file position (before the loop) and reset + # after we seek about? + file_object.seek(0) + digest_object.update(file_object.read()) + computed_hash = digest_object.hexdigest() + + # Raise an exception if any of the hashes are incorrect. + if trusted_hash != computed_hash: + raise securesystemslib.exceptions.BadHashError( + trusted_hash, computed_hash + ) + logger.info( + "The file's " + algorithm + " hash is" " correct: " + trusted_hash + ) -def neither_403_nor_404(mirror_error): - """ - TODO - """ - if isinstance(mirror_error, tuf.exceptions.FetcherHTTPError): - if mirror_error.status_code in {403, 404}: - return False - return True + @staticmethod + def _get_target_hash(target_filepath, hash_function="sha256"): + """ + TODO + """ + # Calculate the hash of the filepath to determine which bin to find the + # target. The client currently assumes the repository (i.e., repository + # tool) uses 'hash_function' to generate hashes and UTF-8. 
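+        # This is how hashed-bin delegations are resolved: _visit_child_role
+        # compares this digest against each 'path_hash_prefix', so a digest
+        # starting with "ab" matches a bin whose prefix is "ab".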
+ digest_object = securesystemslib.hash.digest(hash_function) + encoded_target_filepath = target_filepath.encode("utf-8") + digest_object.update(encoded_target_filepath) + target_filepath_hash = digest_object.hexdigest() + + return target_filepath_hash + + @staticmethod + def neither_403_nor_404(mirror_error): + """ + TODO + """ + if isinstance(mirror_error, tuf.exceptions.FetcherHTTPError): + if mirror_error.status_code in {403, 404}: + return False + return True From 741ba4dc2fc3d458a30d5a8014931b0ce51e1764 Mon Sep 17 00:00:00 2001 From: Teodora Sechkova Date: Tue, 30 Mar 2021 14:53:30 +0300 Subject: [PATCH 3/9] Create MetadataUpdater class Move the code that implements the update of the metadata files to a separate class called MetadataUpdater. Signed-off-by: Teodora Sechkova --- tuf/client_rework/updater_rework.py | 205 +++++++++++++++++----------- 1 file changed, 127 insertions(+), 78 deletions(-) diff --git a/tuf/client_rework/updater_rework.py b/tuf/client_rework/updater_rework.py index 4254425d10..3457727bcd 100644 --- a/tuf/client_rework/updater_rework.py +++ b/tuf/client_rework/updater_rework.py @@ -58,16 +58,17 @@ def __init__( fetcher: Optional[FetcherInterface] = None, ): - self._repository_name = repository_name self._mirrors = repository_mirrors - self._consistent_snapshot = False - self._metadata = {} if fetcher is None: self._fetcher = RequestsFetcher() else: self._fetcher = fetcher + self._metadata = MetadataUpdater( + repository_name, self._mirrors, self._fetcher + ) + def refresh(self) -> None: """ This method downloads, verifies, and loads metadata for the top-level @@ -82,10 +83,7 @@ def refresh(self) -> None: requests. """ - self._load_root() - self._load_timestamp() - self._load_snapshot() - self._load_targets("targets", "root") + self._metadata.refresh() def get_one_valid_targetinfo(self, filename: str) -> Dict: """ @@ -93,7 +91,7 @@ def get_one_valid_targetinfo(self, filename: str) -> Dict: file path. This target method also downloads the metadata of updated targets. """ - return self._preorder_depth_first_walk(filename) + return self._metadata.preorder_depth_first_walk(filename) @staticmethod def updated_targets(targets: Dict, destination_directory: str) -> Dict: @@ -152,9 +150,9 @@ def download_target(self, target: Dict, destination_directory: str): The file is saved to the 'destination_directory' argument. """ - for temp_obj in mirrors._mirror_target_download(target, - self._mirrors, - self._fetcher): + for temp_obj in mirrors._mirror_target_download( + target, self._mirrors, self._fetcher + ): try: self._verify_target_file(temp_obj, target) @@ -169,8 +167,79 @@ def download_target(self, target: Dict, destination_directory: str): # TODO: do something with exceptions raise + def _verify_target_file(self, temp_obj: BinaryIO, targetinfo: Dict) -> None: + """ + TODO + """ + + self._check_file_length(temp_obj, targetinfo["fileinfo"]["length"]) + _check_hashes(temp_obj, targetinfo["fileinfo"]["hashes"]) + @staticmethod + def _check_file_length(file_object, trusted_file_length): + """ + TODO + """ + file_object.seek(0, 2) + observed_length = file_object.tell() + # Return and log a message if the length 'file_object' is equal to + # 'trusted_file_length', otherwise raise an exception. A hard check + # ensures that a downloaded file strictly matches a known, or trusted, + # file length. 
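+        # e.g. observed_length == 1022 against trusted_file_length == 1024
+        # raises DownloadLengthMismatchError(1024, 1022).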
+ if observed_length != trusted_file_length: + raise tuf.exceptions.DownloadLengthMismatchError( + trusted_file_length, observed_length + ) + + @staticmethod + def _get_target_hash(target_filepath, hash_function="sha256"): + """ + TODO + """ + # Calculate the hash of the filepath to determine which bin to find the + # target. The client currently assumes the repository (i.e., repository + # tool) uses 'hash_function' to generate hashes and UTF-8. + digest_object = securesystemslib.hash.digest(hash_function) + encoded_target_filepath = target_filepath.encode("utf-8") + digest_object.update(encoded_target_filepath) + target_filepath_hash = digest_object.hexdigest() + + return target_filepath_hash + + +class MetadataUpdater: + def __init__( + self, + repository_name: str, + repository_mirrors: Dict, + fetcher: FetcherInterface, + ): + + self._repository_name = repository_name + self._mirrors = repository_mirrors + self._fetcher = fetcher + + self._metadata = {} + + def refresh(self) -> None: + """ + This method downloads, verifies, and loads metadata for the top-level + roles in a specific order (root -> timestamp -> snapshot -> targets) + The expiration time for downloaded metadata is also verified. + + The metadata for delegated roles are not refreshed by this method, but + by the method that returns targetinfo (i.e., + get_one_valid_targetinfo()). + + The refresh() method should be called by the client before any target + requests. + """ + + self._load_root() + self._load_timestamp() + self._load_snapshot() + self._load_targets("targets", "root") def _get_full_meta_name( self, role: str, extension: str = ".json", version: int = None @@ -226,10 +295,11 @@ def _load_root(self) -> None: for next_version in range(lower_bound, upper_bound): try: mirror_download = mirrors._mirror_meta_download( - self._get_relative_meta_name('root', version=next_version), + self._get_relative_meta_name("root", version=next_version), tuf.settings.DEFAULT_ROOT_REQUIRED_LENGTH, self._mirrors, - self._fetcher) + self._fetcher, + ) for temp_obj in mirror_download: try: @@ -287,10 +357,12 @@ def _load_timestamp(self) -> None: TODO """ # TODO Check if timestamp exists locally - for temp_obj in mirrors._mirror_meta_download('timestamp.json', + for temp_obj in mirrors._mirror_meta_download( + "timestamp.json", tuf.settings.DEFAULT_TIMESTAMP_REQUIRED_LENGTH, self._mirrors, - self._fetcher): + self._fetcher, + ): try: verified_tampstamp = self._verify_timestamp(temp_obj) @@ -327,9 +399,8 @@ def _load_snapshot(self) -> None: # Check if exists locally # self.loadLocal('snapshot', snapshotVerifier) for temp_obj in mirrors._mirror_meta_download( - 'snapshot.json', length, - self._mirrors, - self._fetcher): + "snapshot.json", length, self._mirrors, self._fetcher + ): try: verified_snapshot = self._verify_snapshot(temp_obj) @@ -367,9 +438,8 @@ def _load_targets(self, targets_role: str, parent_role: str) -> None: # self.loadLocal('snapshot', targetsVerifier) for temp_obj in mirrors._mirror_meta_download( - targets_role + '.json', length, - self._mirrors, - self._fetcher): + targets_role + ".json", length, self._mirrors, self._fetcher + ): try: verified_targets = self._verify_targets( @@ -464,7 +534,7 @@ def _verify_snapshot(self, temp_obj: TextIO) -> SnapshotWrapper: # Check against timestamp metadata if self._metadata["timestamp"].snapshot.get("hash"): - self._check_hashes( + _check_hashes( temp_obj, self._metadata["timestamp"].snapshot.get("hash") ) @@ -506,7 +576,7 @@ def _verify_targets( # Check against timestamp metadata if 
self._metadata["snapshot"].role(filename).get("hash"): - self._check_hashes( + _check_hashes( temp_obj, self._metadata["snapshot"].targets.get("hash") ) @@ -529,16 +599,7 @@ def _verify_targets( return intermediate_targets - - def _verify_target_file(self, temp_obj: BinaryIO, targetinfo: Dict) -> None: - """ - TODO - """ - - self._check_file_length(temp_obj, targetinfo["fileinfo"]["length"]) - self._check_hashes(temp_obj, targetinfo["fileinfo"]["hashes"]) - - def _preorder_depth_first_walk(self, target_filepath) -> Dict: + def preorder_depth_first_walk(self, target_filepath) -> Dict: """ TODO """ @@ -699,7 +760,9 @@ def _visit_child_role(child_role: Dict, target_filepath: str) -> str: if child_role_path_hash_prefixes is not None: target_filepath_hash = self._get_target_hash(target_filepath) for child_role_path_hash_prefix in child_role_path_hash_prefixes: - if not target_filepath_hash.startswith(child_role_path_hash_prefix): + if not target_filepath_hash.startswith( + child_role_path_hash_prefix + ): continue return child_role_name @@ -717,7 +780,8 @@ def _visit_child_role(child_role: Dict, target_filepath: str) -> str: # leading path separators so that a match is made. # Example: "foo.tgz" should match with "/*.tgz". if fnmatch.fnmatch( - target_filepath.lstrip(os.sep), child_role_path.lstrip(os.sep) + target_filepath.lstrip(os.sep), + child_role_path.lstrip(os.sep), ): logger.debug( "Child role " @@ -748,49 +812,6 @@ def _visit_child_role(child_role: Dict, target_filepath: str) -> str: return None - @staticmethod - def _check_file_length(file_object, trusted_file_length): - """ - TODO - """ - file_object.seek(0, 2) - observed_length = file_object.tell() - - # Return and log a message if the length 'file_object' is equal to - # 'trusted_file_length', otherwise raise an exception. A hard check - # ensures that a downloaded file strictly matches a known, or trusted, - # file length. - if observed_length != trusted_file_length: - raise tuf.exceptions.DownloadLengthMismatchError( - trusted_file_length, observed_length - ) - - @staticmethod - def _check_hashes(file_object, trusted_hashes): - """ - TODO - """ - # Verify each trusted hash of 'trusted_hashes'. If all are valid, simply - # return. - for algorithm, trusted_hash in trusted_hashes.items(): - digest_object = securesystemslib.hash.digest(algorithm) - # Ensure we read from the beginning of the file object - # TODO: should we store file position (before the loop) and reset - # after we seek about? - file_object.seek(0) - digest_object.update(file_object.read()) - computed_hash = digest_object.hexdigest() - - # Raise an exception if any of the hashes are incorrect. - if trusted_hash != computed_hash: - raise securesystemslib.exceptions.BadHashError( - trusted_hash, computed_hash - ) - - logger.info( - "The file's " + algorithm + " hash is" " correct: " + trusted_hash - ) - @staticmethod def _get_target_hash(target_filepath, hash_function="sha256"): """ @@ -815,3 +836,31 @@ def neither_403_nor_404(mirror_error): if mirror_error.status_code in {403, 404}: return False return True + + +# FIXME: _check_hashes is moved outside the classes so that it can be reused. +# Find a proper class design to avoid this. +def _check_hashes(file_object, trusted_hashes): + """ + TODO + """ + # Verify each trusted hash of 'trusted_hashes'. If all are valid, simply + # return. 
+ for algorithm, trusted_hash in trusted_hashes.items(): + digest_object = securesystemslib.hash.digest(algorithm) + # Ensure we read from the beginning of the file object + # TODO: should we store file position (before the loop) and reset + # after we seek about? + file_object.seek(0) + digest_object.update(file_object.read()) + computed_hash = digest_object.hexdigest() + + # Raise an exception if any of the hashes are incorrect. + if trusted_hash != computed_hash: + raise securesystemslib.exceptions.BadHashError( + trusted_hash, computed_hash + ) + + logger.info( + "The file's " + algorithm + " hash is" " correct: " + trusted_hash + ) From f36f39b08e2249d4d99d63a62d119d5727c4949e Mon Sep 17 00:00:00 2001 From: Teodora Sechkova Date: Tue, 30 Mar 2021 17:41:11 +0300 Subject: [PATCH 4/9] Create TargetUpdater class Move target files update code to a separate class. Signed-off-by: Teodora Sechkova --- tuf/client_rework/updater_rework.py | 223 +++++++++++++++------------- 1 file changed, 116 insertions(+), 107 deletions(-) diff --git a/tuf/client_rework/updater_rework.py b/tuf/client_rework/updater_rework.py index 3457727bcd..eb652e716c 100644 --- a/tuf/client_rework/updater_rework.py +++ b/tuf/client_rework/updater_rework.py @@ -10,7 +10,7 @@ import fnmatch import logging import os -from typing import BinaryIO, Dict, Optional, TextIO +from typing import Dict, Optional, TextIO import securesystemslib.exceptions import securesystemslib.util @@ -58,17 +58,15 @@ def __init__( fetcher: Optional[FetcherInterface] = None, ): - self._mirrors = repository_mirrors - if fetcher is None: - self._fetcher = RequestsFetcher() - else: - self._fetcher = fetcher + fetcher = RequestsFetcher() self._metadata = MetadataUpdater( - repository_name, self._mirrors, self._fetcher + repository_name, repository_mirrors, fetcher ) + self._target_updater = TargetUpdater(repository_mirrors, fetcher) + def refresh(self) -> None: """ This method downloads, verifies, and loads metadata for the top-level @@ -93,8 +91,9 @@ def get_one_valid_targetinfo(self, filename: str) -> Dict: """ return self._metadata.preorder_depth_first_walk(filename) - @staticmethod - def updated_targets(targets: Dict, destination_directory: str) -> Dict: + def updated_targets( + self, targets: Dict, destination_directory: str + ) -> Dict: """ After the client has retrieved the target information for those targets they are interested in updating, they would call this method to @@ -102,110 +101,17 @@ def updated_targets(targets: Dict, destination_directory: str) -> Dict: All the targets that have changed are returns in a list. From this list, they can request a download by calling 'download_target()'. """ - # Keep track of the target objects and filepaths of updated targets. - # Return 'updated_targets' and use 'updated_targetpaths' to avoid - # duplicates. - updated_targets = [] - updated_targetpaths = [] - - for target in targets: - # Prepend 'destination_directory' to the target's relative filepath - # (as stored in metadata.) Verify the hash of 'target_filepath' - # against each hash listed for its fileinfo. Note: join() discards - # 'destination_directory' if 'filepath' contains a leading path - # separator (i.e., is treated as an absolute path). - filepath = target["filepath"] - target_filepath = os.path.join(destination_directory, filepath) - - if target_filepath in updated_targetpaths: - continue - - # Try one of the algorithm/digest combos for a mismatch. We break - # as soon as we find a mismatch. 
- for algorithm, digest in target["fileinfo"]["hashes"].items(): - digest_object = None - try: - digest_object = securesystemslib.hash.digest_filename( - target_filepath, algorithm=algorithm - ) - - # This exception will occur if the target does not exist - # locally. - except securesystemslib.exceptions.StorageError: - updated_targets.append(target) - updated_targetpaths.append(target_filepath) - break - - # The file does exist locally, check if its hash differs. - if digest_object.hexdigest() != digest: - updated_targets.append(target) - updated_targetpaths.append(target_filepath) - break - - return updated_targets + return self._target_updater.updated_targets( + targets, destination_directory + ) - def download_target(self, target: Dict, destination_directory: str): + def download_target(self, target: Dict, destination_directory: str) -> None: """ This method performs the actual download of the specified target. The file is saved to the 'destination_directory' argument. """ - for temp_obj in mirrors._mirror_target_download( - target, self._mirrors, self._fetcher - ): - - try: - self._verify_target_file(temp_obj, target) - # break? should we break after first successful download? - - filepath = os.path.join( - destination_directory, target["filepath"] - ) - securesystemslib.util.persist_temp_file(temp_obj, filepath) - # pylint: disable=try-except-raise - except Exception: - # TODO: do something with exceptions - raise - - def _verify_target_file(self, temp_obj: BinaryIO, targetinfo: Dict) -> None: - """ - TODO - """ - - self._check_file_length(temp_obj, targetinfo["fileinfo"]["length"]) - _check_hashes(temp_obj, targetinfo["fileinfo"]["hashes"]) - - @staticmethod - def _check_file_length(file_object, trusted_file_length): - """ - TODO - """ - file_object.seek(0, 2) - observed_length = file_object.tell() - - # Return and log a message if the length 'file_object' is equal to - # 'trusted_file_length', otherwise raise an exception. A hard check - # ensures that a downloaded file strictly matches a known, or trusted, - # file length. - if observed_length != trusted_file_length: - raise tuf.exceptions.DownloadLengthMismatchError( - trusted_file_length, observed_length - ) - - @staticmethod - def _get_target_hash(target_filepath, hash_function="sha256"): - """ - TODO - """ - # Calculate the hash of the filepath to determine which bin to find the - # target. The client currently assumes the repository (i.e., repository - # tool) uses 'hash_function' to generate hashes and UTF-8. - digest_object = securesystemslib.hash.digest(hash_function) - encoded_target_filepath = target_filepath.encode("utf-8") - digest_object.update(encoded_target_filepath) - target_filepath_hash = digest_object.hexdigest() - - return target_filepath_hash + self._target_updater.download_target(target, destination_directory) class MetadataUpdater: @@ -838,6 +744,109 @@ def neither_403_nor_404(mirror_error): return True +class TargetUpdater: + def __init__( + self, + repository_mirrors: Dict, + fetcher: FetcherInterface, + ): + + self._mirrors = repository_mirrors + self._fetcher = fetcher + + @staticmethod + def updated_targets(targets: Dict, destination_directory: str) -> Dict: + """ + After the client has retrieved the target information for those targets + they are interested in updating, they would call this method to + determine which targets have changed from those saved locally on disk. + All the targets that have changed are returns in a list. 
From this + list, they can request a download by calling 'download_target()'. + """ + # Keep track of the target objects and filepaths of updated targets. + # Return 'updated_targets' and use 'updated_targetpaths' to avoid + # duplicates. + updated_targets = [] + updated_targetpaths = [] + + for target in targets: + # Prepend 'destination_directory' to the target's relative filepath + # (as stored in metadata.) Verify the hash of 'target_filepath' + # against each hash listed for its fileinfo. Note: join() discards + # 'destination_directory' if 'filepath' contains a leading path + # separator (i.e., is treated as an absolute path). + filepath = target["filepath"] + target_filepath = os.path.join(destination_directory, filepath) + + if target_filepath in updated_targetpaths: + continue + + # Try one of the algorithm/digest combos for a mismatch. We break + # as soon as we find a mismatch. + for algorithm, digest in target["fileinfo"]["hashes"].items(): + digest_object = None + try: + digest_object = securesystemslib.hash.digest_filename( + target_filepath, algorithm=algorithm + ) + + # This exception will occur if the target does not exist + # locally. + except securesystemslib.exceptions.StorageError: + updated_targets.append(target) + updated_targetpaths.append(target_filepath) + break + + # The file does exist locally, check if its hash differs. + if digest_object.hexdigest() != digest: + updated_targets.append(target) + updated_targetpaths.append(target_filepath) + break + + return updated_targets + + def download_target(self, target: Dict, destination_directory: str): + """ + This method performs the actual download of the specified target. + The file is saved to the 'destination_directory' argument. + """ + + for temp_obj in mirrors._mirror_target_download( + target, self._mirrors, self._fetcher + ): + + try: + self._check_file_length(temp_obj, target["fileinfo"]["length"]) + _check_hashes(temp_obj, target["fileinfo"]["hashes"]) + # break? should we break after first successful download? + + filepath = os.path.join( + destination_directory, target["filepath"] + ) + securesystemslib.util.persist_temp_file(temp_obj, filepath) + # pylint: disable=try-except-raise + except Exception: + # TODO: do something with exceptions + raise + + @staticmethod + def _check_file_length(file_object, trusted_file_length): + """ + TODO + """ + file_object.seek(0, 2) + observed_length = file_object.tell() + + # Return and log a message if the length 'file_object' is equal to + # 'trusted_file_length', otherwise raise an exception. A hard check + # ensures that a downloaded file strictly matches a known, or trusted, + # file length. + if observed_length != trusted_file_length: + raise tuf.exceptions.DownloadLengthMismatchError( + trusted_file_length, observed_length + ) + + # FIXME: _check_hashes is moved outside the classes so that it can be reused. # Find a proper class design to avoid this. def _check_hashes(file_object, trusted_hashes): From 4548cf02a9cb7fe37a9167e6c3976538a947b63f Mon Sep 17 00:00:00 2001 From: Teodora Sechkova Date: Wed, 31 Mar 2021 12:38:06 +0300 Subject: [PATCH 5/9] Remove (un)safe_download functions The two functions safe/unsafe_download differ only by setting a single boolean flag. Remove them and call directly _download_file instead. 
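For illustration, the call-site change this implies (a sketch; names as in
the patch):

    # before: two wrappers that differ only in the boolean flag
    temp_obj = download.safe_download(file_mirror, length, fetcher)
    temp_obj = download.unsafe_download(file_mirror, upper_length, fetcher)

    # after: one function; the flag defaults to True (strict) and is set to
    # False only for metadata that has no signed length
    temp_obj = download.download_file(file_mirror, length, fetcher)
    temp_obj = download.download_file(
        file_mirror, upper_length, fetcher, STRICT_REQUIRED_LENGTH=False)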
Signed-off-by: Teodora Sechkova --- tuf/client_rework/download.py | 103 ++-------------------------------- tuf/client_rework/mirrors.py | 7 ++- 2 files changed, 10 insertions(+), 100 deletions(-) diff --git a/tuf/client_rework/download.py b/tuf/client_rework/download.py index 2d946ef891..4cc855f372 100644 --- a/tuf/client_rework/download.py +++ b/tuf/client_rework/download.py @@ -47,103 +47,7 @@ logger = logging.getLogger(__name__) -def safe_download(url, required_length, fetcher): - """ - - Given the 'url' and 'required_length' of the desired file, open a connection - to 'url', download it, and return the contents of the file. Also ensure - the length of the downloaded file matches 'required_length' exactly. - tuf.download.unsafe_download() may be called if an upper download limit is - preferred. - - - url: - A URL string that represents the location of the file. - - required_length: - An integer value representing the length of the file. This is an exact - limit. - - fetcher: - An object implementing FetcherInterface that performs the network IO - operations. - - - A file object is created on disk to store the contents of 'url'. - - - tuf.ssl_commons.exceptions.DownloadLengthMismatchError, if there was a - mismatch of observed vs expected lengths while downloading the file. - - securesystemslib.exceptions.FormatError, if any of the arguments are - improperly formatted. - - Any other unforeseen runtime exception. - - - A file object that points to the contents of 'url'. - """ - - # Do all of the arguments have the appropriate format? - # Raise 'securesystemslib.exceptions.FormatError' if there is a mismatch. - securesystemslib.formats.URL_SCHEMA.check_match(url) - tuf.formats.LENGTH_SCHEMA.check_match(required_length) - - return _download_file(url, required_length, fetcher, STRICT_REQUIRED_LENGTH=True) - - - - - -def unsafe_download(url, required_length, fetcher): - """ - - Given the 'url' and 'required_length' of the desired file, open a connection - to 'url', download it, and return the contents of the file. Also ensure - the length of the downloaded file is up to 'required_length', and no larger. - tuf.download.safe_download() may be called if an exact download limit is - preferred. - - - url: - A URL string that represents the location of the file. - - required_length: - An integer value representing the length of the file. This is an upper - limit. - - fetcher: - An object implementing FetcherInterface that performs the network IO - operations. - - - A file object is created on disk to store the contents of 'url'. - - - tuf.ssl_commons.exceptions.DownloadLengthMismatchError, if there was a - mismatch of observed vs expected lengths while downloading the file. - - securesystemslib.exceptions.FormatError, if any of the arguments are - improperly formatted. - - Any other unforeseen runtime exception. - - - A file object that points to the contents of 'url'. - """ - - # Do all of the arguments have the appropriate format? - # Raise 'securesystemslib.exceptions.FormatError' if there is a mismatch. 
- securesystemslib.formats.URL_SCHEMA.check_match(url) - tuf.formats.LENGTH_SCHEMA.check_match(required_length) - - return _download_file(url, required_length, fetcher, STRICT_REQUIRED_LENGTH=False) - - - - - -def _download_file(url, required_length, fetcher, STRICT_REQUIRED_LENGTH=True): +def download_file(url, required_length, fetcher, STRICT_REQUIRED_LENGTH=True): """ Given the url and length of the desired file, this function opens a @@ -180,6 +84,11 @@ def _download_file(url, required_length, fetcher, STRICT_REQUIRED_LENGTH=True): A file object that points to the contents of 'url'. """ + # Do all of the arguments have the appropriate format? + # Raise 'securesystemslib.exceptions.FormatError' if there is a mismatch. + securesystemslib.formats.URL_SCHEMA.check_match(url) + tuf.formats.LENGTH_SCHEMA.check_match(required_length) + # 'url.replace('\\', '/')' is needed for compatibility with Windows-based # systems, because they might use back-slashes in place of forward-slashes. # This converts it to the common format. unquote() replaces %xx escapes in a diff --git a/tuf/client_rework/mirrors.py b/tuf/client_rework/mirrors.py index a9e4dd266b..2b7682645f 100644 --- a/tuf/client_rework/mirrors.py +++ b/tuf/client_rework/mirrors.py @@ -148,10 +148,11 @@ def _mirror_meta_download(filename: str, upper_length: int, file_mirror_errors = {} for file_mirror in file_mirrors: try: - temp_obj = download.unsafe_download( + temp_obj = download.download_file( file_mirror, upper_length, - fetcher) + fetcher, + STRICT_REQUIRED_LENGTH=False) temp_obj.seek(0) yield temp_obj @@ -177,7 +178,7 @@ def _mirror_target_download(fileinfo: str, mirrors_config: Dict, file_mirror_errors = {} for file_mirror in file_mirrors: try: - temp_obj = download.safe_download( + temp_obj = download.download_file( file_mirror, fileinfo['fileinfo']['length'], fetcher) From e9db24618f35d467193be5e7d025921c8e9a2c9d Mon Sep 17 00:00:00 2001 From: Teodora Sechkova Date: Wed, 31 Mar 2021 13:58:57 +0300 Subject: [PATCH 6/9] Wrap mirrors inside Mirrors class Wrap the functionality from the mirrors.py module inside a Mirrors class. Apply black and isort over the old-style mirrors code. Signed-off-by: Teodora Sechkova --- tuf/client_rework/mirrors.py | 308 +++++++++++++--------------- tuf/client_rework/updater_rework.py | 42 ++-- 2 files changed, 164 insertions(+), 186 deletions(-) diff --git a/tuf/client_rework/mirrors.py b/tuf/client_rework/mirrors.py index 2b7682645f..4a59ab9150 100644 --- a/tuf/client_rework/mirrors.py +++ b/tuf/client_rework/mirrors.py @@ -22,174 +22,162 @@ of the file with respect to the base url. """ + # Help with Python 3 compatibility, where the print statement is a function, an # implicit relative import is invalid, and the '/' operator performs true # division. Example: print 'hello world' raises a 'SyntaxError' exception. -from __future__ import print_function -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals - -from typing import TextIO, BinaryIO, Dict +from __future__ import ( + absolute_import, + division, + print_function, + unicode_literals, +) import os - -import tuf -import tuf.formats -import tuf.client_rework.download as download +from typing import BinaryIO, Dict, TextIO import securesystemslib import six +import tuf +import tuf.client_rework.download as download +import tuf.formats + # The type of file to be downloaded from a repository. The # 'get_list_of_mirrors' function supports these file types. 
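# A sketch of what each supported file type selects below (keys as defined
# by MIRROR_SCHEMA):
#   "meta"   -> mirror_info["metadata_path"]
#   "target" -> mirror_info["targets_path"]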
-_SUPPORTED_FILE_TYPES = ['meta', 'target'] - - -def get_list_of_mirrors(file_type, file_path, mirrors_dict): - """ - - Get a list of mirror urls from a mirrors dictionary, provided the type - and the path of the file with respect to the base url. - - - file_type: - Type of data needed for download, must correspond to one of the strings - in the list ['meta', 'target']. 'meta' for metadata file type or - 'target' for target file type. It should correspond to - NAME_SCHEMA format. - - file_path: - A relative path to the file that corresponds to RELPATH_SCHEMA format. - Ex: 'http://url_prefix/targets_path/file_path' - - mirrors_dict: - A mirrors_dict object that corresponds to MIRRORDICT_SCHEMA, where - keys are strings and values are MIRROR_SCHEMA. An example format - of MIRROR_SCHEMA: - - {'url_prefix': 'http://localhost:8001', - 'metadata_path': 'metadata/', - 'targets_path': 'targets/', - 'confined_target_dirs': ['targets/snapshot1/', ...], - 'custom': {...}} - - The 'custom' field is optional. - - - securesystemslib.exceptions.Error, on unsupported 'file_type'. - - securesystemslib.exceptions.FormatError, on bad argument. - - - List of mirror urls corresponding to the file_type and file_path. If no - match is found, empty list is returned. - """ - - # Checking if all the arguments have appropriate format. - tuf.formats.RELPATH_SCHEMA.check_match(file_path) - tuf.formats.MIRRORDICT_SCHEMA.check_match(mirrors_dict) - securesystemslib.formats.NAME_SCHEMA.check_match(file_type) - - # Verify 'file_type' is supported. - if file_type not in _SUPPORTED_FILE_TYPES: - raise securesystemslib.exceptions.Error('Invalid file_type argument.' - ' Supported file types: ' + repr(_SUPPORTED_FILE_TYPES)) - path_key = 'metadata_path' if file_type == 'meta' else 'targets_path' - - # Reference to 'securesystemslib.util.file_in_confined_directories()' (improve - # readability). This function checks whether a mirror should serve a file to - # the client. A client may be confined to certain paths on a repository - # mirror when fetching target files. This field may be set by the client - # when the repository mirror is added to the 'tuf.client.updater.Updater' - # object. - in_confined_directory = securesystemslib.util.file_in_confined_directories - - list_of_mirrors = [] - for junk, mirror_info in six.iteritems(mirrors_dict): - # Does mirror serve this file type at all? - path = mirror_info.get(path_key) - if path is None: - continue - - # for targets, ensure directory confinement - if path_key == 'targets_path': - full_filepath = os.path.join(path, file_path) - confined_target_dirs = mirror_info.get('confined_target_dirs') - # confined_target_dirs is an optional field - if confined_target_dirs and not in_confined_directory(full_filepath, - confined_target_dirs): - continue - - # urllib.quote(string) replaces special characters in string using the %xx - # escape. This is done to avoid parsing issues of the URL on the server - # side. Do *NOT* pass URLs with Unicode characters without first encoding - # the URL as UTF-8. We need a long-term solution with #61. - # http://bugs.python.org/issue1712522 - file_path = six.moves.urllib.parse.quote(file_path) - url = os.path.join(mirror_info['url_prefix'], path, file_path) - - # The above os.path.join() result as well as input file_path may be - # invalid on windows (might contain both separator types), see #1077. - # Make sure the URL doesn't contain backward slashes on Windows. 
- list_of_mirrors.append(url.replace('\\', '/')) - - return list_of_mirrors - - -def _mirror_meta_download(filename: str, upper_length: int, - mirrors_config: Dict, - fetcher: "FetcherInterface") -> TextIO: - """ - Download metadata file from the list of metadata mirrors - """ - file_mirrors = get_list_of_mirrors('meta', filename, mirrors_config) - - file_mirror_errors = {} - for file_mirror in file_mirrors: - try: - temp_obj = download.download_file( - file_mirror, - upper_length, - fetcher, - STRICT_REQUIRED_LENGTH=False) - - temp_obj.seek(0) - yield temp_obj - - except Exception as exception: - file_mirror_errors[file_mirror] = exception - - finally: - if file_mirror_errors: - raise tuf.exceptions.NoWorkingMirrorError( - file_mirror_errors) - - -def _mirror_target_download(fileinfo: str, mirrors_config: Dict, - fetcher: "FetcherInterface") -> BinaryIO: - """ - Download target file from the list of target mirrors - """ - # full_filename = _get_full_name(filename) - file_mirrors = get_list_of_mirrors('target', fileinfo['filepath'], - mirrors_config) - - file_mirror_errors = {} - for file_mirror in file_mirrors: - try: - temp_obj = download.download_file( - file_mirror, - fileinfo['fileinfo']['length'], - fetcher) - - temp_obj.seek(0) - yield temp_obj - - except Exception as exception: - file_mirror_errors[file_mirror] = exception - - finally: - if file_mirror_errors: - raise tuf.exceptions.NoWorkingMirrorError( - file_mirror_errors) +_SUPPORTED_FILE_TYPES = ["meta", "target"] + + +class Mirrors: + def __init__(self, mirrors_dict: Dict): + tuf.formats.MIRRORDICT_SCHEMA.check_match(mirrors_dict) + self._config = mirrors_dict + + def _get_list_of_mirrors(self, file_type, file_path): + """ + + Get a list of mirror urls from a mirrors dictionary, provided the type + and the path of the file with respect to the base url. + + + file_type: + Type of data needed for download, must correspond to one of the strings + in the list ['meta', 'target']. 'meta' for metadata file type or + 'target' for target file type. It should correspond to + NAME_SCHEMA format. + + file_path: + A relative path to the file that corresponds to RELPATH_SCHEMA format. + Ex: 'http://url_prefix/targets_path/file_path' + + + securesystemslib.exceptions.Error, on unsupported 'file_type'. + + securesystemslib.exceptions.FormatError, on bad argument. + + + List of mirror urls corresponding to the file_type and file_path. If no + match is found, empty list is returned. + """ + + # Checking if all the arguments have appropriate format. + tuf.formats.RELPATH_SCHEMA.check_match(file_path) + securesystemslib.formats.NAME_SCHEMA.check_match(file_type) + + # Verify 'file_type' is supported. + if file_type not in _SUPPORTED_FILE_TYPES: + raise sslib_exceptions.Error( + "Invalid file_type argument." + " Supported file types: " + repr(_SUPPORTED_FILE_TYPES) + ) + path_key = "metadata_path" if file_type == "meta" else "targets_path" + + list_of_mirrors = [] + for junk, mirror_info in six.iteritems(self._config): + # Does mirror serve this file type at all? + path = mirror_info.get(path_key) + if path is None: + continue + + # for targets, ensure directory confinement + if path_key == "targets_path": + full_filepath = os.path.join(path, file_path) + confined_target_dirs = mirror_info.get("confined_target_dirs") + # confined_target_dirs is optional and can used to confine the client to + # certain paths on a repository mirror when fetching target files. 
+ if confined_target_dirs and not file_in_confined_directories( + full_filepath, confined_target_dirs + ): + continue + + # urllib.quote(string) replaces special characters in string using the %xx + # escape. This is done to avoid parsing issues of the URL on the server + # side. Do *NOT* pass URLs with Unicode characters without first encoding + # the URL as UTF-8. We need a long-term solution with #61. + # http://bugs.python.org/issue1712522 + file_path = six.moves.urllib.parse.quote(file_path) + url = os.path.join(mirror_info["url_prefix"], path, file_path) + + # The above os.path.join() result as well as input file_path may be + # invalid on windows (might contain both separator types), see #1077. + # Make sure the URL doesn't contain backward slashes on Windows. + list_of_mirrors.append(url.replace("\\", "/")) + + return list_of_mirrors + + def meta_download( + self, filename: str, upper_length: int, fetcher: "FetcherInterface" + ) -> TextIO: + """ + Download metadata file from the list of metadata mirrors + """ + file_mirrors = self._get_list_of_mirrors("meta", filename) + + file_mirror_errors = {} + for file_mirror in file_mirrors: + try: + temp_obj = download.download_file( + file_mirror, + upper_length, + fetcher, + STRICT_REQUIRED_LENGTH=False, + ) + + temp_obj.seek(0) + yield temp_obj + + except Exception as exception: + file_mirror_errors[file_mirror] = exception + + finally: + if file_mirror_errors: + raise tuf.exceptions.NoWorkingMirrorError( + file_mirror_errors + ) + + def target_download( + self, filename: str, strict_length: int, fetcher: "FetcherInterface" + ) -> BinaryIO: + """ + Download target file from the list of target mirrors + """ + file_mirrors = self._get_list_of_mirrors("target", filename) + + file_mirror_errors = {} + for file_mirror in file_mirrors: + try: + temp_obj = download.download_file( + file_mirror, strict_length, fetcher + ) + + temp_obj.seek(0) + yield temp_obj + + except Exception as exception: + file_mirror_errors[file_mirror] = exception + + finally: + if file_mirror_errors: + raise tuf.exceptions.NoWorkingMirrorError( + file_mirror_errors + ) diff --git a/tuf/client_rework/updater_rework.py b/tuf/client_rework/updater_rework.py index eb652e716c..57203193b7 100644 --- a/tuf/client_rework/updater_rework.py +++ b/tuf/client_rework/updater_rework.py @@ -18,9 +18,8 @@ import tuf.exceptions import tuf.formats import tuf.settings - -from tuf.client_rework import mirrors from tuf.client.fetcher import FetcherInterface +from tuf.client_rework.mirrors import Mirrors from tuf.requests_fetcher import RequestsFetcher from .metadata_wrapper import ( @@ -30,7 +29,6 @@ TimestampWrapper, ) - # Globals logger = logging.getLogger(__name__) @@ -61,11 +59,9 @@ def __init__( if fetcher is None: fetcher = RequestsFetcher() - self._metadata = MetadataUpdater( - repository_name, repository_mirrors, fetcher - ) - - self._target_updater = TargetUpdater(repository_mirrors, fetcher) + mirrors = Mirrors(repository_mirrors) + self._metadata = MetadataUpdater(repository_name, mirrors, fetcher) + self._target_updater = TargetUpdater(mirrors, fetcher) def refresh(self) -> None: """ @@ -118,12 +114,12 @@ class MetadataUpdater: def __init__( self, repository_name: str, - repository_mirrors: Dict, + mirrors: "Mirrors", fetcher: FetcherInterface, ): self._repository_name = repository_name - self._mirrors = repository_mirrors + self._mirrors = mirrors self._fetcher = fetcher self._metadata = {} @@ -200,10 +196,9 @@ def _load_root(self) -> None: verified_root = None for next_version 
in range(lower_bound, upper_bound): try: - mirror_download = mirrors._mirror_meta_download( + mirror_download = self._mirrors.meta_download( self._get_relative_meta_name("root", version=next_version), tuf.settings.DEFAULT_ROOT_REQUIRED_LENGTH, - self._mirrors, self._fetcher, ) @@ -263,10 +258,9 @@ def _load_timestamp(self) -> None: TODO """ # TODO Check if timestamp exists locally - for temp_obj in mirrors._mirror_meta_download( + for temp_obj in self._mirrors.meta_download( "timestamp.json", tuf.settings.DEFAULT_TIMESTAMP_REQUIRED_LENGTH, - self._mirrors, self._fetcher, ): @@ -304,8 +298,8 @@ def _load_snapshot(self) -> None: # Check if exists locally # self.loadLocal('snapshot', snapshotVerifier) - for temp_obj in mirrors._mirror_meta_download( - "snapshot.json", length, self._mirrors, self._fetcher + for temp_obj in self._mirrors.meta_download( + "snapshot.json", length, self._fetcher ): try: @@ -343,8 +337,8 @@ def _load_targets(self, targets_role: str, parent_role: str) -> None: # Check if exists locally # self.loadLocal('snapshot', targetsVerifier) - for temp_obj in mirrors._mirror_meta_download( - targets_role + ".json", length, self._mirrors, self._fetcher + for temp_obj in self._mirrors.meta_download( + targets_role + ".json", length, self._fetcher ): try: @@ -745,13 +739,9 @@ def neither_403_nor_404(mirror_error): class TargetUpdater: - def __init__( - self, - repository_mirrors: Dict, - fetcher: FetcherInterface, - ): + def __init__(self, mirrors: "Mirrors", fetcher: FetcherInterface): - self._mirrors = repository_mirrors + self._mirrors = mirrors self._fetcher = fetcher @staticmethod @@ -811,8 +801,8 @@ def download_target(self, target: Dict, destination_directory: str): The file is saved to the 'destination_directory' argument. """ - for temp_obj in mirrors._mirror_target_download( - target, self._mirrors, self._fetcher + for temp_obj in self._mirrors.target_download( + target["filepath"], target["fileinfo"]["length"], self._fetcher ): try: From aa74477b80163730c83254b22c27de7ce2c07c71 Mon Sep 17 00:00:00 2001 From: Teodora Sechkova Date: Wed, 31 Mar 2021 14:13:47 +0300 Subject: [PATCH 7/9] Move "fetcher" to Mirrors class Make fetcher a member of Mirrors class. Move the instantiation of the default fetcher object to the Mirrors constructor. Signed-off-by: Teodora Sechkova --- tuf/client_rework/mirrors.py | 24 +++++++++++-------- tuf/client_rework/updater_rework.py | 37 ++++++++--------------------- 2 files changed, 24 insertions(+), 37 deletions(-) diff --git a/tuf/client_rework/mirrors.py b/tuf/client_rework/mirrors.py index 4a59ab9150..eef385617e 100644 --- a/tuf/client_rework/mirrors.py +++ b/tuf/client_rework/mirrors.py @@ -34,7 +34,7 @@ ) import os -from typing import BinaryIO, Dict, TextIO +from typing import BinaryIO, Dict, Optional, TextIO import securesystemslib import six @@ -42,6 +42,7 @@ import tuf import tuf.client_rework.download as download import tuf.formats +from tuf.requests_fetcher import RequestsFetcher # The type of file to be downloaded from a repository. The # 'get_list_of_mirrors' function supports these file types. 
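A minimal sketch of the injection pattern the next hunk moves into the
Mirrors constructor (mirror_config and CustomFetcher are hypothetical):

    from tuf.client_rework.fetcher import FetcherInterface
    from tuf.client_rework.mirrors import Mirrors

    class CustomFetcher(FetcherInterface):
        def fetch(self, url, required_length):
            ...  # user-provided network stack

    mirrors = Mirrors(mirror_config)                   # default RequestsFetcher()
    mirrors = Mirrors(mirror_config, CustomFetcher())  # plugged-in fetcher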
@@ -49,10 +50,17 @@ class Mirrors: - def __init__(self, mirrors_dict: Dict): + def __init__( + self, mirrors_dict: Dict, fetcher: Optional["FetcherInterface"] = None + ): tuf.formats.MIRRORDICT_SCHEMA.check_match(mirrors_dict) self._config = mirrors_dict + if fetcher is None: + self._fetcher = RequestsFetcher() + else: + self._fetcher = fetcher + def _get_list_of_mirrors(self, file_type, file_path): """ @@ -125,9 +133,7 @@ def _get_list_of_mirrors(self, file_type, file_path): return list_of_mirrors - def meta_download( - self, filename: str, upper_length: int, fetcher: "FetcherInterface" - ) -> TextIO: + def meta_download(self, filename: str, upper_length: int) -> TextIO: """ Download metadata file from the list of metadata mirrors """ @@ -139,7 +145,7 @@ def meta_download( temp_obj = download.download_file( file_mirror, upper_length, - fetcher, + self._fetcher, STRICT_REQUIRED_LENGTH=False, ) @@ -155,9 +161,7 @@ def meta_download( file_mirror_errors ) - def target_download( - self, filename: str, strict_length: int, fetcher: "FetcherInterface" - ) -> BinaryIO: + def target_download(self, filename: str, strict_length: int) -> BinaryIO: """ Download target file from the list of target mirrors """ @@ -167,7 +171,7 @@ def target_download( for file_mirror in file_mirrors: try: temp_obj = download.download_file( - file_mirror, strict_length, fetcher + file_mirror, strict_length, self._fetcher ) temp_obj.seek(0) diff --git a/tuf/client_rework/updater_rework.py b/tuf/client_rework/updater_rework.py index 57203193b7..c2ed0636ea 100644 --- a/tuf/client_rework/updater_rework.py +++ b/tuf/client_rework/updater_rework.py @@ -18,9 +18,7 @@ import tuf.exceptions import tuf.formats import tuf.settings -from tuf.client.fetcher import FetcherInterface from tuf.client_rework.mirrors import Mirrors -from tuf.requests_fetcher import RequestsFetcher from .metadata_wrapper import ( RootWrapper, @@ -53,15 +51,12 @@ def __init__( self, repository_name: str, repository_mirrors: Dict, - fetcher: Optional[FetcherInterface] = None, + fetcher: Optional["FetcherInterface"] = None, ): - if fetcher is None: - fetcher = RequestsFetcher() - - mirrors = Mirrors(repository_mirrors) - self._metadata = MetadataUpdater(repository_name, mirrors, fetcher) - self._target_updater = TargetUpdater(mirrors, fetcher) + mirrors = Mirrors(repository_mirrors, fetcher) + self._metadata = MetadataUpdater(repository_name, mirrors) + self._target_updater = TargetUpdater(mirrors) def refresh(self) -> None: """ @@ -111,16 +106,10 @@ def download_target(self, target: Dict, destination_directory: str) -> None: class MetadataUpdater: - def __init__( - self, - repository_name: str, - mirrors: "Mirrors", - fetcher: FetcherInterface, - ): + def __init__(self, repository_name: str, mirrors: "Mirrors"): self._repository_name = repository_name self._mirrors = mirrors - self._fetcher = fetcher self._metadata = {} @@ -199,7 +188,6 @@ def _load_root(self) -> None: mirror_download = self._mirrors.meta_download( self._get_relative_meta_name("root", version=next_version), tuf.settings.DEFAULT_ROOT_REQUIRED_LENGTH, - self._fetcher, ) for temp_obj in mirror_download: @@ -259,9 +247,7 @@ def _load_timestamp(self) -> None: """ # TODO Check if timestamp exists locally for temp_obj in self._mirrors.meta_download( - "timestamp.json", - tuf.settings.DEFAULT_TIMESTAMP_REQUIRED_LENGTH, - self._fetcher, + "timestamp.json", tuf.settings.DEFAULT_TIMESTAMP_REQUIRED_LENGTH ): try: @@ -298,9 +284,7 @@ def _load_snapshot(self) -> None: # Check if exists locally # 
self.loadLocal('snapshot', snapshotVerifier) - for temp_obj in self._mirrors.meta_download( - "snapshot.json", length, self._fetcher - ): + for temp_obj in self._mirrors.meta_download("snapshot.json", length): try: verified_snapshot = self._verify_snapshot(temp_obj) @@ -338,7 +322,7 @@ def _load_targets(self, targets_role: str, parent_role: str) -> None: # self.loadLocal('snapshot', targetsVerifier) for temp_obj in self._mirrors.meta_download( - targets_role + ".json", length, self._fetcher + targets_role + ".json", length ): try: @@ -739,10 +723,9 @@ def neither_403_nor_404(mirror_error): class TargetUpdater: - def __init__(self, mirrors: "Mirrors", fetcher: FetcherInterface): + def __init__(self, mirrors: "Mirrors"): self._mirrors = mirrors - self._fetcher = fetcher @staticmethod def updated_targets(targets: Dict, destination_directory: str) -> Dict: @@ -802,7 +785,7 @@ def download_target(self, target: Dict, destination_directory: str): """ for temp_obj in self._mirrors.target_download( - target["filepath"], target["fileinfo"]["length"], self._fetcher + target["filepath"], target["fileinfo"]["length"] ): try: From 0637ec8e58ba39d9f9fff1549a2515416adeed3a Mon Sep 17 00:00:00 2001 From: Teodora Sechkova Date: Wed, 31 Mar 2021 14:34:27 +0300 Subject: [PATCH 8/9] Merge download and mirrors modules Move the functionality from the download module inside Mirrors class. Signed-off-by: Teodora Sechkova --- tuf/client_rework/download.py | 232 -------------- tuf/client_rework/mirrors.py | 187 ------------ tuf/client_rework/mirrors_download.py | 417 ++++++++++++++++++++++++++ tuf/client_rework/updater_rework.py | 2 +- 4 files changed, 418 insertions(+), 420 deletions(-) delete mode 100644 tuf/client_rework/download.py delete mode 100644 tuf/client_rework/mirrors.py create mode 100644 tuf/client_rework/mirrors_download.py diff --git a/tuf/client_rework/download.py b/tuf/client_rework/download.py deleted file mode 100644 index 4cc855f372..0000000000 --- a/tuf/client_rework/download.py +++ /dev/null @@ -1,232 +0,0 @@ -#!/usr/bin/env python - -# Copyright 2012 - 2017, New York University and the TUF contributors -# SPDX-License-Identifier: MIT OR Apache-2.0 - -""" - - download.py - - - February 21, 2012. Based on previous version by Geremy Condra. - - - Konstantin Andrianov - Vladimir Diaz - - - See LICENSE-MIT OR LICENSE for licensing information. - - - Download metadata and target files and check their validity. The hash and - length of a downloaded file has to match the hash and length supplied by the - metadata of that file. -""" - -# Help with Python 3 compatibility, where the print statement is a function, an -# implicit relative import is invalid, and the '/' operator performs true -# division. Example: print 'hello world' raises a 'SyntaxError' exception. -from __future__ import print_function -from __future__ import absolute_import -from __future__ import division -from __future__ import unicode_literals - -import logging -import timeit -import tempfile - -import securesystemslib -import securesystemslib.util -import six - -import tuf -import tuf.exceptions -import tuf.formats - -# See 'log.py' to learn how logging is handled in TUF. 
-logger = logging.getLogger(__name__) - - -def download_file(url, required_length, fetcher, STRICT_REQUIRED_LENGTH=True): - """ - - Given the url and length of the desired file, this function opens a - connection to 'url' and downloads the file while ensuring its length - matches 'required_length' if 'STRICT_REQUIRED_LENGH' is True (If False, - the file's length is not checked and a slow retrieval exception is raised - if the downloaded rate falls below the acceptable rate). - - - url: - A URL string that represents the location of the file. - - required_length: - An integer value representing the length of the file. - - STRICT_REQUIRED_LENGTH: - A Boolean indicator used to signal whether we should perform strict - checking of required_length. True by default. We explicitly set this to - False when we know that we want to turn this off for downloading the - timestamp metadata, which has no signed required_length. - - - A file object is created on disk to store the contents of 'url'. - - - tuf.exceptions.DownloadLengthMismatchError, if there was a - mismatch of observed vs expected lengths while downloading the file. - - securesystemslib.exceptions.FormatError, if any of the arguments are - improperly formatted. - - Any other unforeseen runtime exception. - - - A file object that points to the contents of 'url'. - """ - # Do all of the arguments have the appropriate format? - # Raise 'securesystemslib.exceptions.FormatError' if there is a mismatch. - securesystemslib.formats.URL_SCHEMA.check_match(url) - tuf.formats.LENGTH_SCHEMA.check_match(required_length) - - # 'url.replace('\\', '/')' is needed for compatibility with Windows-based - # systems, because they might use back-slashes in place of forward-slashes. - # This converts it to the common format. unquote() replaces %xx escapes in a - # url with their single-character equivalent. A back-slash may be encoded as - # %5c in the url, which should also be replaced with a forward slash. - url = six.moves.urllib.parse.unquote(url).replace('\\', '/') - logger.info('Downloading: ' + repr(url)) - - # This is the temporary file that we will return to contain the contents of - # the downloaded file. - temp_file = tempfile.TemporaryFile() - - average_download_speed = 0 - number_of_bytes_received = 0 - - try: - chunks = fetcher.fetch(url, required_length) - start_time = timeit.default_timer() - for chunk in chunks: - - stop_time = timeit.default_timer() - temp_file.write(chunk) - - # Measure the average download speed. - number_of_bytes_received += len(chunk) - seconds_spent_receiving = stop_time - start_time - average_download_speed = number_of_bytes_received / seconds_spent_receiving - - if average_download_speed < tuf.settings.MIN_AVERAGE_DOWNLOAD_SPEED: - logger.debug('The average download speed dropped below the minimum' - ' average download speed set in tuf.settings.py. Stopping the' - ' download!') - break - - else: - logger.debug('The average download speed has not dipped below the' - ' minimum average download speed set in tuf.settings.py.') - - # Does the total number of downloaded bytes match the required length? - _check_downloaded_length(number_of_bytes_received, required_length, - STRICT_REQUIRED_LENGTH=STRICT_REQUIRED_LENGTH, - average_download_speed=average_download_speed) - - except Exception: - # Close 'temp_file'. Any written data is lost. 
- temp_file.close() - logger.debug('Could not download URL: ' + repr(url)) - raise - - else: - return temp_file - - - - -def _check_downloaded_length(total_downloaded, required_length, - STRICT_REQUIRED_LENGTH=True, - average_download_speed=None): - """ - - A helper function which checks whether the total number of downloaded bytes - matches our expectation. - - - total_downloaded: - The total number of bytes supposedly downloaded for the file in question. - - required_length: - The total number of bytes expected of the file as seen from its metadata. - The Timestamp role is always downloaded without a known file length, and - the Root role when the client cannot download any of the required - top-level roles. In both cases, 'required_length' is actually an upper - limit on the length of the downloaded file. - - STRICT_REQUIRED_LENGTH: - A Boolean indicator used to signal whether we should perform strict - checking of required_length. True by default. We explicitly set this to - False when we know that we want to turn this off for downloading the - timestamp metadata, which has no signed required_length. - - average_download_speed: - The average download speed for the downloaded file. - - - None. - - - securesystemslib.exceptions.DownloadLengthMismatchError, if - STRICT_REQUIRED_LENGTH is True and total_downloaded is not equal - required_length. - - tuf.exceptions.SlowRetrievalError, if the total downloaded was - done in less than the acceptable download speed (as set in - tuf.settings.py). - - - None. - """ - - if total_downloaded == required_length: - logger.info('Downloaded ' + str(total_downloaded) + ' bytes out of the' - ' expected ' + str(required_length) + ' bytes.') - - else: - difference_in_bytes = abs(total_downloaded - required_length) - - # What we downloaded is not equal to the required length, but did we ask - # for strict checking of required length? - if STRICT_REQUIRED_LENGTH: - logger.info('Downloaded ' + str(total_downloaded) + ' bytes, but' - ' expected ' + str(required_length) + ' bytes. There is a difference' - ' of ' + str(difference_in_bytes) + ' bytes.') - - # If the average download speed is below a certain threshold, we flag - # this as a possible slow-retrieval attack. - logger.debug('Average download speed: ' + repr(average_download_speed)) - logger.debug('Minimum average download speed: ' + repr(tuf.settings.MIN_AVERAGE_DOWNLOAD_SPEED)) - - if average_download_speed < tuf.settings.MIN_AVERAGE_DOWNLOAD_SPEED: - raise tuf.exceptions.SlowRetrievalError(average_download_speed) - - else: - logger.debug('Good average download speed: ' + - repr(average_download_speed) + ' bytes per second') - - raise tuf.exceptions.DownloadLengthMismatchError(required_length, total_downloaded) - - else: - # We specifically disabled strict checking of required length, but we - # will log a warning anyway. This is useful when we wish to download the - # Timestamp or Root metadata, for which we have no signed metadata; so, - # we must guess a reasonable required_length for it. 
- if average_download_speed < tuf.settings.MIN_AVERAGE_DOWNLOAD_SPEED: - raise tuf.exceptions.SlowRetrievalError(average_download_speed) - - else: - logger.debug('Good average download speed: ' + - repr(average_download_speed) + ' bytes per second') - - logger.info('Downloaded ' + str(total_downloaded) + ' bytes out of an' - ' upper limit of ' + str(required_length) + ' bytes.') diff --git a/tuf/client_rework/mirrors.py b/tuf/client_rework/mirrors.py deleted file mode 100644 index eef385617e..0000000000 --- a/tuf/client_rework/mirrors.py +++ /dev/null @@ -1,187 +0,0 @@ -#!/usr/bin/env python - -# Copyright 2012 - 2017, New York University and the TUF contributors -# SPDX-License-Identifier: MIT OR Apache-2.0 - -""" - - mirrors.py - - - Konstantin Andrianov. - Derived from original mirrors.py written by Geremy Condra. - - - March 12, 2012. - - - See LICENSE-MIT OR LICENSE for licensing information. - - - Extract a list of mirror urls corresponding to the file type and the location - of the file with respect to the base url. -""" - - -# Help with Python 3 compatibility, where the print statement is a function, an -# implicit relative import is invalid, and the '/' operator performs true -# division. Example: print 'hello world' raises a 'SyntaxError' exception. -from __future__ import ( - absolute_import, - division, - print_function, - unicode_literals, -) - -import os -from typing import BinaryIO, Dict, Optional, TextIO - -import securesystemslib -import six - -import tuf -import tuf.client_rework.download as download -import tuf.formats -from tuf.requests_fetcher import RequestsFetcher - -# The type of file to be downloaded from a repository. The -# 'get_list_of_mirrors' function supports these file types. -_SUPPORTED_FILE_TYPES = ["meta", "target"] - - -class Mirrors: - def __init__( - self, mirrors_dict: Dict, fetcher: Optional["FetcherInterface"] = None - ): - tuf.formats.MIRRORDICT_SCHEMA.check_match(mirrors_dict) - self._config = mirrors_dict - - if fetcher is None: - self._fetcher = RequestsFetcher() - else: - self._fetcher = fetcher - - def _get_list_of_mirrors(self, file_type, file_path): - """ - - Get a list of mirror urls from a mirrors dictionary, provided the type - and the path of the file with respect to the base url. - - - file_type: - Type of data needed for download, must correspond to one of the strings - in the list ['meta', 'target']. 'meta' for metadata file type or - 'target' for target file type. It should correspond to - NAME_SCHEMA format. - - file_path: - A relative path to the file that corresponds to RELPATH_SCHEMA format. - Ex: 'http://url_prefix/targets_path/file_path' - - - securesystemslib.exceptions.Error, on unsupported 'file_type'. - - securesystemslib.exceptions.FormatError, on bad argument. - - - List of mirror urls corresponding to the file_type and file_path. If no - match is found, empty list is returned. - """ - - # Checking if all the arguments have appropriate format. - tuf.formats.RELPATH_SCHEMA.check_match(file_path) - securesystemslib.formats.NAME_SCHEMA.check_match(file_type) - - # Verify 'file_type' is supported. - if file_type not in _SUPPORTED_FILE_TYPES: - raise sslib_exceptions.Error( - "Invalid file_type argument." - " Supported file types: " + repr(_SUPPORTED_FILE_TYPES) - ) - path_key = "metadata_path" if file_type == "meta" else "targets_path" - - list_of_mirrors = [] - for junk, mirror_info in six.iteritems(self._config): - # Does mirror serve this file type at all? 
- path = mirror_info.get(path_key) - if path is None: - continue - - # for targets, ensure directory confinement - if path_key == "targets_path": - full_filepath = os.path.join(path, file_path) - confined_target_dirs = mirror_info.get("confined_target_dirs") - # confined_target_dirs is optional and can used to confine the client to - # certain paths on a repository mirror when fetching target files. - if confined_target_dirs and not file_in_confined_directories( - full_filepath, confined_target_dirs - ): - continue - - # urllib.quote(string) replaces special characters in string using the %xx - # escape. This is done to avoid parsing issues of the URL on the server - # side. Do *NOT* pass URLs with Unicode characters without first encoding - # the URL as UTF-8. We need a long-term solution with #61. - # http://bugs.python.org/issue1712522 - file_path = six.moves.urllib.parse.quote(file_path) - url = os.path.join(mirror_info["url_prefix"], path, file_path) - - # The above os.path.join() result as well as input file_path may be - # invalid on windows (might contain both separator types), see #1077. - # Make sure the URL doesn't contain backward slashes on Windows. - list_of_mirrors.append(url.replace("\\", "/")) - - return list_of_mirrors - - def meta_download(self, filename: str, upper_length: int) -> TextIO: - """ - Download metadata file from the list of metadata mirrors - """ - file_mirrors = self._get_list_of_mirrors("meta", filename) - - file_mirror_errors = {} - for file_mirror in file_mirrors: - try: - temp_obj = download.download_file( - file_mirror, - upper_length, - self._fetcher, - STRICT_REQUIRED_LENGTH=False, - ) - - temp_obj.seek(0) - yield temp_obj - - except Exception as exception: - file_mirror_errors[file_mirror] = exception - - finally: - if file_mirror_errors: - raise tuf.exceptions.NoWorkingMirrorError( - file_mirror_errors - ) - - def target_download(self, filename: str, strict_length: int) -> BinaryIO: - """ - Download target file from the list of target mirrors - """ - file_mirrors = self._get_list_of_mirrors("target", filename) - - file_mirror_errors = {} - for file_mirror in file_mirrors: - try: - temp_obj = download.download_file( - file_mirror, strict_length, self._fetcher - ) - - temp_obj.seek(0) - yield temp_obj - - except Exception as exception: - file_mirror_errors[file_mirror] = exception - - finally: - if file_mirror_errors: - raise tuf.exceptions.NoWorkingMirrorError( - file_mirror_errors - ) diff --git a/tuf/client_rework/mirrors_download.py b/tuf/client_rework/mirrors_download.py new file mode 100644 index 0000000000..34c767cf69 --- /dev/null +++ b/tuf/client_rework/mirrors_download.py @@ -0,0 +1,417 @@ +#!/usr/bin/env python + +# Copyright 2012 - 2017, New York University and the TUF contributors +# SPDX-License-Identifier: MIT OR Apache-2.0 + +""" + + mirrors.py + + + Konstantin Andrianov. + Derived from original mirrors.py written by Geremy Condra. + + + March 12, 2012. + + + See LICENSE-MIT OR LICENSE for licensing information. + + + Extract a list of mirror urls corresponding to the file type and the location + of the file with respect to the base url. +""" + + +# Help with Python 3 compatibility, where the print statement is a function, an +# implicit relative import is invalid, and the '/' operator performs true +# division. Example: print 'hello world' raises a 'SyntaxError' exception. 
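# Post-merge call structure, sketched (method names from this patch):
#   Mirrors.meta_download()   -> Mirrors._download_file(STRICT_REQUIRED_LENGTH=False)
#   Mirrors.target_download() -> Mirrors._download_file()  # strict by default
#   Mirrors._download_file()  -> self._fetcher.fetch(), then
#                                Mirrors._check_downloaded_length()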
+from __future__ import ( + absolute_import, + division, + print_function, + unicode_literals, +) + +import logging +import os +import tempfile +import timeit +from typing import BinaryIO, Dict, Optional, TextIO + +import securesystemslib +import six + +import tuf +import tuf.formats +from tuf.requests_fetcher import RequestsFetcher + +# See 'log.py' to learn how logging is handled in TUF. +logger = logging.getLogger(__name__) + +# The type of file to be downloaded from a repository. The +# 'get_list_of_mirrors' function supports these file types. +_SUPPORTED_FILE_TYPES = ["meta", "target"] + + +class Mirrors: + def __init__( + self, mirrors_dict: Dict, fetcher: Optional["FetcherInterface"] = None + ): + tuf.formats.MIRRORDICT_SCHEMA.check_match(mirrors_dict) + self._config = mirrors_dict + + if fetcher is None: + self._fetcher = RequestsFetcher() + else: + self._fetcher = fetcher + + def _get_list_of_mirrors(self, file_type, file_path): + """ + + Get a list of mirror urls from a mirrors dictionary, provided the type + and the path of the file with respect to the base url. + + + file_type: + Type of data needed for download, must correspond to one of the strings + in the list ['meta', 'target']. 'meta' for metadata file type or + 'target' for target file type. It should correspond to + NAME_SCHEMA format. + + file_path: + A relative path to the file that corresponds to RELPATH_SCHEMA format. + Ex: 'http://url_prefix/targets_path/file_path' + + + securesystemslib.exceptions.Error, on unsupported 'file_type'. + + securesystemslib.exceptions.FormatError, on bad argument. + + + List of mirror urls corresponding to the file_type and file_path. If no + match is found, empty list is returned. + """ + + # Checking if all the arguments have appropriate format. + tuf.formats.RELPATH_SCHEMA.check_match(file_path) + securesystemslib.formats.NAME_SCHEMA.check_match(file_type) + + # Verify 'file_type' is supported. + if file_type not in _SUPPORTED_FILE_TYPES: + raise sslib_exceptions.Error( + "Invalid file_type argument." + " Supported file types: " + repr(_SUPPORTED_FILE_TYPES) + ) + path_key = "metadata_path" if file_type == "meta" else "targets_path" + + list_of_mirrors = [] + for junk, mirror_info in six.iteritems(self._config): + # Does mirror serve this file type at all? + path = mirror_info.get(path_key) + if path is None: + continue + + # for targets, ensure directory confinement + if path_key == "targets_path": + full_filepath = os.path.join(path, file_path) + confined_target_dirs = mirror_info.get("confined_target_dirs") + # confined_target_dirs is optional and can used to confine the client to + # certain paths on a repository mirror when fetching target files. + if confined_target_dirs and not file_in_confined_directories( + full_filepath, confined_target_dirs + ): + continue + + # urllib.quote(string) replaces special characters in string using the %xx + # escape. This is done to avoid parsing issues of the URL on the server + # side. Do *NOT* pass URLs with Unicode characters without first encoding + # the URL as UTF-8. We need a long-term solution with #61. + # http://bugs.python.org/issue1712522 + file_path = six.moves.urllib.parse.quote(file_path) + url = os.path.join(mirror_info["url_prefix"], path, file_path) + + # The above os.path.join() result as well as input file_path may be + # invalid on windows (might contain both separator types), see #1077. + # Make sure the URL doesn't contain backward slashes on Windows. 
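            # Illustrative example: joining "http://localhost:8001/targets"
            # with a file_path of "dir\\file.txt" leaves a backslash in the
            # URL; the replace() below normalizes it to ".../dir/file.txt".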
+ list_of_mirrors.append(url.replace("\\", "/")) + + return list_of_mirrors + + def meta_download(self, filename: str, upper_length: int) -> TextIO: + """ + Download metadata file from the list of metadata mirrors + """ + file_mirrors = self._get_list_of_mirrors("meta", filename) + + file_mirror_errors = {} + for file_mirror in file_mirrors: + try: + temp_obj = self._download_file( + file_mirror, + upper_length, + STRICT_REQUIRED_LENGTH=False, + ) + + temp_obj.seek(0) + yield temp_obj + + except Exception as exception: + file_mirror_errors[file_mirror] = exception + + finally: + if file_mirror_errors: + raise tuf.exceptions.NoWorkingMirrorError( + file_mirror_errors + ) + + def target_download(self, filename: str, strict_length: int) -> BinaryIO: + """ + Download target file from the list of target mirrors + """ + file_mirrors = self._get_list_of_mirrors("target", filename) + + file_mirror_errors = {} + for file_mirror in file_mirrors: + try: + temp_obj = self._download_file(file_mirror, strict_length) + + temp_obj.seek(0) + yield temp_obj + + except Exception as exception: + file_mirror_errors[file_mirror] = exception + + finally: + if file_mirror_errors: + raise tuf.exceptions.NoWorkingMirrorError( + file_mirror_errors + ) + + def _download_file(self, url, required_length, STRICT_REQUIRED_LENGTH=True): + """ + + Given the url and length of the desired file, this function opens a + connection to 'url' and downloads the file while ensuring its length + matches 'required_length' if 'STRICT_REQUIRED_LENGH' is True (If False, + the file's length is not checked and a slow retrieval exception is raised + if the downloaded rate falls below the acceptable rate). + + + url: + A URL string that represents the location of the file. + + required_length: + An integer value representing the length of the file. + + STRICT_REQUIRED_LENGTH: + A Boolean indicator used to signal whether we should perform strict + checking of required_length. True by default. We explicitly set this to + False when we know that we want to turn this off for downloading the + timestamp metadata, which has no signed required_length. + + + A file object is created on disk to store the contents of 'url'. + + + tuf.exceptions.DownloadLengthMismatchError, if there was a + mismatch of observed vs expected lengths while downloading the file. + + securesystemslib.exceptions.FormatError, if any of the arguments are + improperly formatted. + + Any other unforeseen runtime exception. + + + A file object that points to the contents of 'url'. + """ + # Do all of the arguments have the appropriate format? + # Raise 'securesystemslib.exceptions.FormatError' if there is a mismatch. + securesystemslib.formats.URL_SCHEMA.check_match(url) + tuf.formats.LENGTH_SCHEMA.check_match(required_length) + + # 'url.replace('\\', '/')' is needed for compatibility with Windows-based + # systems, because they might use back-slashes in place of forward-slashes. + # This converts it to the common format. unquote() replaces %xx escapes in a + # url with their single-character equivalent. A back-slash may be encoded as + # %5c in the url, which should also be replaced with a forward slash. + url = six.moves.urllib.parse.unquote(url).replace("\\", "/") + logger.info("Downloading: " + repr(url)) + + # This is the temporary file that we will return to contain the contents of + # the downloaded file. 
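        # (tempfile.TemporaryFile() is unnamed and is removed automatically on
        # close(); callers that want to keep the data persist it explicitly,
        # e.g. with securesystemslib.util.persist_temp_file().)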
+ temp_file = tempfile.TemporaryFile() + + average_download_speed = 0 + number_of_bytes_received = 0 + + try: + chunks = self._fetcher.fetch(url, required_length) + start_time = timeit.default_timer() + for chunk in chunks: + + stop_time = timeit.default_timer() + temp_file.write(chunk) + + # Measure the average download speed. + number_of_bytes_received += len(chunk) + seconds_spent_receiving = stop_time - start_time + average_download_speed = ( + number_of_bytes_received / seconds_spent_receiving + ) + + if ( + average_download_speed + < tuf.settings.MIN_AVERAGE_DOWNLOAD_SPEED + ): + logger.debug( + "The average download speed dropped below the minimum" + " average download speed set in tuf.settings.py. Stopping the" + " download!" + ) + break + + else: + logger.debug( + "The average download speed has not dipped below the" + " minimum average download speed set in tuf.settings.py." + ) + + # Does the total number of downloaded bytes match the required length? + self._check_downloaded_length( + number_of_bytes_received, + required_length, + STRICT_REQUIRED_LENGTH=STRICT_REQUIRED_LENGTH, + average_download_speed=average_download_speed, + ) + + except Exception: + # Close 'temp_file'. Any written data is lost. + temp_file.close() + logger.debug("Could not download URL: " + repr(url)) + raise + + else: + return temp_file + + @staticmethod + def _check_downloaded_length( + total_downloaded, + required_length, + STRICT_REQUIRED_LENGTH=True, + average_download_speed=None, + ): + """ + + A helper function which checks whether the total number of downloaded bytes + matches our expectation. + + + total_downloaded: + The total number of bytes supposedly downloaded for the file in question. + + required_length: + The total number of bytes expected of the file as seen from its metadata. + The Timestamp role is always downloaded without a known file length, and + the Root role when the client cannot download any of the required + top-level roles. In both cases, 'required_length' is actually an upper + limit on the length of the downloaded file. + + STRICT_REQUIRED_LENGTH: + A Boolean indicator used to signal whether we should perform strict + checking of required_length. True by default. We explicitly set this to + False when we know that we want to turn this off for downloading the + timestamp metadata, which has no signed required_length. + + average_download_speed: + The average download speed for the downloaded file. + + + None. + + + securesystemslib.exceptions.DownloadLengthMismatchError, if + STRICT_REQUIRED_LENGTH is True and total_downloaded is not equal + required_length. + + tuf.exceptions.SlowRetrievalError, if the total downloaded was + done in less than the acceptable download speed (as set in + tuf.settings.py). + + + None. + """ + + if total_downloaded == required_length: + logger.info( + "Downloaded " + str(total_downloaded) + " bytes out of the" + " expected " + str(required_length) + " bytes." + ) + + else: + difference_in_bytes = abs(total_downloaded - required_length) + + # What we downloaded is not equal to the required length, but did we ask + # for strict checking of required length? + if STRICT_REQUIRED_LENGTH: + logger.info( + "Downloaded " + str(total_downloaded) + " bytes, but" + " expected " + + str(required_length) + + " bytes. There is a difference" + " of " + str(difference_in_bytes) + " bytes." + ) + + # If the average download speed is below a certain threshold, we flag + # this as a possible slow-retrieval attack. 
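                # Worked example (threshold value hypothetical): 1000 bytes
                # received over 25 seconds gives average_download_speed = 40
                # bytes/second; with MIN_AVERAGE_DOWNLOAD_SPEED = 50 this
                # raises SlowRetrievalError below.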
+ logger.debug( + "Average download speed: " + repr(average_download_speed) + ) + logger.debug( + "Minimum average download speed: " + + repr(tuf.settings.MIN_AVERAGE_DOWNLOAD_SPEED) + ) + + if ( + average_download_speed + < tuf.settings.MIN_AVERAGE_DOWNLOAD_SPEED + ): + raise tuf.exceptions.SlowRetrievalError( + average_download_speed + ) + + else: + logger.debug( + "Good average download speed: " + + repr(average_download_speed) + + " bytes per second" + ) + + raise tuf.exceptions.DownloadLengthMismatchError( + required_length, total_downloaded + ) + + else: + # We specifically disabled strict checking of required length, but we + # will log a warning anyway. This is useful when we wish to download the + # Timestamp or Root metadata, for which we have no signed metadata; so, + # we must guess a reasonable required_length for it. + if ( + average_download_speed + < tuf.settings.MIN_AVERAGE_DOWNLOAD_SPEED + ): + raise tuf.exceptions.SlowRetrievalError( + average_download_speed + ) + + else: + logger.debug( + "Good average download speed: " + + repr(average_download_speed) + + " bytes per second" + ) + + logger.info( + "Downloaded " + str(total_downloaded) + " bytes out of an" + " upper limit of " + str(required_length) + " bytes." + ) diff --git a/tuf/client_rework/updater_rework.py b/tuf/client_rework/updater_rework.py index c2ed0636ea..06550fa3e9 100644 --- a/tuf/client_rework/updater_rework.py +++ b/tuf/client_rework/updater_rework.py @@ -18,7 +18,7 @@ import tuf.exceptions import tuf.formats import tuf.settings -from tuf.client_rework.mirrors import Mirrors +from tuf.client_rework.mirrors_download import Mirrors from .metadata_wrapper import ( RootWrapper, From bf9924b2b189301728a0a9da31bd1c1141208d5b Mon Sep 17 00:00:00 2001 From: Teodora Sechkova Date: Wed, 31 Mar 2021 14:37:50 +0300 Subject: [PATCH 9/9] Reformat fetcher code Apply manually black and isort to fetcher.py and request_fetcher.py Signed-off-by: Teodora Sechkova --- tuf/client_rework/fetcher.py | 43 ++-- tuf/client_rework/requests_fetcher.py | 299 +++++++++++++------------- 2 files changed, 176 insertions(+), 166 deletions(-) diff --git a/tuf/client_rework/fetcher.py b/tuf/client_rework/fetcher.py index 8768bdd4b9..2b6de6f837 100644 --- a/tuf/client_rework/fetcher.py +++ b/tuf/client_rework/fetcher.py @@ -7,32 +7,33 @@ # Imports import abc + # Classes -class FetcherInterface(): - """Defines an interface for abstract network download. +class FetcherInterface: + """Defines an interface for abstract network download. - By providing a concrete implementation of the abstract interface, - users of the framework can plug-in their preferred/customized - network stack. - """ + By providing a concrete implementation of the abstract interface, + users of the framework can plug-in their preferred/customized + network stack. + """ - __metaclass__ = abc.ABCMeta + __metaclass__ = abc.ABCMeta - @abc.abstractmethod - def fetch(self, url, required_length): - """Fetches the contents of HTTP/HTTPS url from a remote server. + @abc.abstractmethod + def fetch(self, url, required_length): + """Fetches the contents of HTTP/HTTPS url from a remote server. - Ensures the length of the downloaded data is up to 'required_length'. + Ensures the length of the downloaded data is up to 'required_length'. - Arguments: - url: A URL string that represents a file location. - required_length: An integer value representing the file length in bytes. + Arguments: + url: A URL string that represents a file location. 
+            required_length: An integer value representing the file length in bytes.
 
-    Raises:
-      tuf.exceptions.SlowRetrievalError: A timeout occurs while receiving data.
-      tuf.exceptions.FetcherHTTPError: An HTTP error code is received.
+        Raises:
+            tuf.exceptions.SlowRetrievalError: A timeout occurs while receiving data.
+            tuf.exceptions.FetcherHTTPError: An HTTP error code is received.
 
-    Returns:
-      A bytes iterator
-    """
-    raise NotImplementedError # pragma: no cover
+        Returns:
+            A bytes iterator
+        """
+        raise NotImplementedError  # pragma: no cover
diff --git a/tuf/client_rework/requests_fetcher.py b/tuf/client_rework/requests_fetcher.py
index 8074890d25..6f5e89ec4e 100644
--- a/tuf/client_rework/requests_fetcher.py
+++ b/tuf/client_rework/requests_fetcher.py
@@ -5,17 +5,16 @@
 library.
 """
 
-# Imports
-import requests
-import six
 import logging
 import time
 
+# Imports
+import requests
+import six
 import urllib3.exceptions
 
 import tuf.exceptions
 import tuf.settings
-
 from tuf.client_rework.fetcher import FetcherInterface
 
 # Globals
@@ -23,151 +22,161 @@
 
 # Classess
 class RequestsFetcher(FetcherInterface):
-  """A concrete implementation of FetcherInterface based on the Requests
-  library.
+    """A concrete implementation of FetcherInterface based on the Requests
+    library.
 
-  Attributes:
-    _sessions: A dictionary of Requests.Session objects storing a separate
-      session per scheme+hostname combination.
-  """
+    Attributes:
+        _sessions: A dictionary of Requests.Session objects storing a separate
+            session per scheme+hostname combination.
+    """
 
-  def __init__(self):
-    # From http://docs.python-requests.org/en/master/user/advanced/#session-objects:
-    #
-    # "The Session object allows you to persist certain parameters across
-    # requests. It also persists cookies across all requests made from the
-    # Session instance, and will use urllib3's connection pooling. So if you're
-    # making several requests to the same host, the underlying TCP connection
-    # will be reused, which can result in a significant performance increase
-    # (see HTTP persistent connection)."
-    #
-    # NOTE: We use a separate requests.Session per scheme+hostname combination,
-    # in order to reuse connections to the same hostname to improve efficiency,
-    # but avoiding sharing state between different hosts-scheme combinations to
-    # minimize subtle security issues. Some cookies may not be HTTP-safe.
-    self._sessions = {}
-
-
-  def fetch(self, url, required_length):
-    """Fetches the contents of HTTP/HTTPS url from a remote server.
-
-    Ensures the length of the downloaded data is up to 'required_length'.
-
-    Arguments:
-      url: A URL string that represents a file location.
-      required_length: An integer value representing the file length in bytes.
-
-    Raises:
-      tuf.exceptions.SlowRetrievalError: A timeout occurs while receiving data.
-      tuf.exceptions.FetcherHTTPError: An HTTP error code is received.
-
-    Returns:
-      A bytes iterator
-    """
-    # Get a customized session for each new schema+hostname combination.
-    session = self._get_session(url)
-
-    # Get the requests.Response object for this URL.
-    #
-    # Defer downloading the response body with stream=True.
-    # Always set the timeout. This timeout value is interpreted by requests as:
-    #  - connect timeout (max delay before first byte is received)
-    #  - read (gap) timeout (max delay between bytes received)
-    response = session.get(url, stream=True,
-        timeout=tuf.settings.SOCKET_TIMEOUT)
-    # Check response status.
-    try:
-      response.raise_for_status()
-    except requests.HTTPError as e:
-      response.close()
-      status = e.response.status_code
-      raise tuf.exceptions.FetcherHTTPError(str(e), status)
-
-
-    # Define a generator function to be returned by fetch. This way the caller
-    # of fetch can differentiate between connection and actual data download
-    # and measure download times accordingly.
-    def chunks():
-      try:
-        bytes_received = 0
-        while True:
-          # We download a fixed chunk of data in every round. This is so that we
-          # can defend against slow retrieval attacks. Furthermore, we do not
-          # wish to download an extremely large file in one shot.
-          # Before beginning the round, sleep (if set) for a short amount of
-          # time so that the CPU is not hogged in the while loop.
-          if tuf.settings.SLEEP_BEFORE_ROUND:
-            time.sleep(tuf.settings.SLEEP_BEFORE_ROUND)
-
-          read_amount = min(
-              tuf.settings.CHUNK_SIZE, required_length - bytes_received)
-
-          # NOTE: This may not handle some servers adding a Content-Encoding
-          # header, which may cause urllib3 to misbehave:
-          # https://github.com/pypa/pip/blob/404838abcca467648180b358598c597b74d568c9/src/pip/_internal/download.py#L547-L582
-          data = response.raw.read(read_amount)
-          bytes_received += len(data)
-
-          # We might have no more data to read. Check number of bytes downloaded.
-          if not data:
-            logger.debug('Downloaded ' + repr(bytes_received) + '/' +
-              repr(required_length) + ' bytes.')
-
-            # Finally, we signal that the download is complete.
-            break
-
-          yield data
-
-          if bytes_received >= required_length:
-            break
-
-      except urllib3.exceptions.ReadTimeoutError as e:
-        raise tuf.exceptions.SlowRetrievalError(str(e))
-
-      finally:
-        response.close()
-
-    return chunks()
-
-
-
-  def _get_session(self, url):
-    """Returns a different customized requests.Session per schema+hostname
-    combination.
-    """
-    # Use a different requests.Session per schema+hostname combination, to
-    # reuse connections while minimizing subtle security issues.
-    parsed_url = six.moves.urllib.parse.urlparse(url)
-
-    if not parsed_url.scheme or not parsed_url.hostname:
-      raise tuf.exceptions.URLParsingError(
-          'Could not get scheme and hostname from URL: ' + url)
-
-    session_index = parsed_url.scheme + '+' + parsed_url.hostname
-
-    logger.debug('url: ' + url)
-    logger.debug('session index: ' + session_index)
-
-    session = self._sessions.get(session_index)
-
-    if not session:
-      session = requests.Session()
-      self._sessions[session_index] = session
-
-      # Attach some default headers to every Session.
-      requests_user_agent = session.headers['User-Agent']
-      # Follows the RFC: https://tools.ietf.org/html/rfc7231#section-5.5.3
-      tuf_user_agent = 'tuf/' + tuf.__version__ + ' ' + requests_user_agent
-      session.headers.update({
-          # Tell the server not to compress or modify anything.
-          # https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Encoding#Directives
-          'Accept-Encoding': 'identity',
-          # The TUF user agent.
-          'User-Agent': tuf_user_agent})
-
-      logger.debug('Made new session for ' + session_index)
-
-    else:
-      logger.debug('Reusing session for ' + session_index)
-    return session
 
+    def __init__(self):
+        # From http://docs.python-requests.org/en/master/user/advanced/#session-objects:
+        #
+        # "The Session object allows you to persist certain parameters across
+        # requests. It also persists cookies across all requests made from the
+        # Session instance, and will use urllib3's connection pooling. So if you're
+        # making several requests to the same host, the underlying TCP connection
+        # will be reused, which can result in a significant performance increase
+        # (see HTTP persistent connection)."
+        #
+        # NOTE: We use a separate requests.Session per scheme+hostname combination,
+        # in order to reuse connections to the same hostname to improve efficiency,
+        # while avoiding sharing state between different scheme+hostname combinations
+        # to minimize subtle security issues. Some cookies may not be HTTP-safe.
+        self._sessions = {}
+
+    def fetch(self, url, required_length):
+        """Fetches the contents of an HTTP/HTTPS URL from a remote server.
+
+        Ensures the length of the downloaded data does not exceed 'required_length'.
+
+        Arguments:
+            url: A URL string that represents a file location.
+            required_length: An integer value representing the file length in bytes.
+
+        Raises:
+            tuf.exceptions.SlowRetrievalError: A timeout occurs while receiving data.
+            tuf.exceptions.FetcherHTTPError: An HTTP error code is received.
+
+        Returns:
+            A bytes iterator
+        """
+        # Get a customized session for each new scheme+hostname combination.
+        session = self._get_session(url)
+
+        # Get the requests.Response object for this URL.
+        #
+        # Defer downloading the response body with stream=True.
+        # Always set the timeout. This timeout value is interpreted by requests as:
+        #  - connect timeout (max delay before first byte is received)
+        #  - read (gap) timeout (max delay between bytes received)
+        response = session.get(
+            url, stream=True, timeout=tuf.settings.SOCKET_TIMEOUT
+        )
+        # Check response status.
+        try:
+            response.raise_for_status()
+        except requests.HTTPError as e:
+            response.close()
+            status = e.response.status_code
+            raise tuf.exceptions.FetcherHTTPError(str(e), status)
+
+        # Define a generator function to be returned by fetch. This way the caller
+        # of fetch can differentiate between connection and actual data download
+        # and measure download times accordingly.
+        def chunks():
+            try:
+                bytes_received = 0
+                while True:
+                    # We download a fixed chunk of data in every round. This is so that we
+                    # can defend against slow retrieval attacks. Furthermore, we do not
+                    # wish to download an extremely large file in one shot.
+                    # Before beginning the round, sleep (if set) for a short amount of
+                    # time so that the CPU is not hogged in the while loop.
+                    if tuf.settings.SLEEP_BEFORE_ROUND:
+                        time.sleep(tuf.settings.SLEEP_BEFORE_ROUND)
+
+                    read_amount = min(
+                        tuf.settings.CHUNK_SIZE,
+                        required_length - bytes_received,
+                    )
+
+                    # NOTE: This may not handle some servers adding a Content-Encoding
+                    # header, which may cause urllib3 to misbehave:
+                    # https://github.com/pypa/pip/blob/404838abcca467648180b358598c597b74d568c9/src/pip/_internal/download.py#L547-L582
+                    data = response.raw.read(read_amount)
+                    bytes_received += len(data)
+
+                    # We might have no more data to read. Check number of bytes downloaded.
+                    if not data:
+                        logger.debug(
+                            "Downloaded "
+                            + repr(bytes_received)
+                            + "/"
+                            + repr(required_length)
+                            + " bytes."
+                        )
+
+                        # Finally, we signal that the download is complete.
+                        break
+
+                    yield data
+
+                    if bytes_received >= required_length:
+                        break
+
+            except urllib3.exceptions.ReadTimeoutError as e:
+                raise tuf.exceptions.SlowRetrievalError(str(e))
+
+            finally:
+                response.close()
+
+        return chunks()
+
+    def _get_session(self, url):
+        """Returns a different customized requests.Session per scheme+hostname
+        combination.
+        """
+        # Use a different requests.Session per scheme+hostname combination, to
+        # reuse connections while minimizing subtle security issues.
+ parsed_url = six.moves.urllib.parse.urlparse(url) + + if not parsed_url.scheme or not parsed_url.hostname: + raise tuf.exceptions.URLParsingError( + "Could not get scheme and hostname from URL: " + url + ) + + session_index = parsed_url.scheme + "+" + parsed_url.hostname + + logger.debug("url: " + url) + logger.debug("session index: " + session_index) + + session = self._sessions.get(session_index) + + if not session: + session = requests.Session() + self._sessions[session_index] = session + + # Attach some default headers to every Session. + requests_user_agent = session.headers["User-Agent"] + # Follows the RFC: https://tools.ietf.org/html/rfc7231#section-5.5.3 + tuf_user_agent = ( + "tuf/" + tuf.__version__ + " " + requests_user_agent + ) + session.headers.update( + { + # Tell the server not to compress or modify anything. + # https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Encoding#Directives + "Accept-Encoding": "identity", + # The TUF user agent. + "User-Agent": tuf_user_agent, + } + ) + + logger.debug("Made new session for " + session_index) + + else: + logger.debug("Reusing session for " + session_index) + + return session
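Taken together, these two patches define the pluggable download path: FetcherInterface is the seam, and RequestsFetcher is the default implementation. As a rough sketch of what an alternative network stack could look like, the hypothetical fetcher below follows the same contract using Python 3's standard-library urllib; the class name, chunk size, and error mapping are illustrative assumptions, not code from this series.

# A minimal sketch of plugging a custom network stack into FetcherInterface.
# 'UrllibFetcher' and the 8 KiB chunk size are hypothetical, not part of
# this patch series.
import urllib.error
import urllib.request

import tuf.exceptions
from tuf.client_rework.fetcher import FetcherInterface


class UrllibFetcher(FetcherInterface):
    """A hypothetical fetcher backed by the standard library."""

    def fetch(self, url, required_length):
        try:
            response = urllib.request.urlopen(url)
        except urllib.error.HTTPError as e:
            # Map HTTP errors to the exception type named in the interface.
            raise tuf.exceptions.FetcherHTTPError(str(e), e.code)

        def chunks():
            try:
                bytes_received = 0
                while bytes_received < required_length:
                    # Read in bounded chunks, mirroring RequestsFetcher's
                    # defense against single-shot downloads of huge files.
                    data = response.read(
                        min(8192, required_length - bytes_received)
                    )
                    if not data:
                        break
                    bytes_received += len(data)
                    yield data
            finally:
                response.close()

        return chunks()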
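For a sense of how the interface is consumed: fetch() returns a bytes iterator bounded by required_length, and sessions are cached per scheme+hostname, so repeated fetches from the same host reuse the pooled TCP connection. A hedged usage sketch follows; the URL and the 4096-byte upper bound are made-up values.

# Driving RequestsFetcher directly (illustrative values only).
from tuf.client_rework.requests_fetcher import RequestsFetcher

fetcher = RequestsFetcher()

# fetch() yields chunks until the data runs out or the limit is reached.
received = b""
for chunk in fetcher.fetch("https://example.com/metadata/root.json", 4096):
    received += chunk

print("received %d bytes (at most 4096)" % len(received))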
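Finally, the slow-retrieval defense in the download loop above reduces to simple bookkeeping: the running average is total bytes received divided by total elapsed time, re-checked after every chunk. Below is a self-contained sketch of that check; the 10 bytes/second threshold stands in for tuf.settings.MIN_AVERAGE_DOWNLOAD_SPEED, while the exception type is the one actually raised in the patch.

# Sketch of the average-speed check performed after each downloaded chunk.
import timeit

import tuf.exceptions

MIN_AVERAGE_DOWNLOAD_SPEED = 10  # bytes per second, illustrative only


def consume_with_speed_check(chunks):
    bytes_received = 0
    start_time = timeit.default_timer()
    for chunk in chunks:
        bytes_received += len(chunk)
        seconds_spent = timeit.default_timer() - start_time
        # Guard against a zero time delta on a very fast first chunk.
        average_speed = bytes_received / max(seconds_spent, 1e-9)
        if average_speed < MIN_AVERAGE_DOWNLOAD_SPEED:
            raise tuf.exceptions.SlowRetrievalError(average_speed)
    return bytes_received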