diff --git a/tuf/client_rework/fetcher.py b/tuf/client_rework/fetcher.py new file mode 100644 index 0000000000..2b6de6f837 --- /dev/null +++ b/tuf/client_rework/fetcher.py @@ -0,0 +1,39 @@ +# Copyright 2021, New York University and the TUF contributors +# SPDX-License-Identifier: MIT OR Apache-2.0 + +"""Provides an interface for network IO abstraction. +""" + +# Imports +import abc + + +# Classes +class FetcherInterface: + """Defines an interface for abstract network download. + + By providing a concrete implementation of the abstract interface, + users of the framework can plug-in their preferred/customized + network stack. + """ + + __metaclass__ = abc.ABCMeta + + @abc.abstractmethod + def fetch(self, url, required_length): + """Fetches the contents of HTTP/HTTPS url from a remote server. + + Ensures the length of the downloaded data is up to 'required_length'. + + Arguments: + url: A URL string that represents a file location. + required_length: An integer value representing the file length in bytes. + + Raises: + tuf.exceptions.SlowRetrievalError: A timeout occurs while receiving data. + tuf.exceptions.FetcherHTTPError: An HTTP error code is received. + + Returns: + A bytes iterator + """ + raise NotImplementedError # pragma: no cover diff --git a/tuf/client_rework/mirrors_download.py b/tuf/client_rework/mirrors_download.py new file mode 100644 index 0000000000..34c767cf69 --- /dev/null +++ b/tuf/client_rework/mirrors_download.py @@ -0,0 +1,417 @@ +#!/usr/bin/env python + +# Copyright 2012 - 2017, New York University and the TUF contributors +# SPDX-License-Identifier: MIT OR Apache-2.0 + +""" + + mirrors.py + + + Konstantin Andrianov. + Derived from original mirrors.py written by Geremy Condra. + + + March 12, 2012. + + + See LICENSE-MIT OR LICENSE for licensing information. + + + Extract a list of mirror urls corresponding to the file type and the location + of the file with respect to the base url. +""" + + +# Help with Python 3 compatibility, where the print statement is a function, an +# implicit relative import is invalid, and the '/' operator performs true +# division. Example: print 'hello world' raises a 'SyntaxError' exception. +from __future__ import ( + absolute_import, + division, + print_function, + unicode_literals, +) + +import logging +import os +import tempfile +import timeit +from typing import BinaryIO, Dict, Optional, TextIO + +import securesystemslib +import six + +import tuf +import tuf.formats +from tuf.requests_fetcher import RequestsFetcher + +# See 'log.py' to learn how logging is handled in TUF. +logger = logging.getLogger(__name__) + +# The type of file to be downloaded from a repository. The +# 'get_list_of_mirrors' function supports these file types. +_SUPPORTED_FILE_TYPES = ["meta", "target"] + + +class Mirrors: + def __init__( + self, mirrors_dict: Dict, fetcher: Optional["FetcherInterface"] = None + ): + tuf.formats.MIRRORDICT_SCHEMA.check_match(mirrors_dict) + self._config = mirrors_dict + + if fetcher is None: + self._fetcher = RequestsFetcher() + else: + self._fetcher = fetcher + + def _get_list_of_mirrors(self, file_type, file_path): + """ + + Get a list of mirror urls from a mirrors dictionary, provided the type + and the path of the file with respect to the base url. + + + file_type: + Type of data needed for download, must correspond to one of the strings + in the list ['meta', 'target']. 'meta' for metadata file type or + 'target' for target file type. It should correspond to + NAME_SCHEMA format. 
+ + file_path: + A relative path to the file that corresponds to RELPATH_SCHEMA format. + Ex: 'http://url_prefix/targets_path/file_path' + + + securesystemslib.exceptions.Error, on unsupported 'file_type'. + + securesystemslib.exceptions.FormatError, on bad argument. + + + List of mirror urls corresponding to the file_type and file_path. If no + match is found, empty list is returned. + """ + + # Checking if all the arguments have appropriate format. + tuf.formats.RELPATH_SCHEMA.check_match(file_path) + securesystemslib.formats.NAME_SCHEMA.check_match(file_type) + + # Verify 'file_type' is supported. + if file_type not in _SUPPORTED_FILE_TYPES: + raise sslib_exceptions.Error( + "Invalid file_type argument." + " Supported file types: " + repr(_SUPPORTED_FILE_TYPES) + ) + path_key = "metadata_path" if file_type == "meta" else "targets_path" + + list_of_mirrors = [] + for junk, mirror_info in six.iteritems(self._config): + # Does mirror serve this file type at all? + path = mirror_info.get(path_key) + if path is None: + continue + + # for targets, ensure directory confinement + if path_key == "targets_path": + full_filepath = os.path.join(path, file_path) + confined_target_dirs = mirror_info.get("confined_target_dirs") + # confined_target_dirs is optional and can used to confine the client to + # certain paths on a repository mirror when fetching target files. + if confined_target_dirs and not file_in_confined_directories( + full_filepath, confined_target_dirs + ): + continue + + # urllib.quote(string) replaces special characters in string using the %xx + # escape. This is done to avoid parsing issues of the URL on the server + # side. Do *NOT* pass URLs with Unicode characters without first encoding + # the URL as UTF-8. We need a long-term solution with #61. + # http://bugs.python.org/issue1712522 + file_path = six.moves.urllib.parse.quote(file_path) + url = os.path.join(mirror_info["url_prefix"], path, file_path) + + # The above os.path.join() result as well as input file_path may be + # invalid on windows (might contain both separator types), see #1077. + # Make sure the URL doesn't contain backward slashes on Windows. 
+ list_of_mirrors.append(url.replace("\\", "/")) + + return list_of_mirrors + + def meta_download(self, filename: str, upper_length: int) -> TextIO: + """ + Download metadata file from the list of metadata mirrors + """ + file_mirrors = self._get_list_of_mirrors("meta", filename) + + file_mirror_errors = {} + for file_mirror in file_mirrors: + try: + temp_obj = self._download_file( + file_mirror, + upper_length, + STRICT_REQUIRED_LENGTH=False, + ) + + temp_obj.seek(0) + yield temp_obj + + except Exception as exception: + file_mirror_errors[file_mirror] = exception + + finally: + if file_mirror_errors: + raise tuf.exceptions.NoWorkingMirrorError( + file_mirror_errors + ) + + def target_download(self, filename: str, strict_length: int) -> BinaryIO: + """ + Download target file from the list of target mirrors + """ + file_mirrors = self._get_list_of_mirrors("target", filename) + + file_mirror_errors = {} + for file_mirror in file_mirrors: + try: + temp_obj = self._download_file(file_mirror, strict_length) + + temp_obj.seek(0) + yield temp_obj + + except Exception as exception: + file_mirror_errors[file_mirror] = exception + + finally: + if file_mirror_errors: + raise tuf.exceptions.NoWorkingMirrorError( + file_mirror_errors + ) + + def _download_file(self, url, required_length, STRICT_REQUIRED_LENGTH=True): + """ + + Given the url and length of the desired file, this function opens a + connection to 'url' and downloads the file while ensuring its length + matches 'required_length' if 'STRICT_REQUIRED_LENGH' is True (If False, + the file's length is not checked and a slow retrieval exception is raised + if the downloaded rate falls below the acceptable rate). + + + url: + A URL string that represents the location of the file. + + required_length: + An integer value representing the length of the file. + + STRICT_REQUIRED_LENGTH: + A Boolean indicator used to signal whether we should perform strict + checking of required_length. True by default. We explicitly set this to + False when we know that we want to turn this off for downloading the + timestamp metadata, which has no signed required_length. + + + A file object is created on disk to store the contents of 'url'. + + + tuf.exceptions.DownloadLengthMismatchError, if there was a + mismatch of observed vs expected lengths while downloading the file. + + securesystemslib.exceptions.FormatError, if any of the arguments are + improperly formatted. + + Any other unforeseen runtime exception. + + + A file object that points to the contents of 'url'. + """ + # Do all of the arguments have the appropriate format? + # Raise 'securesystemslib.exceptions.FormatError' if there is a mismatch. + securesystemslib.formats.URL_SCHEMA.check_match(url) + tuf.formats.LENGTH_SCHEMA.check_match(required_length) + + # 'url.replace('\\', '/')' is needed for compatibility with Windows-based + # systems, because they might use back-slashes in place of forward-slashes. + # This converts it to the common format. unquote() replaces %xx escapes in a + # url with their single-character equivalent. A back-slash may be encoded as + # %5c in the url, which should also be replaced with a forward slash. + url = six.moves.urllib.parse.unquote(url).replace("\\", "/") + logger.info("Downloading: " + repr(url)) + + # This is the temporary file that we will return to contain the contents of + # the downloaded file. 
+ temp_file = tempfile.TemporaryFile() + + average_download_speed = 0 + number_of_bytes_received = 0 + + try: + chunks = self._fetcher.fetch(url, required_length) + start_time = timeit.default_timer() + for chunk in chunks: + + stop_time = timeit.default_timer() + temp_file.write(chunk) + + # Measure the average download speed. + number_of_bytes_received += len(chunk) + seconds_spent_receiving = stop_time - start_time + average_download_speed = ( + number_of_bytes_received / seconds_spent_receiving + ) + + if ( + average_download_speed + < tuf.settings.MIN_AVERAGE_DOWNLOAD_SPEED + ): + logger.debug( + "The average download speed dropped below the minimum" + " average download speed set in tuf.settings.py. Stopping the" + " download!" + ) + break + + else: + logger.debug( + "The average download speed has not dipped below the" + " minimum average download speed set in tuf.settings.py." + ) + + # Does the total number of downloaded bytes match the required length? + self._check_downloaded_length( + number_of_bytes_received, + required_length, + STRICT_REQUIRED_LENGTH=STRICT_REQUIRED_LENGTH, + average_download_speed=average_download_speed, + ) + + except Exception: + # Close 'temp_file'. Any written data is lost. + temp_file.close() + logger.debug("Could not download URL: " + repr(url)) + raise + + else: + return temp_file + + @staticmethod + def _check_downloaded_length( + total_downloaded, + required_length, + STRICT_REQUIRED_LENGTH=True, + average_download_speed=None, + ): + """ + + A helper function which checks whether the total number of downloaded bytes + matches our expectation. + + + total_downloaded: + The total number of bytes supposedly downloaded for the file in question. + + required_length: + The total number of bytes expected of the file as seen from its metadata. + The Timestamp role is always downloaded without a known file length, and + the Root role when the client cannot download any of the required + top-level roles. In both cases, 'required_length' is actually an upper + limit on the length of the downloaded file. + + STRICT_REQUIRED_LENGTH: + A Boolean indicator used to signal whether we should perform strict + checking of required_length. True by default. We explicitly set this to + False when we know that we want to turn this off for downloading the + timestamp metadata, which has no signed required_length. + + average_download_speed: + The average download speed for the downloaded file. + + + None. + + + securesystemslib.exceptions.DownloadLengthMismatchError, if + STRICT_REQUIRED_LENGTH is True and total_downloaded is not equal + required_length. + + tuf.exceptions.SlowRetrievalError, if the total downloaded was + done in less than the acceptable download speed (as set in + tuf.settings.py). + + + None. + """ + + if total_downloaded == required_length: + logger.info( + "Downloaded " + str(total_downloaded) + " bytes out of the" + " expected " + str(required_length) + " bytes." + ) + + else: + difference_in_bytes = abs(total_downloaded - required_length) + + # What we downloaded is not equal to the required length, but did we ask + # for strict checking of required length? + if STRICT_REQUIRED_LENGTH: + logger.info( + "Downloaded " + str(total_downloaded) + " bytes, but" + " expected " + + str(required_length) + + " bytes. There is a difference" + " of " + str(difference_in_bytes) + " bytes." + ) + + # If the average download speed is below a certain threshold, we flag + # this as a possible slow-retrieval attack. 
+ logger.debug( + "Average download speed: " + repr(average_download_speed) + ) + logger.debug( + "Minimum average download speed: " + + repr(tuf.settings.MIN_AVERAGE_DOWNLOAD_SPEED) + ) + + if ( + average_download_speed + < tuf.settings.MIN_AVERAGE_DOWNLOAD_SPEED + ): + raise tuf.exceptions.SlowRetrievalError( + average_download_speed + ) + + else: + logger.debug( + "Good average download speed: " + + repr(average_download_speed) + + " bytes per second" + ) + + raise tuf.exceptions.DownloadLengthMismatchError( + required_length, total_downloaded + ) + + else: + # We specifically disabled strict checking of required length, but we + # will log a warning anyway. This is useful when we wish to download the + # Timestamp or Root metadata, for which we have no signed metadata; so, + # we must guess a reasonable required_length for it. + if ( + average_download_speed + < tuf.settings.MIN_AVERAGE_DOWNLOAD_SPEED + ): + raise tuf.exceptions.SlowRetrievalError( + average_download_speed + ) + + else: + logger.debug( + "Good average download speed: " + + repr(average_download_speed) + + " bytes per second" + ) + + logger.info( + "Downloaded " + str(total_downloaded) + " bytes out of an" + " upper limit of " + str(required_length) + " bytes." + ) diff --git a/tuf/client_rework/requests_fetcher.py b/tuf/client_rework/requests_fetcher.py new file mode 100644 index 0000000000..6f5e89ec4e --- /dev/null +++ b/tuf/client_rework/requests_fetcher.py @@ -0,0 +1,182 @@ +# Copyright 2021, New York University and the TUF contributors +# SPDX-License-Identifier: MIT OR Apache-2.0 + +"""Provides an implementation of FetcherInterface using the Requests HTTP + library. +""" + +import logging +import time + +# Imports +import requests +import six +import urllib3.exceptions + +import tuf.exceptions +import tuf.settings +from tuf.client_rework.fetcher import FetcherInterface + +# Globals +logger = logging.getLogger(__name__) + +# Classess +class RequestsFetcher(FetcherInterface): + """A concrete implementation of FetcherInterface based on the Requests + library. + + Attributes: + _sessions: A dictionary of Requests.Session objects storing a separate + session per scheme+hostname combination. + """ + + def __init__(self): + # From http://docs.python-requests.org/en/master/user/advanced/#session-objects: + # + # "The Session object allows you to persist certain parameters across + # requests. It also persists cookies across all requests made from the + # Session instance, and will use urllib3's connection pooling. So if you're + # making several requests to the same host, the underlying TCP connection + # will be reused, which can result in a significant performance increase + # (see HTTP persistent connection)." + # + # NOTE: We use a separate requests.Session per scheme+hostname combination, + # in order to reuse connections to the same hostname to improve efficiency, + # but avoiding sharing state between different hosts-scheme combinations to + # minimize subtle security issues. Some cookies may not be HTTP-safe. + self._sessions = {} + + def fetch(self, url, required_length): + """Fetches the contents of HTTP/HTTPS url from a remote server. + + Ensures the length of the downloaded data is up to 'required_length'. + + Arguments: + url: A URL string that represents a file location. + required_length: An integer value representing the file length in bytes. + + Raises: + tuf.exceptions.SlowRetrievalError: A timeout occurs while receiving data. + tuf.exceptions.FetcherHTTPError: An HTTP error code is received. 
+ + Returns: + A bytes iterator + """ + # Get a customized session for each new schema+hostname combination. + session = self._get_session(url) + + # Get the requests.Response object for this URL. + # + # Defer downloading the response body with stream=True. + # Always set the timeout. This timeout value is interpreted by requests as: + # - connect timeout (max delay before first byte is received) + # - read (gap) timeout (max delay between bytes received) + response = session.get( + url, stream=True, timeout=tuf.settings.SOCKET_TIMEOUT + ) + # Check response status. + try: + response.raise_for_status() + except requests.HTTPError as e: + response.close() + status = e.response.status_code + raise tuf.exceptions.FetcherHTTPError(str(e), status) + + # Define a generator function to be returned by fetch. This way the caller + # of fetch can differentiate between connection and actual data download + # and measure download times accordingly. + def chunks(): + try: + bytes_received = 0 + while True: + # We download a fixed chunk of data in every round. This is so that we + # can defend against slow retrieval attacks. Furthermore, we do not + # wish to download an extremely large file in one shot. + # Before beginning the round, sleep (if set) for a short amount of + # time so that the CPU is not hogged in the while loop. + if tuf.settings.SLEEP_BEFORE_ROUND: + time.sleep(tuf.settings.SLEEP_BEFORE_ROUND) + + read_amount = min( + tuf.settings.CHUNK_SIZE, + required_length - bytes_received, + ) + + # NOTE: This may not handle some servers adding a Content-Encoding + # header, which may cause urllib3 to misbehave: + # https://github.com/pypa/pip/blob/404838abcca467648180b358598c597b74d568c9/src/pip/_internal/download.py#L547-L582 + data = response.raw.read(read_amount) + bytes_received += len(data) + + # We might have no more data to read. Check number of bytes downloaded. + if not data: + logger.debug( + "Downloaded " + + repr(bytes_received) + + "/" + + repr(required_length) + + " bytes." + ) + + # Finally, we signal that the download is complete. + break + + yield data + + if bytes_received >= required_length: + break + + except urllib3.exceptions.ReadTimeoutError as e: + raise tuf.exceptions.SlowRetrievalError(str(e)) + + finally: + response.close() + + return chunks() + + def _get_session(self, url): + """Returns a different customized requests.Session per schema+hostname + combination. + """ + # Use a different requests.Session per schema+hostname combination, to + # reuse connections while minimizing subtle security issues. + parsed_url = six.moves.urllib.parse.urlparse(url) + + if not parsed_url.scheme or not parsed_url.hostname: + raise tuf.exceptions.URLParsingError( + "Could not get scheme and hostname from URL: " + url + ) + + session_index = parsed_url.scheme + "+" + parsed_url.hostname + + logger.debug("url: " + url) + logger.debug("session index: " + session_index) + + session = self._sessions.get(session_index) + + if not session: + session = requests.Session() + self._sessions[session_index] = session + + # Attach some default headers to every Session. + requests_user_agent = session.headers["User-Agent"] + # Follows the RFC: https://tools.ietf.org/html/rfc7231#section-5.5.3 + tuf_user_agent = ( + "tuf/" + tuf.__version__ + " " + requests_user_agent + ) + session.headers.update( + { + # Tell the server not to compress or modify anything. + # https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Encoding#Directives + "Accept-Encoding": "identity", + # The TUF user agent. 
+ "User-Agent": tuf_user_agent, + } + ) + + logger.debug("Made new session for " + session_index) + + else: + logger.debug("Reusing session for " + session_index) + + return session diff --git a/tuf/client_rework/updater_rework.py b/tuf/client_rework/updater_rework.py index 10fdcc415f..06550fa3e9 100644 --- a/tuf/client_rework/updater_rework.py +++ b/tuf/client_rework/updater_rework.py @@ -10,18 +10,15 @@ import fnmatch import logging import os -from typing import BinaryIO, Dict, Optional, TextIO +from typing import Dict, Optional, TextIO import securesystemslib.exceptions import securesystemslib.util -import tuf.download import tuf.exceptions import tuf.formats -import tuf.mirrors import tuf.settings -from tuf.client.fetcher import FetcherInterface -from tuf.requests_fetcher import RequestsFetcher +from tuf.client_rework.mirrors_download import Mirrors from .metadata_wrapper import ( RootWrapper, @@ -54,18 +51,12 @@ def __init__( self, repository_name: str, repository_mirrors: Dict, - fetcher: Optional[FetcherInterface] = None, + fetcher: Optional["FetcherInterface"] = None, ): - self._repository_name = repository_name - self._mirrors = repository_mirrors - self._consistent_snapshot = False - self._metadata = {} - - if fetcher is None: - self._fetcher = RequestsFetcher() - else: - self._fetcher = fetcher + mirrors = Mirrors(repository_mirrors, fetcher) + self._metadata = MetadataUpdater(repository_name, mirrors) + self._target_updater = TargetUpdater(mirrors) def refresh(self) -> None: """ @@ -81,10 +72,7 @@ def refresh(self) -> None: requests. """ - self._load_root() - self._load_timestamp() - self._load_snapshot() - self._load_targets("targets", "root") + self._metadata.refresh() def get_one_valid_targetinfo(self, filename: str) -> Dict: """ @@ -92,10 +80,11 @@ def get_one_valid_targetinfo(self, filename: str) -> Dict: file path. This target method also downloads the metadata of updated targets. """ - return self._preorder_depth_first_walk(filename) + return self._metadata.preorder_depth_first_walk(filename) - @staticmethod - def updated_targets(targets: Dict, destination_directory: str) -> Dict: + def updated_targets( + self, targets: Dict, destination_directory: str + ) -> Dict: """ After the client has retrieved the target information for those targets they are interested in updating, they would call this method to @@ -103,123 +92,45 @@ def updated_targets(targets: Dict, destination_directory: str) -> Dict: All the targets that have changed are returns in a list. From this list, they can request a download by calling 'download_target()'. """ - # Keep track of the target objects and filepaths of updated targets. - # Return 'updated_targets' and use 'updated_targetpaths' to avoid - # duplicates. - updated_targets = [] - updated_targetpaths = [] - - for target in targets: - # Prepend 'destination_directory' to the target's relative filepath - # (as stored in metadata.) Verify the hash of 'target_filepath' - # against each hash listed for its fileinfo. Note: join() discards - # 'destination_directory' if 'filepath' contains a leading path - # separator (i.e., is treated as an absolute path). - filepath = target["filepath"] - target_filepath = os.path.join(destination_directory, filepath) - - if target_filepath in updated_targetpaths: - continue - - # Try one of the algorithm/digest combos for a mismatch. We break - # as soon as we find a mismatch. 
- for algorithm, digest in target["fileinfo"]["hashes"].items(): - digest_object = None - try: - digest_object = securesystemslib.hash.digest_filename( - target_filepath, algorithm=algorithm - ) - - # This exception will occur if the target does not exist - # locally. - except securesystemslib.exceptions.StorageError: - updated_targets.append(target) - updated_targetpaths.append(target_filepath) - break - - # The file does exist locally, check if its hash differs. - if digest_object.hexdigest() != digest: - updated_targets.append(target) - updated_targetpaths.append(target_filepath) - break - - return updated_targets + return self._target_updater.updated_targets( + targets, destination_directory + ) - def download_target(self, target: Dict, destination_directory: str): + def download_target(self, target: Dict, destination_directory: str) -> None: """ This method performs the actual download of the specified target. The file is saved to the 'destination_directory' argument. """ - try: - for temp_obj in self._mirror_target_download(target): - self._verify_target_file(temp_obj, target) - # break? should we break after first successful download? - - filepath = os.path.join( - destination_directory, target["filepath"] - ) - securesystemslib.util.persist_temp_file(temp_obj, filepath) - # pylint: disable=try-except-raise - except Exception: - # TODO: do something with exceptions - raise - - def _mirror_meta_download(self, filename: str, upper_length: int) -> TextIO: - """ - Download metadata file from the list of metadata mirrors - """ - file_mirrors = tuf.mirrors.get_list_of_mirrors( - "meta", filename, self._mirrors - ) + self._target_updater.download_target(target, destination_directory) - file_mirror_errors = {} - for file_mirror in file_mirrors: - try: - temp_obj = tuf.download.unsafe_download( - file_mirror, upper_length, self._fetcher - ) - temp_obj.seek(0) - yield temp_obj +class MetadataUpdater: + def __init__(self, repository_name: str, mirrors: "Mirrors"): - # pylint: disable=broad-except - except Exception as exception: - file_mirror_errors[file_mirror] = exception + self._repository_name = repository_name + self._mirrors = mirrors - finally: - if file_mirror_errors: - raise tuf.exceptions.NoWorkingMirrorError( - file_mirror_errors - ) + self._metadata = {} - def _mirror_target_download(self, fileinfo: str) -> BinaryIO: - """ - Download target file from the list of target mirrors + def refresh(self) -> None: """ - # full_filename = _get_full_name(filename) - file_mirrors = tuf.mirrors.get_list_of_mirrors( - "target", fileinfo["filepath"], self._mirrors - ) + This method downloads, verifies, and loads metadata for the top-level + roles in a specific order (root -> timestamp -> snapshot -> targets) + The expiration time for downloaded metadata is also verified. - file_mirror_errors = {} - for file_mirror in file_mirrors: - try: - temp_obj = tuf.download.safe_download( - file_mirror, fileinfo["fileinfo"]["length"], self._fetcher - ) + The metadata for delegated roles are not refreshed by this method, but + by the method that returns targetinfo (i.e., + get_one_valid_targetinfo()). - temp_obj.seek(0) - yield temp_obj - # pylint: disable=broad-except - except Exception as exception: - file_mirror_errors[file_mirror] = exception + The refresh() method should be called by the client before any target + requests. 
+ """ - finally: - if file_mirror_errors: - raise tuf.exceptions.NoWorkingMirrorError( - file_mirror_errors - ) + self._load_root() + self._load_timestamp() + self._load_snapshot() + self._load_targets("targets", "root") def _get_full_meta_name( self, role: str, extension: str = ".json", version: int = None @@ -274,7 +185,7 @@ def _load_root(self) -> None: verified_root = None for next_version in range(lower_bound, upper_bound): try: - mirror_download = self._mirror_meta_download( + mirror_download = self._mirrors.meta_download( self._get_relative_meta_name("root", version=next_version), tuf.settings.DEFAULT_ROOT_REQUIRED_LENGTH, ) @@ -288,7 +199,7 @@ def _load_root(self) -> None: except tuf.exceptions.NoWorkingMirrorError as exception: for mirror_error in exception.mirror_errors.values(): - if neither_403_nor_404(mirror_error): + if self.neither_403_nor_404(mirror_error): temp_obj.close() raise @@ -335,9 +246,10 @@ def _load_timestamp(self) -> None: TODO """ # TODO Check if timestamp exists locally - for temp_obj in self._mirror_meta_download( + for temp_obj in self._mirrors.meta_download( "timestamp.json", tuf.settings.DEFAULT_TIMESTAMP_REQUIRED_LENGTH ): + try: verified_tampstamp = self._verify_timestamp(temp_obj) # break? should we break after first successful download? @@ -372,7 +284,8 @@ def _load_snapshot(self) -> None: # Check if exists locally # self.loadLocal('snapshot', snapshotVerifier) - for temp_obj in self._mirror_meta_download("snapshot.json", length): + for temp_obj in self._mirrors.meta_download("snapshot.json", length): + try: verified_snapshot = self._verify_snapshot(temp_obj) # break? should we break after first successful download? @@ -408,9 +321,10 @@ def _load_targets(self, targets_role: str, parent_role: str) -> None: # Check if exists locally # self.loadLocal('snapshot', targetsVerifier) - for temp_obj in self._mirror_meta_download( + for temp_obj in self._mirrors.meta_download( targets_role + ".json", length ): + try: verified_targets = self._verify_targets( temp_obj, targets_role, parent_role @@ -569,16 +483,7 @@ def _verify_targets( return intermediate_targets - @staticmethod - def _verify_target_file(temp_obj: BinaryIO, targetinfo: Dict) -> None: - """ - TODO - """ - - _check_file_length(temp_obj, targetinfo["fileinfo"]["length"]) - _check_hashes(temp_obj, targetinfo["fileinfo"]["hashes"]) - - def _preorder_depth_first_walk(self, target_filepath) -> Dict: + def preorder_depth_first_walk(self, target_filepath) -> Dict: """ TODO """ @@ -636,7 +541,7 @@ def _preorder_depth_first_walk(self, target_filepath) -> Dict: # NOTE: This may be a slow operation if there are many # delegated roles. for child_role in child_roles: - child_role_name = _visit_child_role( + child_role_name = self._visit_child_role( child_role, target_filepath ) @@ -691,121 +596,232 @@ def _preorder_depth_first_walk(self, target_filepath) -> Dict: return {"filepath": target_filepath, "fileinfo": target} + @staticmethod + def _visit_child_role(child_role: Dict, target_filepath: str) -> str: + """ + + Non-public method that determines whether the given 'target_filepath' + is an allowed path of 'child_role'. -def _visit_child_role(child_role: Dict, target_filepath: str) -> str: - """ - - Non-public method that determines whether the given 'target_filepath' - is an allowed path of 'child_role'. - - Ensure that we explore only delegated roles trusted with the target. 
The - metadata for 'child_role' should have been refreshed prior to this point, - however, the paths/targets that 'child_role' signs for have not been - verified (as intended). The paths/targets that 'child_role' is allowed - to specify in its metadata depends on the delegating role, and thus is - left to the caller to verify. We verify here that 'target_filepath' - is an allowed path according to the delegated 'child_role'. - - TODO: Should the TUF spec restrict the repository to one particular - algorithm? Should we allow the repository to specify in the role - dictionary the algorithm used for these generated hashed paths? - - - child_role: - The delegation targets role object of 'child_role', containing its - paths, path_hash_prefixes, keys, and so on. - - target_filepath: - The path to the target file on the repository. This will be relative to - the 'targets' (or equivalent) directory on a given mirror. - - - None. - - - None. - - - If 'child_role' has been delegated the target with the name - 'target_filepath', then we return the role name of 'child_role'. - - Otherwise, we return None. - """ + Ensure that we explore only delegated roles trusted with the target. The + metadata for 'child_role' should have been refreshed prior to this point, + however, the paths/targets that 'child_role' signs for have not been + verified (as intended). The paths/targets that 'child_role' is allowed + to specify in its metadata depends on the delegating role, and thus is + left to the caller to verify. We verify here that 'target_filepath' + is an allowed path according to the delegated 'child_role'. - child_role_name = child_role["name"] - child_role_paths = child_role.get("paths") - child_role_path_hash_prefixes = child_role.get("path_hash_prefixes") + TODO: Should the TUF spec restrict the repository to one particular + algorithm? Should we allow the repository to specify in the role + dictionary the algorithm used for these generated hashed paths? - if child_role_path_hash_prefixes is not None: - target_filepath_hash = _get_target_hash(target_filepath) - for child_role_path_hash_prefix in child_role_path_hash_prefixes: - if not target_filepath_hash.startswith(child_role_path_hash_prefix): - continue + + child_role: + The delegation targets role object of 'child_role', containing its + paths, path_hash_prefixes, keys, and so on. + + target_filepath: + The path to the target file on the repository. This will be relative to + the 'targets' (or equivalent) directory on a given mirror. + + + None. + + + None. + + + If 'child_role' has been delegated the target with the name + 'target_filepath', then we return the role name of 'child_role'. + + Otherwise, we return None. + """ + + child_role_name = child_role["name"] + child_role_paths = child_role.get("paths") + child_role_path_hash_prefixes = child_role.get("path_hash_prefixes") + + if child_role_path_hash_prefixes is not None: + target_filepath_hash = self._get_target_hash(target_filepath) + for child_role_path_hash_prefix in child_role_path_hash_prefixes: + if not target_filepath_hash.startswith( + child_role_path_hash_prefix + ): + continue + + return child_role_name + + elif child_role_paths is not None: + # Is 'child_role_name' allowed to sign for 'target_filepath'? + for child_role_path in child_role_paths: + # A child role path may be an explicit path or glob pattern (Unix + # shell-style wildcards). The child role 'child_role_name' is + # returned if 'target_filepath' is equal to or matches + # 'child_role_path'. 
Explicit filepaths are also considered + # matches. A repo maintainer might delegate a glob pattern with a + # leading path separator, while the client requests a matching + # target without a leading path separator - make sure to strip any + # leading path separators so that a match is made. + # Example: "foo.tgz" should match with "/*.tgz". + if fnmatch.fnmatch( + target_filepath.lstrip(os.sep), + child_role_path.lstrip(os.sep), + ): + logger.debug( + "Child role " + + repr(child_role_name) + + " is allowed to sign for " + + repr(target_filepath) + ) + + return child_role_name - return child_role_name - - elif child_role_paths is not None: - # Is 'child_role_name' allowed to sign for 'target_filepath'? - for child_role_path in child_role_paths: - # A child role path may be an explicit path or glob pattern (Unix - # shell-style wildcards). The child role 'child_role_name' is - # returned if 'target_filepath' is equal to or matches - # 'child_role_path'. Explicit filepaths are also considered - # matches. A repo maintainer might delegate a glob pattern with a - # leading path separator, while the client requests a matching - # target without a leading path separator - make sure to strip any - # leading path separators so that a match is made. - # Example: "foo.tgz" should match with "/*.tgz". - if fnmatch.fnmatch( - target_filepath.lstrip(os.sep), child_role_path.lstrip(os.sep) - ): logger.debug( - "Child role " - + repr(child_role_name) - + " is allowed to sign for " + "The given target path " + repr(target_filepath) + + " does not match the trusted path or glob pattern: " + + repr(child_role_path) ) + continue - return child_role_name - - logger.debug( - "The given target path " - + repr(target_filepath) - + " does not match the trusted path or glob pattern: " - + repr(child_role_path) + else: + # 'role_name' should have been validated when it was downloaded. + # The 'paths' or 'path_hash_prefixes' fields should not be missing, + # so we raise a format error here in case they are both missing. + raise tuf.exceptions.FormatError( + repr(child_role_name) + " " + 'has neither a "paths" nor "path_hash_prefixes". At least' + " one of these attributes must be present." ) - continue - - else: - # 'role_name' should have been validated when it was downloaded. - # The 'paths' or 'path_hash_prefixes' fields should not be missing, - # so we raise a format error here in case they are both missing. - raise tuf.exceptions.FormatError( - repr(child_role_name) + " " - 'has neither a "paths" nor "path_hash_prefixes". At least' - " one of these attributes must be present." - ) - return None + return None + @staticmethod + def _get_target_hash(target_filepath, hash_function="sha256"): + """ + TODO + """ + # Calculate the hash of the filepath to determine which bin to find the + # target. The client currently assumes the repository (i.e., repository + # tool) uses 'hash_function' to generate hashes and UTF-8. + digest_object = securesystemslib.hash.digest(hash_function) + encoded_target_filepath = target_filepath.encode("utf-8") + digest_object.update(encoded_target_filepath) + target_filepath_hash = digest_object.hexdigest() -def _check_file_length(file_object, trusted_file_length): - """ - TODO - """ - file_object.seek(0, 2) - observed_length = file_object.tell() - - # Return and log a message if the length 'file_object' is equal to - # 'trusted_file_length', otherwise raise an exception. A hard check - # ensures that a downloaded file strictly matches a known, or trusted, - # file length. 
- if observed_length != trusted_file_length: - raise tuf.exceptions.DownloadLengthMismatchError( - trusted_file_length, observed_length - ) + return target_filepath_hash + + @staticmethod + def neither_403_nor_404(mirror_error): + """ + TODO + """ + if isinstance(mirror_error, tuf.exceptions.FetcherHTTPError): + if mirror_error.status_code in {403, 404}: + return False + return True +class TargetUpdater: + def __init__(self, mirrors: "Mirrors"): + + self._mirrors = mirrors + + @staticmethod + def updated_targets(targets: Dict, destination_directory: str) -> Dict: + """ + After the client has retrieved the target information for those targets + they are interested in updating, they would call this method to + determine which targets have changed from those saved locally on disk. + All the targets that have changed are returns in a list. From this + list, they can request a download by calling 'download_target()'. + """ + # Keep track of the target objects and filepaths of updated targets. + # Return 'updated_targets' and use 'updated_targetpaths' to avoid + # duplicates. + updated_targets = [] + updated_targetpaths = [] + + for target in targets: + # Prepend 'destination_directory' to the target's relative filepath + # (as stored in metadata.) Verify the hash of 'target_filepath' + # against each hash listed for its fileinfo. Note: join() discards + # 'destination_directory' if 'filepath' contains a leading path + # separator (i.e., is treated as an absolute path). + filepath = target["filepath"] + target_filepath = os.path.join(destination_directory, filepath) + + if target_filepath in updated_targetpaths: + continue + + # Try one of the algorithm/digest combos for a mismatch. We break + # as soon as we find a mismatch. + for algorithm, digest in target["fileinfo"]["hashes"].items(): + digest_object = None + try: + digest_object = securesystemslib.hash.digest_filename( + target_filepath, algorithm=algorithm + ) + + # This exception will occur if the target does not exist + # locally. + except securesystemslib.exceptions.StorageError: + updated_targets.append(target) + updated_targetpaths.append(target_filepath) + break + + # The file does exist locally, check if its hash differs. + if digest_object.hexdigest() != digest: + updated_targets.append(target) + updated_targetpaths.append(target_filepath) + break + + return updated_targets + + def download_target(self, target: Dict, destination_directory: str): + """ + This method performs the actual download of the specified target. + The file is saved to the 'destination_directory' argument. + """ + + for temp_obj in self._mirrors.target_download( + target["filepath"], target["fileinfo"]["length"] + ): + + try: + self._check_file_length(temp_obj, target["fileinfo"]["length"]) + _check_hashes(temp_obj, target["fileinfo"]["hashes"]) + # break? should we break after first successful download? + + filepath = os.path.join( + destination_directory, target["filepath"] + ) + securesystemslib.util.persist_temp_file(temp_obj, filepath) + # pylint: disable=try-except-raise + except Exception: + # TODO: do something with exceptions + raise + + @staticmethod + def _check_file_length(file_object, trusted_file_length): + """ + TODO + """ + file_object.seek(0, 2) + observed_length = file_object.tell() + + # Return and log a message if the length 'file_object' is equal to + # 'trusted_file_length', otherwise raise an exception. A hard check + # ensures that a downloaded file strictly matches a known, or trusted, + # file length. 
+ if observed_length != trusted_file_length: + raise tuf.exceptions.DownloadLengthMismatchError( + trusted_file_length, observed_length + ) + + +# FIXME: _check_hashes is moved outside the classes so that it can be reused. +# Find a proper class design to avoid this. def _check_hashes(file_object, trusted_hashes): """ TODO @@ -830,28 +846,3 @@ def _check_hashes(file_object, trusted_hashes): logger.info( "The file's " + algorithm + " hash is" " correct: " + trusted_hash ) - - -def _get_target_hash(target_filepath, hash_function="sha256"): - """ - TODO - """ - # Calculate the hash of the filepath to determine which bin to find the - # target. The client currently assumes the repository (i.e., repository - # tool) uses 'hash_function' to generate hashes and UTF-8. - digest_object = securesystemslib.hash.digest(hash_function) - encoded_target_filepath = target_filepath.encode("utf-8") - digest_object.update(encoded_target_filepath) - target_filepath_hash = digest_object.hexdigest() - - return target_filepath_hash - - -def neither_403_nor_404(mirror_error): - """ - TODO - """ - if isinstance(mirror_error, tuf.exceptions.FetcherHTTPError): - if mirror_error.status_code in {403, 404}: - return False - return True