diff --git a/tests/repository_data/repository/metadata/role1-snapshot.json b/tests/repository_data/repository/metadata/role1-snapshot.json new file mode 100644 index 0000000000..355e607efb --- /dev/null +++ b/tests/repository_data/repository/metadata/role1-snapshot.json @@ -0,0 +1,14 @@ +{ + "leaf_contents": { + "name": "role1", + "version": 1 + }, + "merkle_path": { + "0": "3bd2912d01accd816767dcde96a2b470dc5bb51cefe3b3aeb3aca7fdc1704d6b", + "1": "70304860310d2c6f0a05f2ccbfb49a4a6d6d3c7a9ff9c93e0b91b2e0ab7fff97" + }, + "path_directions": { + "0": -1, + "1": -1 + } +} \ No newline at end of file diff --git a/tests/repository_data/repository/metadata/role2-snapshot.json b/tests/repository_data/repository/metadata/role2-snapshot.json new file mode 100644 index 0000000000..cc932dcac9 --- /dev/null +++ b/tests/repository_data/repository/metadata/role2-snapshot.json @@ -0,0 +1,14 @@ +{ + "leaf_contents": { + "name": "role2", + "version": 1 + }, + "merkle_path": { + "0": "9a8cf4b3e3cf611d339867f295792c3105d3d8ebfcd559607f9528ba7511e52a", + "1": "70304860310d2c6f0a05f2ccbfb49a4a6d6d3c7a9ff9c93e0b91b2e0ab7fff97" + }, + "path_directions": { + "0": 1, + "1": -1 + } +} \ No newline at end of file diff --git a/tests/repository_data/repository/metadata/targets-snapshot.json b/tests/repository_data/repository/metadata/targets-snapshot.json new file mode 100644 index 0000000000..070ef81738 --- /dev/null +++ b/tests/repository_data/repository/metadata/targets-snapshot.json @@ -0,0 +1,12 @@ +{ + "leaf_contents": { + "name": "targets", + "version": 1 + }, + "merkle_path": { + "0": "30e11c75a8fa88fd36cc2a4796c5c9f405c9ae52b7adf4180d1c351141e5037a" + }, + "path_directions": { + "0": 1 + } +} \ No newline at end of file diff --git a/tests/repository_data/repository/metadata/timestamp-merkle.json b/tests/repository_data/repository/metadata/timestamp-merkle.json new file mode 100644 index 0000000000..219b036199 --- /dev/null +++ b/tests/repository_data/repository/metadata/timestamp-merkle.json @@ -0,0 +1,25 @@ +{ + "signatures": [ + { + "keyid": "8a1c4a3ac2d515dec982ba9910c5fd79b91ae57f625b9cff25d06bf0a61c1758", + "sig": "1790a53390ab9928ba5c46e7a30a4e0348976e26f34d8cdd29ee11d644276dfc72e3fff6d1a7a913a42a1443cda12a738a3e4803818e970446a91e0e99f24601" + } + ], + "signed": { + "_type": "timestamp", + "expires": "2030-01-01T00:00:00Z", + "merkle_root": "76eb3066cb278633fda18fa6e3ae33d783ff154e813e2752eb7bc8b65568a41b", + "meta": { + "snapshot.json": { + "hashes": { + "sha256": "8f88e2ba48b412c3843e9bb26e1b6f8fc9e98aceb0fbaa97ba37b4c98717d7ab", + "sha512": "fe9ed4b709776cc24e877babc76928cd119c18a806f432650ef6a5c687b0b5411df3c7fb3b69eda1163db83e1ae24ee3e22c9152e548b04f0a0884ee65310a95" + }, + "length": 515, + "version": 1 + } + }, + "spec_version": "1.0.0", + "version": 1 + } +} \ No newline at end of file diff --git a/tests/test_auditor.py b/tests/test_auditor.py new file mode 100644 index 0000000000..7419abe0b4 --- /dev/null +++ b/tests/test_auditor.py @@ -0,0 +1,317 @@ +#!/usr/bin/env python + +""" + + test_auditor.py + + + Marina Moore + + + January 29, 2021 + + + See LICENSE-MIT OR LICENSE for licensing information. + + + 'test-auditor.py' provides a collection of methods that test the public / + non-public methods and functions of 'tuf.client.auditor.py'. 
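+
+  The '*-snapshot.json' fixtures above pair a role's snapshot entry
+  ('leaf_contents') with its merkle inclusion proof ('merkle_path' and
+  'path_directions'), while 'timestamp-merkle.json' carries the signed
+  'merkle_root'.  A minimal sketch of the check these tests exercise,
+  assuming the sha256 and canonical-JSON encoding used by this patch:
+
+    import hashlib
+    import securesystemslib.formats
+
+    def recompute_root(snapshot_merkle):
+      # Hash the leaf, then fold in each sibling hash along the path.
+      contents = snapshot_merkle['leaf_contents']
+      node = hashlib.sha256(securesystemslib.formats.encode_canonical(
+          contents).encode('utf-8')).hexdigest()
+      for i in sorted(snapshot_merkle['merkle_path'], key=int):
+        sibling = snapshot_merkle['merkle_path'][i]
+        if snapshot_merkle['path_directions'][i] < 0:
+          node = hashlib.sha256((node + sibling).encode('utf-8')).hexdigest()
+        else:
+          node = hashlib.sha256((sibling + node).encode('utf-8')).hexdigest()
+      return node  # Should equal timestamp's signed 'merkle_root'.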
+ +""" + +import unittest +import tempfile +import os +import logging +import shutil + +import tuf +import tuf.exceptions +import tuf.log +import tuf.keydb +import tuf.roledb +import tuf.repository_tool as repo_tool +import tuf.repository_lib as repo_lib +import tuf.unittest_toolbox as unittest_toolbox +import tuf.client.auditor as auditor + +from tests import utils + +import securesystemslib + +logger = logging.getLogger(__name__) +repo_tool.disable_console_log_messages() + + +class TestAuditor(unittest_toolbox.Modified_TestCase): + + @classmethod + def setUpClass(cls): + # setUpClass is called before tests in an individual class are executed. + + # Create a temporary directory to store the repository, metadata, and target + # files. 'temporary_directory' must be deleted in TearDownModule() so that + # temporary files are always removed, even when exceptions occur. + cls.temporary_directory = tempfile.mkdtemp(dir=os.getcwd()) + + # Needed because in some tests simple_server.py cannot be found. + # The reason is that the current working directory + # has been changed when executing a subprocess. + cls.SIMPLE_SERVER_PATH = os.path.join(os.getcwd(), 'simple_server.py') + + # Launch a SimpleHTTPServer (serves files in the current directory). + # Test cases will request metadata and target files that have been + # pre-generated in 'tuf/tests/repository_data', which will be served + # by the SimpleHTTPServer launched here. The test cases of 'test_updater.py' + # assume the pre-generated metadata files have a specific structure, such + # as a delegated role 'targets/role1', three target files, five key files, + # etc. + cls.server_process_handler = utils.TestServerProcess(log=logger, + server=cls.SIMPLE_SERVER_PATH) + + + @classmethod + def tearDownClass(cls): + # Cleans the resources and flush the logged lines (if any). + cls.server_process_handler.clean() + + # Remove the temporary repository directory, which should contain all the + # metadata, targets, and key files generated for the test cases + shutil.rmtree(cls.temporary_directory) + + + def setUp(self): + # We are inheriting from custom class. + unittest_toolbox.Modified_TestCase.setUp(self) + + tuf.roledb.clear_roledb(clear_all=True) + tuf.keydb.clear_keydb(clear_all=True) + + self.repository_name = 'test_repository1' + + # Copy the original repository files provided in the test folder so that + # any modifications made to repository files are restricted to the copies. + # The 'repository_data' directory is expected to exist in 'tuf.tests/'. + original_repository_files = os.path.join(os.getcwd(), 'repository_data') + temporary_repository_root = \ + self.make_temp_directory(directory=self.temporary_directory) + + # The original repository, keystore, and client directories will be copied + # for each test case. + original_repository = os.path.join(original_repository_files, 'repository') + original_keystore = os.path.join(original_repository_files, 'keystore') + original_client = os.path.join(original_repository_files, 'client') + + # Save references to the often-needed client repository directories. + # Test cases need these references to access metadata and target files. 
+    self.repository_directory = \
+      os.path.join(temporary_repository_root, 'repository')
+    self.keystore_directory = \
+      os.path.join(temporary_repository_root, 'keystore')
+
+    self.client_directory = os.path.join(temporary_repository_root,
+        'client')
+    self.client_metadata = os.path.join(self.client_directory,
+        self.repository_name, 'metadata')
+    self.client_metadata_current = os.path.join(self.client_metadata,
+        'current')
+    self.client_metadata_previous = os.path.join(self.client_metadata,
+        'previous')
+
+    # Copy the original 'repository', 'client', and 'keystore' directories
+    # to the temporary repository that the test cases can use.
+    shutil.copytree(original_repository, self.repository_directory)
+    shutil.copytree(original_client, self.client_directory)
+    shutil.copytree(original_keystore, self.keystore_directory)
+
+    # 'path/to/tmp/repository' -> 'localhost:8001/tmp/repository'.
+    repository_basepath = self.repository_directory[len(os.getcwd()):]
+    url_prefix = 'http://localhost:' \
+        + str(self.server_process_handler.port) + repository_basepath
+
+    # Set 'tuf.settings.repositories_directory' to the temporary client
+    # directory copied from the original repository files.
+    tuf.settings.repositories_directory = self.client_directory
+
+    # Replace timestamp with a merkle timestamp.
+    merkle_timestamp = os.path.join(self.repository_directory, 'metadata', 'timestamp-merkle.json')
+    timestamp = os.path.join(self.repository_directory, 'metadata', 'timestamp.json')
+    shutil.move(merkle_timestamp, timestamp)
+
+    # Metadata role keys are needed by the test cases to make changes to the
+    # repository (e.g., adding a new target file to 'targets.json' and then
+    # requesting a refresh()).
+    self.role_keys = _load_role_keys(self.keystore_directory)
+
+    # The repository must be rewritten with 'consistent_snapshot' set.
+    repository = repo_tool.load_repository(self.repository_directory)
+
+    # Write metadata for all the top-level roles, since consistent snapshot
+    # is now being set to true (i.e., the pre-generated repository isn't set
+    # to support consistent snapshots).  A new version of targets.json is
+    # needed to ensure <digest>.filename target files are written to disk.
+    repository.targets.load_signing_key(self.role_keys['targets']['private'])
+    repository.root.load_signing_key(self.role_keys['root']['private'])
+    repository.snapshot.load_signing_key(self.role_keys['snapshot']['private'])
+    repository.timestamp.load_signing_key(self.role_keys['timestamp']['private'])
+
+    repository.mark_dirty(['targets', 'root', 'snapshot', 'timestamp'])
+    repository.writeall(snapshot_merkle=True, consistent_snapshot=True)
+
+    # Move the staged metadata to the "live" metadata.
+    shutil.rmtree(os.path.join(self.repository_directory, 'metadata'))
+    shutil.copytree(os.path.join(self.repository_directory, 'metadata.staged'),
+        os.path.join(self.repository_directory, 'metadata'))
+
+    self.repository_mirrors = {'mirror1': {'url_prefix': url_prefix,
+                                           'metadata_path': 'metadata',
+                                           'targets_path': 'targets'}}
+
+
+
+
+  def tearDown(self):
+    # We are inheriting from a custom class.
+    unittest_toolbox.Modified_TestCase.tearDown(self)
+    tuf.roledb.clear_roledb(clear_all=True)
+    tuf.keydb.clear_keydb(clear_all=True)
+
+    # Log stdout and stderr from the server subprocess.
+    self.server_process_handler.flush_log()
+
+
+  # UNIT TESTS.
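+
+  # For orientation, the flow exercised by the tests below is roughly the
+  # following (an illustrative sketch; the mirror values come from setUp):
+  #
+  #   repository_auditor = auditor.Auditor(self.repository_name,
+  #       self.repository_mirrors)
+  #   repository_auditor.verify()
+  #
+  # verify() walks every timestamp version published since
+  # last_version_verified and checks each snapshot merkle tree against its
+  # signed root hash, raising tuf.exceptions.RepositoryError on rollback.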
+
+
+  def test_1__init_exceptions(self):
+    # Invalid arguments.
+    self.assertRaises(securesystemslib.exceptions.FormatError, auditor.Auditor,
+        5, self.repository_mirrors)
+    self.assertRaises(securesystemslib.exceptions.FormatError, auditor.Auditor,
+        self.repository_name, 5)
+
+
+
+  def test_2__verify_merkle_tree(self):
+    repository_auditor = auditor.Auditor(self.repository_name, self.repository_mirrors)
+    # Skip version 1, as it was written without consistent snapshots.
+    repository_auditor.last_version_verified = 1
+
+    # The repository must be rewritten with 'consistent_snapshot' set.
+    repository = repo_tool.load_repository(self.repository_directory)
+
+    # Write metadata for all the top-level roles, since consistent snapshot
+    # is now being set to true (i.e., the pre-generated repository isn't set
+    # to support consistent snapshots).  A new version of targets.json is
+    # needed to ensure <digest>.filename target files are written to disk.
+    repository.targets.load_signing_key(self.role_keys['targets']['private'])
+    repository.root.load_signing_key(self.role_keys['root']['private'])
+    repository.snapshot.load_signing_key(self.role_keys['snapshot']['private'])
+    repository.timestamp.load_signing_key(self.role_keys['timestamp']['private'])
+
+    repository.targets.add_target('file1.txt')
+
+    repository.mark_dirty(['targets', 'root', 'snapshot', 'timestamp'])
+    repository.writeall(snapshot_merkle=True, consistent_snapshot=True)
+
+    # Move the staged metadata to the "live" metadata.
+    shutil.rmtree(os.path.join(self.repository_directory, 'metadata'))
+    shutil.copytree(os.path.join(self.repository_directory, 'metadata.staged'),
+        os.path.join(self.repository_directory, 'metadata'))
+
+
+    # Normal case; verify() should not raise.
+    repository_auditor.verify()
+
+    self.assertEqual(repository_auditor.version_info['role1.json'], 1)
+    self.assertEqual(repository_auditor.version_info['targets.json'], 3)
+    self.assertEqual(repository_auditor.last_version_verified, 3)
+
+    # Modify targets.
+    repository.targets.add_target('file2.txt')
+
+    repository.targets.load_signing_key(self.role_keys['targets']['private'])
+    repository.root.load_signing_key(self.role_keys['root']['private'])
+    repository.snapshot.load_signing_key(self.role_keys['snapshot']['private'])
+    repository.timestamp.load_signing_key(self.role_keys['timestamp']['private'])
+
+
+    repository.mark_dirty(['targets', 'root', 'snapshot', 'timestamp'])
+    repository.writeall(snapshot_merkle=True, consistent_snapshot=True)
+
+    # Move the staged metadata to the "live" metadata.
+    shutil.rmtree(os.path.join(self.repository_directory, 'metadata'))
+    shutil.copytree(os.path.join(self.repository_directory, 'metadata.staged'),
+        os.path.join(self.repository_directory, 'metadata'))
+
+    repository_auditor.verify()
+
+    # Ensure the auditor checked the latest targets.
+    self.assertEqual(repository_auditor.version_info['targets.json'], 4)
+
+    # Test rollback attack detection.
+    repository_auditor.version_info['targets.json'] = 5
+    repository_auditor.last_version_verified = 3
+
+    self.assertRaises(tuf.exceptions.RepositoryError, repository_auditor.verify)
+
+
+
+
+def _load_role_keys(keystore_directory):
+
+  # Populate 'role_keys' by importing the required public and private
+  # keys of 'tuf/tests/repository_data/'.  The role keys are needed when
+  # modifying the remote repository used by the test cases in this unit test.
+
+  # The pre-generated key files in 'repository_data/keystore' are all
+  # encrypted with a 'password' passphrase.
+ EXPECTED_KEYFILE_PASSWORD = 'password' + + # Store and return the cryptography keys of the top-level roles, including 1 + # delegated role. + role_keys = {} + + root_key_file = os.path.join(keystore_directory, 'root_key') + targets_key_file = os.path.join(keystore_directory, 'targets_key') + snapshot_key_file = os.path.join(keystore_directory, 'snapshot_key') + timestamp_key_file = os.path.join(keystore_directory, 'timestamp_key') + delegation_key_file = os.path.join(keystore_directory, 'delegation_key') + + role_keys = {'root': {}, 'targets': {}, 'snapshot': {}, 'timestamp': {}, + 'role1': {}} + + # Import the top-level and delegated role public keys. + role_keys['root']['public'] = \ + repo_tool.import_rsa_publickey_from_file(root_key_file+'.pub') + role_keys['targets']['public'] = \ + repo_tool.import_ed25519_publickey_from_file(targets_key_file+'.pub') + role_keys['snapshot']['public'] = \ + repo_tool.import_ed25519_publickey_from_file(snapshot_key_file+'.pub') + role_keys['timestamp']['public'] = \ + repo_tool.import_ed25519_publickey_from_file(timestamp_key_file+'.pub') + role_keys['role1']['public'] = \ + repo_tool.import_ed25519_publickey_from_file(delegation_key_file+'.pub') + + # Import the private keys of the top-level and delegated roles. + role_keys['root']['private'] = \ + repo_tool.import_rsa_privatekey_from_file(root_key_file, + EXPECTED_KEYFILE_PASSWORD) + role_keys['targets']['private'] = \ + repo_tool.import_ed25519_privatekey_from_file(targets_key_file, + EXPECTED_KEYFILE_PASSWORD) + role_keys['snapshot']['private'] = \ + repo_tool.import_ed25519_privatekey_from_file(snapshot_key_file, + EXPECTED_KEYFILE_PASSWORD) + role_keys['timestamp']['private'] = \ + repo_tool.import_ed25519_privatekey_from_file(timestamp_key_file, + EXPECTED_KEYFILE_PASSWORD) + role_keys['role1']['private'] = \ + repo_tool.import_ed25519_privatekey_from_file(delegation_key_file, + EXPECTED_KEYFILE_PASSWORD) + + return role_keys + + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_repository_lib.py b/tests/test_repository_lib.py index 36d1826a2e..ba612f33bd 100755 --- a/tests/test_repository_lib.py +++ b/tests/test_repository_lib.py @@ -467,6 +467,66 @@ def test_generate_targets_metadata(self): False, use_existing_fileinfo=True) + def test_build_merkle_tree(self): + temporary_directory = tempfile.mkdtemp(dir=self.temporary_directory) + storage_backend = securesystemslib.storage.FilesystemBackend() + version = 1 + + # Test building the tree one node at a time to verify the hashes + + test_nodes = {} + test_nodes['file1'] = tuf.formats.make_metadata_fileinfo(5, None, None) + + root_1, leaves = repo_lib._build_merkle_tree(test_nodes) + repo_lib._write_merkle_paths(root_1, leaves, storage_backend, + temporary_directory, version) + + file_path = os.path.join(temporary_directory, 'file1-snapshot.json') + self.assertTrue(os.path.exists(file_path)) + + file_path = os.path.join(temporary_directory, '1.file1-snapshot.json') + self.assertTrue(os.path.exists(file_path)) + + test_nodes['file2'] = tuf.formats.make_metadata_fileinfo(5, None, None) + root_2, leaves = repo_lib._build_merkle_tree(test_nodes) + + self.assertEqual(root_2.left.digest, root_1.digest) + + test_nodes['file3'] = tuf.formats.make_metadata_fileinfo(5, None, None) + test_nodes['file4'] = tuf.formats.make_metadata_fileinfo(5, None, None) + + root_3, leaves = repo_lib._build_merkle_tree(test_nodes) + + self.assertEqual(root_3.left.digest, root_2.digest) + + test_nodes['file5'] = tuf.formats.make_metadata_fileinfo(5, 
None, None) + + root_4, leaves = repo_lib._build_merkle_tree(test_nodes) + + repo_lib._write_merkle_paths(root_4, leaves, storage_backend, + temporary_directory, version + 1) + + self.assertEqual(root_4.left.digest, root_3.digest) + + # Ensure that the paths are written to the directory + file_path = os.path.join(temporary_directory, 'file1-snapshot.json') + self.assertTrue(os.path.exists(file_path)) + + file_path = os.path.join(temporary_directory, '2.file1-snapshot.json') + self.assertTrue(os.path.exists(file_path)) + + # repo_lib.print_merkle_tree(root_4) + + test_nodes = {} + test_nodes['targets'] = tuf.formats.make_metadata_fileinfo(1, None, None) + test_nodes['role1'] = tuf.formats.make_metadata_fileinfo(1, None, None) + test_nodes['role2'] = tuf.formats.make_metadata_fileinfo(1, None, None) + + root, leaves = repo_lib._build_merkle_tree(test_nodes) + + + + def _setup_generate_snapshot_metadata_test(self): # Test normal case. temporary_directory = tempfile.mkdtemp(dir=self.temporary_directory) @@ -512,7 +572,7 @@ def test_generate_snapshot_metadata(self): repo_lib.generate_snapshot_metadata(metadata_directory, version, expiration_date, storage_backend, - consistent_snapshot=False) + consistent_snapshot=False)[0] self.assertTrue(tuf.formats.SNAPSHOT_SCHEMA.matches(snapshot_metadata)) @@ -541,7 +601,7 @@ def test_generate_snapshot_metadata_with_length(self): expiration_date, storage_backend, consistent_snapshot=False, - use_length=True) + use_length=True)[0] self.assertTrue(tuf.formats.SNAPSHOT_SCHEMA.matches(snapshot_metadata)) metadata_files_info_dict = snapshot_metadata['meta'] @@ -556,7 +616,8 @@ def test_generate_snapshot_metadata_with_length(self): # In the repository, the file "role_file.xml" have been added to make # sure that non-json files aren't loaded. This file should be filtered. if stripped_filename.endswith('.json'): - if stripped_filename not in TOP_LEVEL_METADATA_FILES: + if stripped_filename not in TOP_LEVEL_METADATA_FILES and \ + not stripped_filename.endswith('-snapshot.json'): # Check that length is not calculated but hashes is self.assertIn('length', metadata_files_info_dict[stripped_filename]) self.assertNotIn('hashes', metadata_files_info_dict[stripped_filename]) @@ -572,7 +633,7 @@ def test_generate_snapshot_metadata_with_hashes(self): expiration_date, storage_backend, consistent_snapshot=False, - use_hashes=True) + use_hashes=True)[0] self.assertTrue(tuf.formats.SNAPSHOT_SCHEMA.matches(snapshot_metadata)) metadata_files_info_dict = snapshot_metadata['meta'] @@ -587,7 +648,8 @@ def test_generate_snapshot_metadata_with_hashes(self): # In the repository, the file "role_file.xml" have been added to make # sure that non-json files aren't loaded. This file should be filtered. 
if stripped_filename.endswith('.json'): - if stripped_filename not in TOP_LEVEL_METADATA_FILES: + if stripped_filename not in TOP_LEVEL_METADATA_FILES and \ + not stripped_filename.endswith('-snapshot.json'): # Check that hashes is not calculated but length is self.assertNotIn('length', metadata_files_info_dict[stripped_filename]) self.assertIn('hashes', metadata_files_info_dict[stripped_filename]) @@ -604,7 +666,7 @@ def test_generate_snapshot_metadata_with_hashes_and_length(self): storage_backend, consistent_snapshot=False, use_length=True, - use_hashes=True) + use_hashes=True)[0] self.assertTrue(tuf.formats.SNAPSHOT_SCHEMA.matches(snapshot_metadata)) metadata_files_info_dict = snapshot_metadata['meta'] @@ -619,7 +681,8 @@ def test_generate_snapshot_metadata_with_hashes_and_length(self): # In the repository, the file "role_file.xml" have been added to make # sure that non-json files aren't loaded. This file should be filtered. if stripped_filename.endswith('.json'): - if stripped_filename not in TOP_LEVEL_METADATA_FILES: + if stripped_filename not in TOP_LEVEL_METADATA_FILES and \ + not stripped_filename.endswith('-snapshot.json'): # Check that both length and hashes are not are not calculated self.assertIn('length', metadata_files_info_dict[stripped_filename]) self.assertIn('hashes', metadata_files_info_dict[stripped_filename]) @@ -940,6 +1003,7 @@ def test__generate_and_write_metadata(self): + def test__delete_obsolete_metadata(self): repository_name = 'test_repository' temporary_directory = tempfile.mkdtemp(dir=self.temporary_directory) diff --git a/tests/test_repository_tool.py b/tests/test_repository_tool.py index 0fac025a31..937e916aa0 100755 --- a/tests/test_repository_tool.py +++ b/tests/test_repository_tool.py @@ -255,6 +255,21 @@ def test_writeall(self): # Verify that status() does not raise an exception. repository.status() + # Test writeall with generating a snapshot merkle tree + repository.mark_dirty(['role1', 'targets', 'root', 'snapshot', 'timestamp']) + repository.writeall(snapshot_merkle=True) + + # Were the merkle snapshots written? + targets_snapshot_filepath = os.path.join(metadata_directory, + 'targets-snapshot.json') + targets_snapshot = securesystemslib.util.load_json_file(targets_snapshot_filepath) + tuf.formats.SNAPSHOT_MERKLE_SCHEMA.check_match(targets_snapshot) + + # Does timestamp have the root hash? + timestamp_filepath = os.path.join(metadata_directory, 'timestamp.json') + timestamp = securesystemslib.util.load_json_file(timestamp_filepath) + timestamp['signed']['merkle_root'] + # Verify that status() does not raise # 'tuf.exceptions.InsufficientKeysError' if a top-level role # does not contain a threshold of keys. @@ -496,7 +511,9 @@ def test_get_filepaths_in_directory(self): # Construct list of file paths expected, determining absolute paths. 
expected_files = [] for filepath in ['1.root.json', 'root.json', 'targets.json', - 'snapshot.json', 'timestamp.json', 'role1.json', 'role2.json']: + 'snapshot.json', 'timestamp.json', 'role1.json', 'role2.json', + 'targets-snapshot.json', 'timestamp-merkle.json', + 'role1-snapshot.json', 'role2-snapshot.json']: expected_files.append(os.path.abspath(os.path.join( 'repository_data', 'repository', 'metadata', filepath))) diff --git a/tests/test_updater.py b/tests/test_updater.py index 69c67044ea..ce0c870352 100755 --- a/tests/test_updater.py +++ b/tests/test_updater.py @@ -833,7 +833,7 @@ def test_3__get_metadata_file(self): upperbound_filelength = tuf.settings.DEFAULT_TIMESTAMP_REQUIRED_LENGTH try: self.repository_updater._get_metadata_file('timestamp', 'timestamp.json', - upperbound_filelength, 1) + upperbound_filelength, 1, self.repository_updater.signable_verification) except tuf.exceptions.NoWorkingMirrorError as e: # Note that this test provides a piece of metadata which would fail to @@ -1779,6 +1779,35 @@ def test_13__targets_of_role(self): + def test_snapshot_merkle(self): + # replace timestamp with a merkle timestamp and create the updater + merkle_timestamp = os.path.join(self.repository_directory, 'metadata', 'timestamp-merkle.json') + timestamp = os.path.join(self.repository_directory, 'metadata', 'timestamp.json') + + shutil.move(merkle_timestamp, timestamp) + + repository_updater = updater.Updater(self.repository_name, + self.repository_mirrors) + repository_updater.refresh() + + # Test verify merkle path + snapshot_info = repository_updater.verify_merkle_path('targets') + self.assertEqual(snapshot_info['version'], 1) + + snapshot_info = repository_updater.verify_merkle_path('role1') + self.assertEqual(snapshot_info['version'], 1) + + # verify merkle path with invalid role + self.assertRaises(tuf.exceptions.NoWorkingMirrorError, + repository_updater.verify_merkle_path, 'foo') + + # Test get_one_valid_targetinfo with snapshot merkle + repository_updater.get_one_valid_targetinfo('file1.txt') + + + + + class TestMultiRepoUpdater(unittest_toolbox.Modified_TestCase): def setUp(self): diff --git a/tuf/client/auditor.py b/tuf/client/auditor.py new file mode 100644 index 0000000000..992015fb15 --- /dev/null +++ b/tuf/client/auditor.py @@ -0,0 +1,141 @@ +#!/usr/bin/env python + +# Copyright 2012 - 2017, New York University and the TUF contributors +# SPDX-License-Identifier: MIT OR Apache-2.0 + +""" + + auditor.py + + + Marina Moore + + January 28, 2021 + + See LICENSE-MIT OR LICENSE for licensing information. + + 'auditor.py' provides an implementation of an auditor for + snapshot merkle metadata. + +""" + +import tuf +import tuf.download +import tuf.formats +import tuf.client.updater + +import securesystemslib.hash + + + +class Auditor(object): + """ + + Provide a class that downloads and verifies snapshot merkle metadata + from a repository. + + + repository_name: + Name of the repository to be audited + + repository_mirrors: + Dictionary holding repository mirror information, conformant to + `tuf.formats.MIRRORDICT_SCHEMA`. + + + securesystemslib.exceptions.FormatError: + If the arguments are improperly formatted. + + + None. + + + None. 
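+
+    Illustrative usage (the mirror values here are placeholders):
+
+      mirrors = {'mirror1': {'url_prefix': 'http://localhost:8001',
+                             'metadata_path': 'metadata',
+                             'targets_path': 'targets'}}
+      repository_auditor = Auditor('default', mirrors)
+      repository_auditor.verify()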
+ """ + + def __init__(self, repository_name, repository_mirrors): + securesystemslib.formats.NAME_SCHEMA.check_match(repository_name) + tuf.formats.MIRRORDICT_SCHEMA.check_match(repository_mirrors) + + self.repository_name = repository_name + self.mirrors = repository_mirrors + + # Create a dictionary to store current version information + # for all targets metadata + self.version_info = {} + + # Keep track of the last timestamp version number checked + self.last_version_verified = 0 + + # Updater will be used to update top-level metadata + self.updater = tuf.client.updater.Updater(repository_name, repository_mirrors) + + + def verify(self): + # download most recent top-level metadata, determine current timestamp key + self.updater.refresh() + + cur_timestamp_keys = self.updater.metadata['current']['root']['roles']['timestamp']['keyids'] + + # Download all trees since last_version_verified that use cur_timestamp_key + + next_version = self.last_version_verified + 1 + version_exists = True + + while(version_exists): + verification_fn = self.updater.signable_verification + + # Attempt to download this version of timestamp. If it does not exist, + # break out of the loop + timestamp = self.updater.download_metadata_version_if_exists("timestamp", + next_version, verification_fn, + tuf.settings.DEFAULT_TIMESTAMP_REQUIRED_LENGTH) + + if not timestamp: + version_exists = False + break + + + # Compare with the current timestamp keys. We only verify any trees + # that use the current keys for fast forward attack recovery + # Check if there are the same number of keys, and that the keyids match + # TODO: Should the auditor also verify older trees? + if len(timestamp['signatures']) != len(cur_timestamp_keys): + break + + for s in timestamp['signatures']: + if s['keyid'] not in cur_timestamp_keys: + break + + merkle_root = timestamp['signed']['merkle_root'] + + # Download and verify Merkle trees + + # First, download snapshot to get a list of nodes + snapshot = self.updater.download_metadata_version_if_exists("snapshot", + next_version, verification_fn, + tuf.settings.DEFAULT_SNAPSHOT_REQUIRED_LENGTH) + + for metadata_filename in snapshot['signed']['meta']: + # Download the node and verify its path + versioninfo = self.updater.verify_merkle_path( + metadata_filename[:-len('.json')], next_version, merkle_root) + + # Have we seen this metadata file before? + # If yes, compare the version info + if metadata_filename in self.version_info: + if self.version_info[metadata_filename] > versioninfo['version']: + raise tuf.exceptions.RepositoryError('Rollback attack detected' + + 'for ' + metadata_filename + '. Version ' + + str(versioninfo['version']) + ' is less than ' + + str(self.version_info[metadata_filename])) + + # Update `version_info` with the latest seen version + self.version_info[metadata_filename] = versioninfo['version'] + + + self.last_version_verified = next_version + next_version = next_version + 1 + + + diff --git a/tuf/client/updater.py b/tuf/client/updater.py index 9ada0974e2..3d31136a62 100755 --- a/tuf/client/updater.py +++ b/tuf/client/updater.py @@ -1081,8 +1081,12 @@ def refresh(self, unsafely_update_root_if_necessary=True): # require strict checks on its required length. self._update_metadata('timestamp', DEFAULT_TIMESTAMP_UPPERLENGTH) - self._update_metadata_if_changed('snapshot', - referenced_metadata='timestamp') + if 'merkle_root' not in self.metadata['current']['timestamp']: + # If merkle root is set, do not update snapshot metadata. 
Instead, + # we will download the relevant merkle path later when downloading + # a target. + self._update_metadata_if_changed('snapshot', + referenced_metadata='timestamp') self._update_metadata_if_changed('targets') @@ -1460,9 +1464,9 @@ def _verify_metadata_file(self, metadata_file_object, - def _get_metadata_file(self, metadata_role, remote_filename, - upperbound_filelength, expected_version): + upperbound_filelength, expected_version, + verification_fn): """ Non-public method that tries downloading, up to a certain length, a @@ -1485,6 +1489,11 @@ def _get_metadata_file(self, metadata_role, remote_filename, The expected and required version number of the 'metadata_role' file downloaded. 'expected_version' is an integer. + snapshot_merkle: + Is the metadata file a snapshot merkle file? Snapshot merkle files + are not signed and so should skip some of the verification steps here. + Instead, they must be verified using verify_merkle_path. + tuf.exceptions.NoWorkingMirrorError: The metadata could not be fetched. This is raised only when all known @@ -1512,83 +1521,9 @@ def _get_metadata_file(self, metadata_role, remote_filename, upperbound_filelength) file_object.seek(0) - # Verify 'file_object' according to the callable function. - # 'file_object' is also verified if decompressed above (i.e., the - # uncompressed version). - metadata_signable = \ - securesystemslib.util.load_json_string(file_object.read().decode('utf-8')) - - # Determine if the specification version number is supported. It is - # assumed that "spec_version" is in (major.minor.fix) format, (for - # example: "1.4.3") and that releases with the same major version - # number maintain backwards compatibility. Consequently, if the major - # version number of new metadata equals our expected major version - # number, the new metadata is safe to parse. - try: - metadata_spec_version = metadata_signable['signed']['spec_version'] - metadata_spec_version_split = metadata_spec_version.split('.') - metadata_spec_major_version = int(metadata_spec_version_split[0]) - metadata_spec_minor_version = int(metadata_spec_version_split[1]) - - code_spec_version_split = tuf.SPECIFICATION_VERSION.split('.') - code_spec_major_version = int(code_spec_version_split[0]) - code_spec_minor_version = int(code_spec_version_split[1]) - - if metadata_spec_major_version != code_spec_major_version: - raise tuf.exceptions.UnsupportedSpecificationError( - 'Downloaded metadata that specifies an unsupported ' - 'spec_version. This code supports major version number: ' + - repr(code_spec_major_version) + '; however, the obtained ' - 'metadata lists version number: ' + str(metadata_spec_version)) - - #report to user if minor versions do not match, continue with update - if metadata_spec_minor_version != code_spec_minor_version: - logger.info("Downloaded metadata that specifies a different minor " + - "spec_version. This code has version " + - str(tuf.SPECIFICATION_VERSION) + - " and the metadata lists version number " + - str(metadata_spec_version) + - ". The update will continue as the major versions match.") - - except (ValueError, TypeError) as error: - six.raise_from(securesystemslib.exceptions.FormatError('Improperly' - ' formatted spec_version, which must be in major.minor.fix format'), - error) - - # If the version number is unspecified, ensure that the version number - # downloaded is greater than the currently trusted version number for - # 'metadata_role'. 
- version_downloaded = metadata_signable['signed']['version'] - - if expected_version is not None: - # Verify that the downloaded version matches the version expected by - # the caller. - if version_downloaded != expected_version: - raise tuf.exceptions.BadVersionNumberError('Downloaded' - ' version number: ' + repr(version_downloaded) + '. Version' - ' number MUST be: ' + repr(expected_version)) - - # The caller does not know which version to download. Verify that the - # downloaded version is at least greater than the one locally - # available. - else: - # Verify that the version number of the locally stored - # 'timestamp.json', if available, is less than what was downloaded. - # Otherwise, accept the new timestamp with version number - # 'version_downloaded'. - - try: - current_version = \ - self.metadata['current'][metadata_role]['version'] - - if version_downloaded < current_version: - raise tuf.exceptions.ReplayedMetadataError(metadata_role, - version_downloaded, current_version) - - except KeyError: - logger.info(metadata_role + ' not available locally.') - - self._verify_metadata_file(file_object, metadata_role) + # Verify the file object using the provided function, if any + if verification_fn is not None: + verification_fn(metadata_role, file_object, expected_version) except Exception as exception: # Remember the error from this mirror, and "reset" the target file. @@ -1612,6 +1547,206 @@ def _get_metadata_file(self, metadata_role, remote_filename, + def signable_verification(self, metadata_role, file_object, expected_version): + # Verify 'file_object' according to the callable function. + # 'file_object' is also verified if decompressed above (i.e., the + # uncompressed version). + metadata_signable = \ + securesystemslib.util.load_json_string(file_object.read().decode('utf-8')) + + # Determine if the specification version number is supported. It is + # assumed that "spec_version" is in (major.minor.fix) format, (for + # example: "1.4.3") and that releases with the same major version + # number maintain backwards compatibility. Consequently, if the major + # version number of new metadata equals our expected major version + # number, the new metadata is safe to parse. + try: + metadata_spec_version = metadata_signable['signed']['spec_version'] + metadata_spec_version_split = metadata_spec_version.split('.') + metadata_spec_major_version = int(metadata_spec_version_split[0]) + metadata_spec_minor_version = int(metadata_spec_version_split[1]) + + code_spec_version_split = tuf.SPECIFICATION_VERSION.split('.') + code_spec_major_version = int(code_spec_version_split[0]) + code_spec_minor_version = int(code_spec_version_split[1]) + + if metadata_spec_major_version != code_spec_major_version: + raise tuf.exceptions.UnsupportedSpecificationError( + 'Downloaded metadata that specifies an unsupported ' + 'spec_version. This code supports major version number: ' + + repr(code_spec_major_version) + '; however, the obtained ' + 'metadata lists version number: ' + str(metadata_spec_version)) + + #report to user if minor versions do not match, continue with update + if metadata_spec_minor_version != code_spec_minor_version: + logger.info("Downloaded metadata that specifies a different minor " + + "spec_version. This code has version " + + str(tuf.SPECIFICATION_VERSION) + + " and the metadata lists version number " + + str(metadata_spec_version) + + ". 
The update will continue as the major versions match.")
+
+    except (ValueError, TypeError) as error:
+      six.raise_from(securesystemslib.exceptions.FormatError('Improperly'
+          ' formatted spec_version, which must be in major.minor.fix format'),
+          error)
+
+    # If the version number is unspecified, ensure that the version number
+    # downloaded is greater than the currently trusted version number for
+    # 'metadata_role'.
+    version_downloaded = metadata_signable['signed']['version']
+
+    if expected_version is not None:
+      # Verify that the downloaded version matches the version expected by
+      # the caller.
+      if version_downloaded != expected_version:
+        raise tuf.exceptions.BadVersionNumberError('Downloaded'
+            ' version number: ' + repr(version_downloaded) + '.  Version'
+            ' number MUST be: ' + repr(expected_version))
+
+    # The caller does not know which version to download.  Verify that the
+    # downloaded version is at least greater than the one locally
+    # available.
+    else:
+      # Verify that the version number of the locally stored
+      # 'timestamp.json', if available, is less than what was downloaded.
+      # Otherwise, accept the new timestamp with version number
+      # 'version_downloaded'.
+
+      try:
+        current_version = \
+            self.metadata['current'][metadata_role]['version']
+
+        if version_downloaded < current_version:
+          raise tuf.exceptions.ReplayedMetadataError(metadata_role,
+              version_downloaded, current_version)
+
+      except KeyError:
+        logger.info(metadata_role + ' not available locally.')
+
+    self._verify_metadata_file(file_object, metadata_role)
+
+
+
+
+
+
+  def _update_merkle_metadata(self, merkle_filename, upperbound_filelength,
+      version=None):
+    """
+
+      Non-public method that downloads and 'installs' the merkle path
+      metadata belonging to 'merkle_filename'.  Calling this method implies
+      that the 'merkle_filename' on the repository is newer than the client's,
+      and thus needs to be re-downloaded.  The current and previous metadata
+      stores are updated if the new metadata is successfully downloaded.
+      This method also assumes that the store of top-level metadata is the
+      latest and exists.
+
+
+      merkle_filename:
+        The name of the snapshot merkle file to download, without the '.json'
+        extension.  Examples: 'role1-snapshot', 'targets-snapshot'
+
+      upperbound_filelength:
+        The expected length, or upper bound, of the metadata file to be
+        downloaded.
+
+      version:
+        The expected and required version number of the 'merkle_filename' file
+        downloaded.  'version' is an integer.
+
+
+      tuf.exceptions.NoWorkingMirrorError:
+        The metadata cannot be updated.  This is not specific to a single
+        failure but rather indicates that all possible ways to update the
+        metadata have been tried and failed.
+
+
+      The metadata file belonging to 'merkle_filename' is downloaded from a
+      repository mirror.  If the metadata is valid, it is stored in the
+      metadata store.
+
+
+      None.
+    """
+
+    # Construct the metadata filename as expected by the download/mirror
+    # modules.
+    metadata_filename = merkle_filename + '.json'
+
+    # Attempt a file download from each mirror until the file is downloaded;
+    # if a download fails, log a warning and try the next mirror.
+    # 'metadata_file_object' is the file-like object returned by
+    # 'download.py'.  Unlike other metadata, snapshot merkle files are not
+    # 'signable' objects conformant to 'tuf.formats.SIGNABLE_SCHEMA'.
+    #
+    # They are instead verified against the signed merkle root in
+    # verify_merkle_path(), which is why no verification function is passed
+    # to _get_metadata_file() below.  The file is still downloaded only up to
+    # the expected length 'upperbound_filelength', and, when consistent
+    # snapshots are enabled, a specific version is requested by filename
+    # prefix.
+
+    remote_filename = metadata_filename
+    filename_version = ''
+
+    if self.consistent_snapshot and version:
+      filename_version = version
+      dirname, basename = os.path.split(remote_filename)
+      remote_filename = os.path.join(
+          dirname, str(filename_version) + '.' + basename)
+
+    verification_fn = None
+
+    metadata_file_object = \
+        self._get_metadata_file(merkle_filename, remote_filename,
+            upperbound_filelength, version, verification_fn)
+
+    # The metadata has been downloaded.  Move the metadata file into place.
+    # First, move the 'current' metadata file to the 'previous' directory
+    # if it exists.
+    current_filepath = os.path.join(self.metadata_directory['current'],
+        metadata_filename)
+    current_filepath = os.path.abspath(current_filepath)
+    securesystemslib.util.ensure_parent_dir(current_filepath)
+
+    previous_filepath = os.path.join(self.metadata_directory['previous'],
+        metadata_filename)
+    previous_filepath = os.path.abspath(previous_filepath)
+
+    if os.path.exists(current_filepath):
+      # Previous metadata might not exist, say when delegations are added.
+      securesystemslib.util.ensure_parent_dir(previous_filepath)
+      shutil.move(current_filepath, previous_filepath)
+
+    # Next, move the updated metadata file to the 'current' directory.
+    metadata_file_object.seek(0)
+    updated_metadata_object = \
+        securesystemslib.util.load_json_string(metadata_file_object.read().decode('utf-8'))
+
+    securesystemslib.util.persist_temp_file(metadata_file_object, current_filepath)
+
+    # Extract the metadata object so we can store it to the metadata store.
+    # 'current_metadata_object' is set to 'None' if there is not an object
+    # stored for 'merkle_filename'.
+    current_metadata_object = self.metadata['current'].get(merkle_filename)
+
+    # Finally, update the metadata store with the new merkle path file.
+    logger.debug('Updated ' + repr(current_filepath) + '.')
+    self.metadata['previous'][merkle_filename] = current_metadata_object
+    self.metadata['current'][merkle_filename] = updated_metadata_object
+
+
+
+
   def _update_metadata(self, metadata_role, upperbound_filelength, version=None):
     """
@@ -1681,9 +1816,11 @@ def _update_metadata(self, metadata_role, upperbound_filelength, version=None):
       remote_filename = os.path.join(
           dirname, str(filename_version) + '.' + basename)
 
+    verification_fn = self.signable_verification
+
     metadata_file_object = \
       self._get_metadata_file(metadata_role, remote_filename,
-          upperbound_filelength, version)
+          upperbound_filelength, version, verification_fn)
 
     # The metadata has been verified.  Move the metadata file into place.
    # First, move the 'current' metadata file to the 'previous' directory
@@ -1728,6 +1865,97 @@
 
 
 
+  def verify_merkle_path(self, metadata_role, version=None, merkle_root=None):
+    """
+
+      Download the merkle path associated with 'metadata_role' and verify the
+      hashes.
+
+      metadata_role:
+        The name of the metadata role.  This should not include a file
+        extension.
+
+      tuf.exceptions.RepositoryError:
+        If the snapshot merkle file is invalid or the verification fails.
+
+      A dictionary containing the snapshot information about 'metadata_role',
+      conforming to VERSIONINFO_SCHEMA or METADATA_FILEINFO_SCHEMA.
+    """
+    if not merkle_root:
+      merkle_root = self.metadata['current']['timestamp']['merkle_root']
+
+    metadata_rolename = metadata_role + '-snapshot'
+
+    # Download the Merkle path.
+    upperbound_filelength = tuf.settings.MERKLE_FILELENGTH
+    self._update_merkle_metadata(metadata_rolename, upperbound_filelength, version)
+    metadata_directory = self.metadata_directory['current']
+    metadata_filename = metadata_rolename + '.json'
+    metadata_filepath = os.path.join(metadata_directory, metadata_filename)
+
+    # Ensure the metadata path is valid/exists, else ignore the call.
+    if not os.path.exists(metadata_filepath):
+      # No merkle path found.
+      raise tuf.exceptions.RepositoryError('No snapshot merkle file for ' +
+          metadata_role)
+    try:
+      snapshot_merkle = securesystemslib.util.load_json_file(
+          metadata_filepath)
+
+    # Although the metadata file may exist locally, it may not be a valid
+    # json file.  If it cannot be loaded from disk successfully, give up for
+    # now; it will be re-downloaded and updated as required on the next
+    # refresh cycle.
+    except securesystemslib.exceptions.Error:
+      return
+
+    # Verify the Merkle path.
+    tuf.formats.SNAPSHOT_MERKLE_SCHEMA.check_match(snapshot_merkle)
+
+    # Hash the contents to determine the leaf hash in the merkle tree.
+    contents = snapshot_merkle['leaf_contents']
+    json_contents = securesystemslib.formats.encode_canonical(contents)
+    digest_object = securesystemslib.hash.digest()
+    digest_object.update((json_contents).encode('utf-8'))
+    node_hash = digest_object.hexdigest()
+
+    # For each hash in the merkle_path, determine whether the current node is
+    # a left or a right node using the path_directions, then combine
+    # the hash from merkle_path with the current node_hash to determine
+    # the next node_hash.  At the end, the node_hash should match the hash
+    # in merkle_root.
+    merkle_path = snapshot_merkle['merkle_path']
+    path_directions = snapshot_merkle['path_directions']
+
+    # If merkle_path and path_directions have different lengths,
+    # the verification will not be possible.
+    if len(merkle_path) != len(path_directions):
+      raise tuf.exceptions.RepositoryError('Invalid merkle path for ' +
+          metadata_role)
+
+    for index in range(len(merkle_path)):
+      i = str(index)
+      if path_directions[i] < 0:
+        # The current node is a left node.
+        digest_object = securesystemslib.hash.digest()
+        digest_object.update((node_hash + merkle_path[i]).encode('utf-8'))
+      else:
+        # The current node is a right node.
+        digest_object = securesystemslib.hash.digest()
+        digest_object.update((merkle_path[i] + node_hash).encode('utf-8'))
+      node_hash = digest_object.hexdigest()
+
+    # Does the result match the merkle root?
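+    # For example, with the two-step proof in 'role2-snapshot.json'
+    # (path_directions of 1, then -1), the loop above computes:
+    #
+    #   node_hash = H(merkle_path['0'] + leaf_hash)   # step 0: right child
+    #   node_hash = H(node_hash + merkle_path['1'])   # step 1: left child
+    #
+    # and the final node_hash must equal the signed merkle root.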
+ if node_hash != merkle_root: + raise tuf.exceptions.RepositoryError('The merkle root ' + merkle_root + + ' does not match the hash ' + node_hash + ' for ' + metadata_role) + + # return the verified snapshot contents + return contents + + + + + + def _update_metadata_if_changed(self, metadata_role, referenced_metadata='snapshot'): """ @@ -1797,7 +2025,9 @@ def _update_metadata_if_changed(self, metadata_role, # Ensure the referenced metadata has been loaded. The 'root' role may be # updated without having 'snapshot' available. - if referenced_metadata not in self.metadata['current']: + # When snapshot merkle trees are used, there will not be a snapshot file. + # Instead, if the snapshot merkle file is missing, this will error below. + if 'merkle_root' not in self.metadata['current']['timestamp'] and referenced_metadata not in self.metadata['current']: raise tuf.exceptions.RepositoryError('Cannot update' ' ' + repr(metadata_role) + ' because ' + referenced_metadata + ' is' ' missing.') @@ -1809,12 +2039,18 @@ def _update_metadata_if_changed(self, metadata_role, repr(referenced_metadata)+ '. ' + repr(metadata_role) + ' may be updated.') - # Simply return if the metadata for 'metadata_role' has not been updated, - # according to the uncompressed metadata provided by the referenced - # metadata. The metadata is considered updated if its version number is - # strictly greater than its currently trusted version number. - expected_versioninfo = self.metadata['current'][referenced_metadata] \ - ['meta'][metadata_filename] + if 'merkle_root' in self.metadata['current']['timestamp']: + # Download version information from merkle tree + contents = self.verify_merkle_path(metadata_role) + expected_versioninfo = contents + + else: + # Simply return if the metadata for 'metadata_role' has not been updated, + # according to the uncompressed metadata provided by the referenced + # metadata. The metadata is considered updated if its version number is + # strictly greater than its currently trusted version number. + expected_versioninfo = self.metadata['current'][referenced_metadata] \ + ['meta'][metadata_filename] if not self._versioninfo_has_been_updated(metadata_filename, expected_versioninfo): @@ -2385,7 +2621,10 @@ def _refresh_targets_metadata(self, rolename='targets', roles_to_update = [] - if rolename + '.json' in self.metadata['current']['snapshot']['meta']: + # Add the role if it is listed in snapshot. If snapshot merkle + # trees are used, the snapshot check will be done later when + # the merkle tree is verified + if 'merkle_root' in self.metadata['current']['timestamp'] or rolename + '.json' in self.metadata['current']['snapshot']['meta']: roles_to_update.append(rolename) if refresh_all_delegated_roles: @@ -3181,3 +3420,42 @@ def download_target(self, target, destination_directory, trusted_hashes, prefix_filename_with_hash) securesystemslib.util.persist_temp_file(target_file_object, destination) + + def download_metadata_version_if_exists(self, role_name, version, verification_fn, upperbound_filelength): + + filename = role_name + ".json" + dirname, basename = os.path.split(filename) + remote_filename = os.path.join(dirname, str(version) + '.' + basename) + + + def neither_403_nor_404(mirror_error): + if isinstance(mirror_error, requests.exceptions.HTTPError): + if mirror_error.response.status_code in {403, 404}: + return False + return True + + updated_metadata_object = None + + try: + # Thoroughly verify it. 
+ metadata_file_object = \ + self._get_metadata_file(role_name, remote_filename, + upperbound_filelength, version, verification_fn) + metadata_file_object.seek(0) + updated_metadata_object = \ + securesystemslib.util.load_json_string(metadata_file_object.read().decode('utf-8')) + # When we run into HTTP 403/404 error from ALL mirrors, + # metadata file is most likely missing. + except tuf.exceptions.NoWorkingMirrorError as exception: + for mirror_error in exception.mirror_errors.values(): + # Otherwise, reraise the error, because it is not a simple HTTP + # error. + if neither_403_nor_404(mirror_error): + logger.exception('Misc error for root version '+str(version)) + raise + else: + # Calling this function should give us a detailed stack trace + # including an HTTP error code, if any. + logger.exception('HTTP error for root version '+str(version)) + + return updated_metadata_object diff --git a/tuf/formats.py b/tuf/formats.py index dc51ba9c98..6ea9623bfe 100755 --- a/tuf/formats.py +++ b/tuf/formats.py @@ -366,6 +366,12 @@ targets = FILEDICT_SCHEMA, delegations = SCHEMA.Optional(DELEGATIONS_SCHEMA)) +SNAPSHOT_MERKLE_SCHEMA = SCHEMA.Object( + leaf_contents = SCHEMA.OneOf([VERSIONINFO_SCHEMA, + METADATA_FILEINFO_SCHEMA]), + merkle_path = SCHEMA.DictOf(key_schema=SCHEMA.AnyString(), value_schema=HASH_SCHEMA), + path_directions = SCHEMA.DictOf(key_schema=SCHEMA.AnyString(), value_schema=SCHEMA.Integer())) + # Snapshot role: indicates the latest versions of all metadata (except # timestamp). SNAPSHOT_SCHEMA = SCHEMA.Object( @@ -383,7 +389,8 @@ spec_version = SPECIFICATION_VERSION_SCHEMA, version = METADATAVERSION_SCHEMA, expires = securesystemslib.formats.ISO8601_DATETIME_SCHEMA, - meta = FILEINFODICT_SCHEMA) + meta = FILEINFODICT_SCHEMA, + merkle_root = SCHEMA.Optional(HASH_SCHEMA)) # project.cfg file: stores information about the project in a json dictionary diff --git a/tuf/repository_lib.py b/tuf/repository_lib.py index 1f64f66413..a11e49ad63 100644 --- a/tuf/repository_lib.py +++ b/tuf/repository_lib.py @@ -98,7 +98,7 @@ def _generate_and_write_metadata(rolename, metadata_filename, increment_version_number=True, repository_name='default', use_existing_fileinfo=False, use_timestamp_length=True, use_timestamp_hashes=True, use_snapshot_length=False, - use_snapshot_hashes=False): + use_snapshot_hashes=False, snapshot_merkle=False): """ Non-public function that can generate and write the metadata for the specified 'rolename'. 
It also increments the version number of 'rolename' if @@ -125,11 +125,22 @@ def _generate_and_write_metadata(rolename, metadata_filename, elif rolename == 'snapshot': - metadata = generate_snapshot_metadata(metadata_directory, + metadata, fileinfodict = generate_snapshot_metadata(metadata_directory, roleinfo['version'], roleinfo['expires'], storage_backend, consistent_snapshot, repository_name, use_length=use_snapshot_length, use_hashes=use_snapshot_hashes) + if snapshot_merkle: + root, leaves = _build_merkle_tree(fileinfodict) + + # Add the merkle tree root hash to the timestamp roleinfo + timestamp_roleinfo = tuf.roledb.get_roleinfo('timestamp', repository_name) + timestamp_roleinfo['merkle_root'] = root.digest + + tuf.roledb.update_roleinfo('timestamp', timestamp_roleinfo, + repository_name=repository_name) + + _log_warning_if_expires_soon(SNAPSHOT_FILENAME, roleinfo['expires'], SNAPSHOT_EXPIRES_WARN_SECONDS) @@ -141,7 +152,8 @@ def _generate_and_write_metadata(rolename, metadata_filename, metadata = generate_timestamp_metadata(snapshot_file_path, roleinfo['version'], roleinfo['expires'], storage_backend, repository_name, - use_length=use_timestamp_length, use_hashes=use_timestamp_hashes) + use_length=use_timestamp_length, use_hashes=use_timestamp_hashes, + roleinfo=roleinfo) _log_warning_if_expires_soon(TIMESTAMP_FILENAME, roleinfo['expires'], TIMESTAMP_EXPIRES_WARN_SECONDS) @@ -188,6 +200,9 @@ def _generate_and_write_metadata(rolename, metadata_filename, else: logger.debug('Not incrementing ' + repr(rolename) + '\'s version number.') + if rolename == 'snapshot' and snapshot_merkle: + _write_merkle_paths(root, leaves, storage_backend, metadata_directory, metadata['version']) + if rolename in tuf.roledb.TOP_LEVEL_ROLES and not allow_partially_signed: # Verify that the top-level 'rolename' is fully signed. Only a delegated # role should not be written to disk without full verification of its @@ -386,6 +401,8 @@ def _delete_obsolete_metadata(metadata_directory, snapshot_metadata, for metadata_role in metadata_files: if metadata_role.endswith('root.json'): continue + if metadata_role.endswith('-snapshot.json'): + continue metadata_path = os.path.join(metadata_directory, metadata_role) @@ -1549,6 +1566,222 @@ def _get_hashes_and_length_if_needed(use_length, use_hashes, full_file_path, +class Node(object): + """ + Merkle tree node that keeps track of the node digest and the parent node. + """ + parent = None + digest = None + + def __init__(self): + return + + def is_leaf(self): + return False + + + + +class InternalNode(Node): + """ + An internal Merkle tree node that keeps track of a left and a right + child. Upon creation, this node takes in a left and right Node + and computes the digest of (left + right). In addition, the constructor + sets the parent node of left and right to this node to allow for + traversal of the tree. + """ + left = None + right = None + + def __init__(self, left, right): + super(InternalNode, self).__init__() + self.left = left + self.right = right + + left.parent = self + right.parent = self + digest_object = securesystemslib.hash.digest(algorithm=HASH_FUNCTION) + + digest_object.update((left.digest + right.digest).encode('utf-8')) + + self.digest = digest_object.hexdigest() + + + + +class Leaf(Node): + """ + This Merkle tree leaf node keeps track of the node contents and name. + The name should correspond with a metadata file and the contents should + contain the snapshot information for that metadata file. 
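+
+  For example (values drawn from the fixture files in this patch),
+  Leaf('role1', {'version': 1}) injects the name and hashes the canonical
+  JSON of {"name": "role1", "version": 1}, which is exactly the
+  'leaf_contents' recorded in 'role1-snapshot.json'.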
+
+  The constructor takes in a name and contents and computes the digest
+  of the contents.  The digest may be provided to save computation time
+  if it has already been computed.
+  """
+  # Merkle Tree leaf
+  contents = None
+  name = None
+
+  def __init__(self, name, contents, digest=None):
+    super(Leaf, self).__init__()
+    # Include the name to ensure the digest differs between elements and
+    # cannot be replayed.
+    contents["name"] = name
+    self.contents = contents
+    self.name = name
+
+    if digest:
+      self.digest = digest
+    else:
+      digest_object = securesystemslib.hash.digest(algorithm=HASH_FUNCTION)
+      # Hash the canonical json form of the data to ensure consistency.
+      json_contents = securesystemslib.formats.encode_canonical(contents)
+
+      digest_object.update(json_contents.encode('utf-8'))
+      self.digest = digest_object.hexdigest()
+
+  def is_leaf(self):
+    return True
+
+
+
+
+def _build_merkle_tree(fileinfodict):
+  """
+  Create a Merkle tree from the snapshot fileinfo.
+
+  Returns the root node and the list of leaf nodes.  The tree is written
+  out to individual snapshot merkle files separately, by
+  _write_merkle_paths().
+  """
+
+  # We will build the merkle tree starting with the leaf nodes.  Each
+  # leaf contains snapshot information for a single metadata file.
+  leaves = []
+  for name, contents in sorted(fileinfodict.items()):
+    if name.endswith(".json"):
+      name = os.path.splitext(name)[0]
+    leaves.append(Leaf(name, contents))
+
+  # Starting with the leaves, combine pairs of nodes to build the tree.
+  # For each pair of nodes, set the first to a left child and the second
+  # as a right child.  Add the resulting parent node to new_nodes.  On
+  # the next iteration, pair the nodes in new_nodes.  In order to handle
+  # an odd number of nodes on any iteration, if this is the last node
+  # in an odd numbered list (there is no next node), add this node to
+  # new_nodes.  End the loop when there is one remaining node in
+  # current_nodes.  This last node will be the root of the tree.
+  current_nodes = leaves
+
+  while(len(current_nodes) > 1):
+    new_nodes = []
+    for i in range(0, len(current_nodes), 2):
+      # If there are an odd number of nodes and this is the last
+      # node, add this node to the next level.
+      if i + 1 >= len(current_nodes):
+        new_nodes.append(current_nodes[i])
+      # Otherwise, use the next two nodes to build a new node.
+      else:
+        n = InternalNode(current_nodes[i], current_nodes[i+1])
+        # Add this node to the next level.
+        new_nodes.append(n)
+    current_nodes = new_nodes
+
+  # The only node remaining in current_nodes will be the root node.
+  root = current_nodes[0]
+
+  # Return the root node and the leaves.  The root hash must be used along
+  # with the path to verify the tree.  The root hash should be securely sent
+  # to each client; to do so, we will add it to the timestamp metadata.
+  # The leaves will be used to find the path to each leaf and send
+  # this path to the client for verification.
+  return root, leaves
+
+def _write_merkle_paths(root, leaves, storage_backend, merkle_directory, version):
+  # The root and leaves must be part of the same fully constructed
+  # Merkle tree.  Create a path from each leaf to the root node.  This path
+  # will be downloaded by the client and used for verification of the tree.
+  # For each step in the path, keep track of both the sibling node and
+  # whether this is a left or a right child.
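+  # For example, for a tree over the leaves [role1, role2, targets]
+  # (as in the fixture files in this patch), role2's path records role1's
+  # leaf hash at step 0 with direction 1 (role2 is the right child), and
+  # the sibling subtree's hash (here, targets' leaf hash) at step 1 with
+  # direction -1.  A direction of -1 means the node being verified is
+  # hashed on the left, and 1 means it is hashed on the right.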
+
+  # Before writing each leaf, make sure the storage_backend is
+  # instantiated.
+  if storage_backend is None:
+    storage_backend = securesystemslib.storage.FilesystemBackend()
+
+  for leaf in leaves:
+    merkle_path = {}
+    current_node = leaf
+    path_directions = {}
+
+    index = 0
+
+    while current_node != root:
+      next_node = current_node.parent
+      # TODO: determine left or right upon node creation.  This currently
+      # determines which sibling to use by finding the sibling that does
+      # not match the current digest.
+      h_left = next_node.left.digest
+      h_right = next_node.right.digest
+      if current_node.digest == h_left:
+        merkle_path[str(index)] = h_right
+        path_directions[str(index)] = -1
+      elif current_node.digest == h_right:
+        merkle_path[str(index)] = h_left
+        path_directions[str(index)] = 1
+      else:
+        # Neither sibling digest matches the current node, so the tree is
+        # malformed.
+        raise securesystemslib.exceptions.Error(
+            'Invalid Merkle tree: node ' + repr(current_node.digest) +
+            ' is not a child of its parent.')
+      index += 1
+
+      current_node = next_node
+
+    # Write the path to the merkle_directory.
+    file_contents = tuf.formats.build_dict_conforming_to_schema(
+        tuf.formats.SNAPSHOT_MERKLE_SCHEMA,
+        leaf_contents=leaf.contents,
+        merkle_path=merkle_path,
+        path_directions=path_directions)
+    file_content = _get_written_metadata(file_contents)
+    file_object = tempfile.TemporaryFile()
+    file_object.write(file_content)
+    filename = os.path.join(merkle_directory, leaf.name + '-snapshot.json')
+
+    # Also write a consistent-snapshot copy (prefixed with the version
+    # number) for auditing and client verification.
+    consistent_filename = os.path.join(merkle_directory, str(version) + '.'
+        + leaf.name + '-snapshot.json')
+    securesystemslib.util.persist_temp_file(file_object, consistent_filename,
+        should_close=False)
+
+    storage_backend.put(file_object, filename)
+    file_object.close()
+
+
+
+
+def _print_merkle_tree(node, level):
+  """
+  Recursive helper used by print_merkle_tree().
+  """
+  print('--' * level + node.digest)
+  if not node.is_leaf():
+    _print_merkle_tree(node.left, level + 1)
+    _print_merkle_tree(node.right, level + 1)
+  else:
+    print('--' * (level + 1) + node.name)
+
+
+
+
+def print_merkle_tree(root):
+  """
+  Helper function that prints the Merkle tree contents, for demos and
+  manual verification of the tree.
+  """
+  print('')
+  _print_merkle_tree(root, 0)
+
+
+
+
 def generate_snapshot_metadata(metadata_directory, version, expiration_date,
     storage_backend, consistent_snapshot=False,
     repository_name='default', use_length=False, use_hashes=False):
@@ -1691,19 +1924,22 @@ def generate_snapshot_metadata(metadata_directory, version, expiration_date,
   # generate_root_metadata, etc. with one function that generates
   # metadata, possibly rolling that upwards into the calling function.
   # There are very few things that really need to be done differently.
-  return tuf.formats.build_dict_conforming_to_schema(
+  metadata = tuf.formats.build_dict_conforming_to_schema(
       tuf.formats.SNAPSHOT_SCHEMA,
       version=version,
       expires=expiration_date,
       meta=fileinfodict)
 
+  return metadata, fileinfodict
+
 
 
 
 def generate_timestamp_metadata(snapshot_file_path, version, expiration_date,
-    storage_backend, repository_name, use_length=True, use_hashes=True):
+    storage_backend, repository_name, use_length=True, use_hashes=True,
+    roleinfo=None):
   """
     Generate the timestamp metadata object.  The 'snapshot.json' file must
@@ -1741,6 +1977,10 @@ def generate_timestamp_metadata(snapshot_file_path, version, expiration_date,
       metadata file in the timestamp metadata.
      Default is True.
 
+    roleinfo:
+      The roleinfo for the timestamp role.  When a snapshot Merkle tree is
+      in use, this is used to access the Merkle tree's root hash.
+
 
     securesystemslib.exceptions.FormatError, if the generated timestamp
     metadata object cannot be formatted correctly, or one of the arguments
     is improperly
@@ -1776,6 +2016,15 @@ def generate_timestamp_metadata(snapshot_file_path, version, expiration_date,
       tuf.formats.make_metadata_fileinfo(snapshot_version['version'],
           length, hashes)
 
+  # When a snapshot Merkle tree is in use, include its root hash, computed
+  # during snapshot generation, in the timestamp metadata.
+  if roleinfo and 'merkle_root' in roleinfo:
+    merkle_root = roleinfo['merkle_root']
+    return tuf.formats.build_dict_conforming_to_schema(
+        tuf.formats.TIMESTAMP_SCHEMA,
+        version=version,
+        expires=expiration_date,
+        meta=snapshot_fileinfo,
+        merkle_root=merkle_root)
+
   # Generate the timestamp metadata object.
   # Use generalized build_dict_conforming_to_schema func to produce a dict that
   # contains all the appropriate information for timestamp metadata,
diff --git a/tuf/repository_tool.py b/tuf/repository_tool.py
index 1fe6a51e83..f6ea98924f 100755
--- a/tuf/repository_tool.py
+++ b/tuf/repository_tool.py
@@ -266,7 +266,7 @@ def __init__(self, repository_directory, metadata_directory,
 
 
 
-  def writeall(self, consistent_snapshot=False, use_existing_fileinfo=False):
+  def writeall(self, consistent_snapshot=False, use_existing_fileinfo=False, snapshot_merkle=False):
     """
       Write all the JSON Metadata objects to their corresponding files for
@@ -296,6 +296,10 @@ def __init__(self, repository_directory, metadata_directory,
         written as-is (True) or whether hashes should be generated (False,
         requires access to the targets files on-disk).
 
+      snapshot_merkle:
+        Whether to generate snapshot Merkle metadata, in addition to the
+        standard snapshot metadata.
+
       tuf.exceptions.UnsignedMetadataError, if any of the top-level and
       delegated roles do not have the minimum threshold of signatures.
@@ -373,7 +377,8 @@ def writeall(self, consistent_snapshot=False, use_existing_fileinfo=False):
           consistent_snapshot, filenames,
           repository_name=self._repository_name,
           use_snapshot_length=self._use_snapshot_length,
-          use_snapshot_hashes=self._use_snapshot_hashes)
+          use_snapshot_hashes=self._use_snapshot_hashes,
+          snapshot_merkle=snapshot_merkle)
 
     # Generate the 'timestamp.json' metadata file.
     if 'timestamp' in dirty_rolenames:
@@ -382,7 +387,8 @@ def writeall(self, consistent_snapshot=False, use_existing_fileinfo=False):
           self._storage_backend, consistent_snapshot, filenames,
           repository_name=self._repository_name,
           use_timestamp_length=self._use_timestamp_length,
-          use_timestamp_hashes=self._use_timestamp_hashes)
+          use_timestamp_hashes=self._use_timestamp_hashes,
+          snapshot_merkle=snapshot_merkle)
 
     tuf.roledb.unmark_dirty(dirty_rolenames, self._repository_name)
diff --git a/tuf/settings.py b/tuf/settings.py
index 2dcc8e3b25..cbd0414a92 100755
--- a/tuf/settings.py
+++ b/tuf/settings.py
@@ -74,6 +74,8 @@
 # download Targets metadata.
 DEFAULT_TARGETS_REQUIRED_LENGTH = 5000000 #bytes
 
+# The expected maximum length of snapshot Merkle path files.
+MERKLE_FILELENGTH = 10000 #bytes
+
 # Set a timeout value in seconds (float) for non-blocking socket operations.
 SOCKET_TIMEOUT = 4 #seconds
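To make the verification direction concrete, here is a minimal client-side sketch: it recomputes the root hash from a leaf's contents, merkle_path, and path_directions, then compares it to the merkle_root carried in timestamp metadata. The function name verify_merkle_path is hypothetical and not defined by this patch; the securesystemslib calls are the same ones the patch uses, and a direction of -1 indicates the current node was the left child (its sibling is appended on the right).

import securesystemslib.formats
import securesystemslib.hash

def verify_merkle_path(leaf_contents, merkle_path, path_directions,
    expected_root, algorithm='sha256'):
  # Recompute the leaf digest from the canonical JSON of its contents,
  # mirroring Leaf.__init__() above.
  digest_object = securesystemslib.hash.digest(algorithm=algorithm)
  json_contents = securesystemslib.formats.encode_canonical(leaf_contents)
  digest_object.update(json_contents.encode('utf-8'))
  current_digest = digest_object.hexdigest()

  # Walk up the tree, hashing the current digest together with each
  # recorded sibling, in the order the writer stored them.
  for index in sorted(merkle_path, key=int):
    sibling = merkle_path[index]
    if path_directions[index] == -1:
      combined = current_digest + sibling
    else:
      combined = sibling + current_digest
    digest_object = securesystemslib.hash.digest(algorithm=algorithm)
    digest_object.update(combined.encode('utf-8'))
    current_digest = digest_object.hexdigest()

  # The recomputed root must equal the merkle_root from timestamp metadata.
  return current_digest == expected_root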
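And a minimal sketch of exercising the new flag through the existing repository_tool API (the repository path is a placeholder, and key loading and signing are elided):

import tuf.repository_tool as repo_tool

# Load an existing repository and mark roles dirty as usual.
repository = repo_tool.load_repository('path/to/repository')

# Writing with snapshot_merkle=True additionally produces one
# '<rolename>-snapshot.json' path file per snapshot entry and records the
# Merkle root in timestamp metadata.
repository.writeall(snapshot_merkle=True)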