|
| 1 | +""" |
| 2 | +A TUF hash bin delegation example using the low-level TUF Metadata API. |
| 3 | +
|
| 4 | +As 'repository_tool' and 'repository_lib' are being deprecated, hash bin |
| 5 | +delegation interfaces are no longer available in this implementation. The |
| 6 | +example code in this file demonstrates how to easily implement those |
| 7 | +interfaces, and how to use them together with the TUF metadata API, to perform |
| 8 | +hash bin delegation. |
| 9 | +
|
| 10 | +Contents: |
| 11 | +- Re-usable hash bin delegation helpers |
| 12 | +- Basic hash bin delegation example |
| 13 | +
|
| 14 | +See 'basic_repo.py' for a more comprehensive TUF metadata API example. |
| 15 | +
|
| 16 | +NOTE: Metadata files will be written to a 'tmp*'-directory in CWD. |
| 17 | +
|
| 18 | +""" |
| 19 | +import hashlib |
| 20 | +import os |
| 21 | +import tempfile |
| 22 | +from collections import OrderedDict |
| 23 | +from datetime import datetime, timedelta |
| 24 | +from pathlib import Path |
| 25 | + |
| 26 | +from securesystemslib.keys import generate_ed25519_key |
| 27 | +from securesystemslib.signer import SSlibSigner |
| 28 | + |
| 29 | +from tuf.api.metadata import ( |
| 30 | + DelegatedRole, |
| 31 | + Delegations, |
| 32 | + Key, |
| 33 | + Metadata, |
| 34 | + TargetFile, |
| 35 | + Targets, |
| 36 | +) |
| 37 | +from tuf.api.serialization.json import JSONSerializer |
| 38 | + |
| 39 | + |
def _in(days):
    """Return the current UTC time shifted by 'days' days, without microseconds.

    The value is a naive datetime object (no tzinfo attached), because it is
    built from 'datetime.utcnow()'.
    """
    now = datetime.utcnow()
    offset = timedelta(days=days)
    # Truncate to whole seconds before applying the offset.
    return now.replace(microsecond=0) + offset
| 43 | + |
| 44 | + |
# Specification version string passed as 'spec_version' to every Targets
# object created in this example.
SPEC_VERSION = "1.0.19"

# Module-level registries that are filled in further down: metadata objects
# keyed by role name, and signing keys keyed by the name of their owner.
roles = dict()
keys = dict()
| 48 | + |
| 49 | +# Hash bin delegation |
| 50 | +# =================== |
| 51 | +# Hash bin delegation allows automatically distributing a large number of |
| 52 | +# target files over a fixed number of targets metadata, in order to reduce the |
| 53 | +# size of the specific targets metadata a client needs to download for a given |
| 54 | +# target file, and thus reduce the metadata network overhead caused by TUF. |
| 55 | +# |
| 56 | +# It is achieved by uniformly delegating the responsibility for target files |
| 57 | +# based on the leading digits of the hash of their file path, where each |
| 58 | +# delegated targets role is responsible for an incremental range of target path |
| 59 | +# hash prefixes. |
| 60 | +# |
| 61 | +# The only number that needs to be configured is the number of bins, everything |
| 62 | +# else is derived using the mathematical operations outlined below. An |
| 63 | +# appropriate number of bins depends on the expected number of target files in |
| 64 | +# a repository. For the purpose of this example we choose... |
| 65 | + |
# The number of bins is the only value that is configured by hand. The prefix
# length, the total number of prefixes and the number of prefixes per bin are
# all derived from it. A power of 2 is chosen so that the hash prefixes can be
# distributed evenly over the bins.
NUMBER_OF_BINS = 32

# Length of a hash prefix, i.e. how many leading hex digits of a hash are
# looked at to assign it to a bin. It equals the number of hex digits needed
# to write the highest bin index (number of bins minus one, as counting starts
# at zero).
PREFIX_LEN = len(format(NUMBER_OF_BINS - 1, "x"))  # 2

# Total number of distinct hash prefixes: every hex string that can be written
# with 'PREFIX_LEN' digits.
NUMBER_OF_PREFIXES = pow(16, PREFIX_LEN)  # 256

# Number of hash prefixes served by each bin. With a power-of-two number of
# bins the prefixes divide evenly, so every bin has the same size.
BIN_SIZE = NUMBER_OF_PREFIXES // NUMBER_OF_BINS  # 8
| 82 | + |
| 83 | + |
| 84 | +# Helpers |
| 85 | +# ------- |
def _bin_name(low, high):
    """Build the name of the bin that serves hash prefixes 'low' to 'high'.

    Both bounds are rendered as lowercase hex, zero-padded on the left to the
    global prefix length so that all bin names have a uniform length. A bin
    that serves a single prefix ('low' equals 'high') is named after that
    prefix alone; any other bin is named '<low>-<high>'.
    """
    first = f"{low:0{PREFIX_LEN}x}"
    if low == high:
        return first

    last = f"{high:0{PREFIX_LEN}x}"
    return f"{first}-{last}"
| 97 | + |
| 98 | + |
def generate_hash_bins():
    """Yield a (bin name, list of hash prefixes) pair for every bin.

    Bins are produced in ascending order of the hash prefixes they serve.
    """
    # Walk over all hash prefixes in steps of one bin size; each step is the
    # lowest prefix of the next bin.
    for low in range(0, NUMBER_OF_PREFIXES, BIN_SIZE):
        stop = low + BIN_SIZE
        # Zero-left-padded hex form of every prefix served by this bin.
        served_prefixes = [
            f"{prefix:0{PREFIX_LEN}x}" for prefix in range(low, stop)
        ]
        yield _bin_name(low, stop - 1), served_prefixes
| 111 | + |
| 112 | + |
def find_hash_bin(path):
    """Return the name of the bin responsible for the target file at 'path'.

    The bin is chosen by the leading hex digits of the SHA-256 digest of the
    UTF-8 encoded target path.
    """
    # Hash the target path and read its prefix as a number, using the global
    # prefix length for the configured number of bins.
    digest = hashlib.sha256(path.encode("utf-8")).hexdigest()
    prefix = int(digest[:PREFIX_LEN], 16)

    # Round the prefix down to a multiple of the bin size to get the lower
    # bound of its bin; the upper bound follows from the bin size.
    low = (prefix // BIN_SIZE) * BIN_SIZE
    return _bin_name(low, low + BIN_SIZE - 1)
| 126 | + |
| 127 | + |
| 128 | +# Keys |
| 129 | +# ---- |
| 130 | +# Given that the primary concern of hash bin delegation is to reduce network |
| 131 | +# overhead, it is acceptable to re-use one signing key for all delegated |
| 132 | +# targets roles (bin-n). However, we do use a different key for the delegating |
| 133 | +# targets role (bins). Considering the high responsibility but also low |
| 134 | +# volatility of the bins role, it is recommended to require signature |
| 135 | +# thresholds and keep the keys offline in a real-world scenario. |
| 136 | + |
| 137 | +# NOTE: See "Targets delegation" and "Signature thresholds" paragraphs in |
| 138 | +# 'basic_repo.py' for more details |
# Generate two ed25519 keys: "bin-n" is shared by all delegated bin roles,
# "bins" belongs to the delegating bins role.
for name in ("bin-n", "bins"):
    keys[name] = generate_ed25519_key()
| 141 | + |
| 142 | + |
| 143 | +# Targets roles |
| 144 | +# ------------- |
| 145 | +# NOTE: See "Targets" and "Targets delegation" paragraphs in 'basic_repo.py' |
| 146 | +# example for more details about the Targets object. |
| 147 | + |
# Create the preliminary delegating targets role (bins). Its delegations start
# out with only the public key of the delegated targets roles (bin_n) in the
# key store and an empty role list. Delegation details are updated below.
bin_n_public_key = Key.from_securesystemslib_key(keys["bin-n"])
bins_delegations = Delegations(
    keys={keys["bin-n"]["keyid"]: bin_n_public_key},
    roles=OrderedDict(),
)
roles["bins"] = Metadata[Targets](
    signed=Targets(
        version=1,
        spec_version=SPEC_VERSION,
        expires=_in(365),
        targets={},
        delegations=bins_delegations,
    ),
    signatures=OrderedDict(),
)
| 167 | + |
# The hash bin generator yields an ordered sequence of incremental hash bin
# names (ranges), each with the hash prefixes that bin is responsible for,
# e.g.:
#
#   bin_n_name:  00-07  bin_n_hash_prefixes: 00 01 02 03 04 05 06 07
#                08-0f                       08 09 0a 0b 0c 0d 0e 0f
#                10-17                       10 11 12 13 14 15 16 17
#                ...                         ...
#                f8-ff                       f8 f9 fa fb fc fd fe ff
bins_delegated_roles = roles["bins"].signed.delegations.roles
for bin_n_name, bin_n_hash_prefixes in generate_hash_bins():
    # Register the delegation details for this delegated targets role (bin_n)
    # in the delegating targets role (bins).
    bins_delegated_roles[bin_n_name] = DelegatedRole(
        name=bin_n_name,
        keyids=[keys["bin-n"]["keyid"]],
        threshold=1,
        terminating=False,
        path_hash_prefixes=bin_n_hash_prefixes,
    )

    # Create the delegated targets role (bin_n) itself, with no targets yet.
    bin_n_targets = Targets(
        version=1,
        spec_version=SPEC_VERSION,
        expires=_in(7),
        targets={},
    )
    roles[bin_n_name] = Metadata[Targets](
        signed=bin_n_targets,
        signatures=OrderedDict(),
    )
| 194 | + |
| 195 | +# Add target file |
| 196 | +# --------------- |
| 197 | +# For the purpose of this example we will protect the integrity of this very |
| 198 | +# example script by adding its file info to the corresponding bin metadata. |
| 199 | + |
| 200 | +# NOTE: See "Targets" paragraph in 'basic_repo.py' example for more details |
| 201 | +# about adding target file infos to targets metadata. |
local_path = Path(__file__).resolve()
# The target path consists of the last two components of the resolved script
# path, i.e. '<parent directory name>/<file name>'.
target_path = "/".join(local_path.parts[-2:])
target_file_info = TargetFile.from_file(target_path, local_path)

# The right bin for a target file is determined by the 'target_path' hash,
# e.g.:
#
#   target_path:                'repo_example/hashed_bin_delegation.py'
#   target_path (hash digest):  '85e1a6c06305bd9c1e15c7ae565fd16ea304bfc...'
#
#   --> considered hash prefix '85', falls into bin '80-87'
bin_for_target = find_hash_bin(target_path)
bin_metadata = roles[bin_for_target]
bin_metadata.signed.targets[target_path] = target_file_info
| 214 | + |
| 215 | + |
| 216 | +# Sign and persist |
| 217 | +# ---------------- |
| 218 | +# Sign all metadata and persist to temporary directory at CWD for review |
| 219 | +# (most notably see 'bins.json' and '80-87.json'). |
| 220 | + |
| 221 | +# NOTE: See "Persist metadata" paragraph in 'basic_repo.py' example for more |
| 222 | +# details about serialization formats and metadata file name convention. |
PRETTY = JSONSerializer(compact=False)
TMP_DIR = tempfile.mkdtemp(dir=os.getcwd())

for role_name, role in roles.items():
    # The delegating role (bins) is signed with its own key; every other role
    # is signed with the shared bin-n key.
    key = keys["bins" if role_name == "bins" else "bin-n"]
    role.sign(SSlibSigner(key))

    # Each role is written to '<role name>.json' in the temporary directory.
    filepath = os.path.join(TMP_DIR, f"{role_name}.json")
    role.to_file(filepath, serializer=PRETTY)
0 commit comments