Skip to content

Commit c842c73

Browse files
authored
Merge pull request #1012 from joshuagl/joshuagl/hashed-bins-perf
Improve performance of `delegate_hashed_bins` when delegating to a large number of bins
2 parents 3232697 + 6703b51 commit c842c73

File tree

1 file changed

+130
-50
lines changed

1 file changed

+130
-50
lines changed

tuf/repository_tool.py

Lines changed: 130 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -2174,6 +2174,51 @@ def get_delegated_rolenames(self):
21742174

21752175

21762176

2177+
def _create_delegated_target(self, rolename, keyids, threshold, paths):
    """
    Build and return the Targets object that represents the delegated role
    'rolename'.

    The new role starts at version 0 with no signatures, no signing keys and
    no delegations of its own.  'keyids', 'threshold' and 'paths' are stored
    in its roleinfo unchanged.  Its initial expiration is the current time
    plus TARGETS_EXPIRATION (a module-level constant defined outside this
    function; described by the original author as 3 months), formatted as an
    ISO 8601 string with a trailing 'Z'.

    This helper only constructs the object.  Callers are responsible for
    registering it (e.g. via add_delegated_role()) and for recording the
    delegation in the delegating role's roleinfo.
    """

    # Expiry timestamp: now + TARGETS_EXPIRATION, rendered as '<isoformat>Z'.
    expires_at = tuf.formats.unix_timestamp_to_datetime(
        int(time.time() + TARGETS_EXPIRATION)).isoformat() + 'Z'

    # Initial roleinfo for the delegated role.  Key order matches the
    # original literal so that iteration over the dict is unchanged.
    delegated_roleinfo = {
        'name': rolename,
        'keyids': keyids,
        'signing_keyids': [],
        'threshold': threshold,
        'version': 0,
        'expires': expires_at,
        'signatures': [],
        'partial_loaded': False,
        'paths': paths,
        'delegations': {'keys': {}, 'roles': []},
    }

    # The delegated role shares this object's targets directory, parent
    # targets object and repository name.
    return Targets(
        self._targets_directory,
        rolename,
        delegated_roleinfo,
        parent_targets_object=self._parent_targets_object,
        repository_name=self._repository_name)
2198+
2199+
2200+
2201+
2202+
2203+
def _update_roledb_delegations(self, keydict, delegations_roleinfo):
    """
    Record new delegations for this role in the roledb with a single
    get/update round trip.

    'keydict' (keyid -> key metadata) is merged into this role's
    'delegations']['keys'], and every entry of 'delegations_roleinfo' (an
    iterable of delegated-role dicts) is appended, in order, to this role's
    ['delegations']['roles'].  The modified roleinfo is then written back
    with tuf.roledb.update_roleinfo().
    """

    own_roleinfo = tuf.roledb.get_roleinfo(self.rolename,
        self._repository_name)

    delegations = own_roleinfo['delegations']
    delegations['keys'].update(keydict)
    delegations['roles'].extend(delegations_roleinfo)

    # Write everything back in one call, however many delegations were added.
    tuf.roledb.update_roleinfo(self.rolename, own_roleinfo,
        repository_name=self._repository_name)
2217+
2218+
2219+
2220+
2221+
21772222
def delegate(self, rolename, public_keys, paths, threshold=1,
21782223
terminating=False, list_of_targets=None, path_hash_prefixes=None):
21792224
"""
@@ -2270,19 +2315,7 @@ def delegate(self, rolename, public_keys, paths, threshold=1,
22702315

22712316
# Keep track of the valid keyids (added to the new Targets object) and
22722317
# their keydicts (added to this Targets delegations).
2273-
keyids = []
2274-
keydict = {}
2275-
2276-
# Add all the keys in 'public_keys' to tuf.keydb.
2277-
for key in public_keys:
2278-
keyid = key['keyid']
2279-
key_metadata_format = securesystemslib.keys.format_keyval_to_metadata(
2280-
key['keytype'], key['scheme'], key['keyval'])
2281-
2282-
# Update 'keyids' and 'keydict'.
2283-
new_keydict = {keyid: key_metadata_format}
2284-
keydict.update(new_keydict)
2285-
keyids.append(keyid)
2318+
keyids, keydict = _keys_to_keydict(public_keys)
22862319

22872320
# Ensure the paths of 'list_of_targets' are located in the repository's
22882321
# targets directory.
@@ -2308,34 +2341,17 @@ def delegate(self, rolename, public_keys, paths, threshold=1,
23082341
logger.warning(repr(path) + ' is not located in the repository\'s'
23092342
' targets directory: ' + repr(self._targets_directory))
23102343

2311-
# Create a new Targets object for the 'rolename' delegation. An initial
2312-
# expiration is set (3 months from the current time).
2313-
expiration = tuf.formats.unix_timestamp_to_datetime(
2314-
int(time.time() + TARGETS_EXPIRATION))
2315-
expiration = expiration.isoformat() + 'Z'
2316-
2317-
roleinfo = {'name': rolename, 'keyids': keyids, 'signing_keyids': [],
2318-
'threshold': threshold, 'version': 0,
2319-
'expires': expiration, 'signatures': [], 'partial_loaded': False,
2320-
'paths': relative_targetpaths, 'delegations': {'keys': {},
2321-
'roles': []}}
2322-
23232344
# The new targets object is added as an attribute to this Targets object.
2324-
new_targets_object = Targets(self._targets_directory, rolename, roleinfo,
2325-
parent_targets_object=self._parent_targets_object,
2326-
repository_name=self._repository_name)
2327-
2328-
# Update the 'delegations' field of the current role.
2329-
current_roleinfo = tuf.roledb.get_roleinfo(self.rolename, self._repository_name)
2330-
current_roleinfo['delegations']['keys'].update(keydict)
2345+
new_targets_object = self._create_delegated_target(rolename, keyids,
2346+
threshold, relative_targetpaths)
23312347

23322348
# Update the roleinfo of this role. A ROLE_SCHEMA object requires only
23332349
# 'keyids', 'threshold', and 'paths'.
23342350
roleinfo = {'name': rolename,
2335-
'keyids': roleinfo['keyids'],
2336-
'threshold': roleinfo['threshold'],
2351+
'keyids': keyids,
2352+
'threshold': threshold,
23372353
'terminating': terminating,
2338-
'paths': list(roleinfo['paths'].keys())}
2354+
'paths': list(relative_targetpaths.keys())}
23392355

23402356
if paths:
23412357
roleinfo['paths'] = paths
@@ -2346,25 +2362,22 @@ def delegate(self, rolename, public_keys, paths, threshold=1,
23462362
# or 'paths'.
23472363
del roleinfo['paths']
23482364

2349-
current_roleinfo['delegations']['roles'].append(roleinfo)
2350-
tuf.roledb.update_roleinfo(self.rolename, current_roleinfo,
2351-
repository_name=self._repository_name)
2352-
23532365
# Update the public keys of 'new_targets_object'.
23542366
for key in public_keys:
23552367
new_targets_object.add_verification_key(key)
23562368

23572369
# Add the new delegation to the top-level 'targets' role object (i.e.,
23582370
# 'repository.targets()'). For example, 'django', which was delegated by
23592371
# repository.targets('claimed'), is added as repository.targets('django').
2372+
if self.rolename != 'targets':
2373+
self._parent_targets_object.add_delegated_role(rolename,
2374+
new_targets_object)
23602375

23612376
# Add 'new_targets_object' to the 'targets' role object (this object).
2362-
if self.rolename == 'targets':
2363-
self.add_delegated_role(rolename, new_targets_object)
2377+
self.add_delegated_role(rolename, new_targets_object)
23642378

2365-
else:
2366-
self._parent_targets_object.add_delegated_role(rolename, new_targets_object)
2367-
self.add_delegated_role(rolename, new_targets_object)
2379+
# Update the 'delegations' field of the current role.
2380+
self._update_roledb_delegations(keydict, [roleinfo])
23682381

23692382

23702383

@@ -2533,12 +2546,56 @@ def delegate_hashed_bins(self, list_of_targets, keys_of_hashed_bins,
25332546
hash_prefix = _get_hash(target_path.replace('\\', '/').lstrip('/'))[:prefix_length]
25342547
ordered_roles[int(hash_prefix, 16) // bin_size]["target_paths"].append(target_path)
25352548

2536-
for bin_rolename in ordered_roles:
2537-
# Delegate from the "unclaimed" targets role to each 'bin_rolename'
2538-
self.delegate(bin_rolename['name'], keys_of_hashed_bins, [],
2539-
list_of_targets=bin_rolename['target_paths'],
2540-
path_hash_prefixes=bin_rolename['target_hash_prefixes'])
2541-
logger.debug('Delegated from ' + repr(self.rolename) + ' to ' + repr(bin_rolename))
2549+
keyids, keydict = _keys_to_keydict(keys_of_hashed_bins)
2550+
2551+
# A queue of roleinfo dicts that need to be added to the roledb
2552+
delegated_roleinfos = []
2553+
2554+
for bin_role in ordered_roles:
2555+
# TODO: originally we just called self.delegate() for each item in this
2556+
# iteration. However, this is *extremely* slow when creating a large
2557+
# number of hashed bins, i.e. 16k as is recommended for PyPI usage in
2558+
# PEP 458: https://www.python.org/dev/peps/pep-0458/
2559+
# The source of the slowness is the interactions with the roledb, which
2560+
# causes several deep copies of roleinfo dictionaries:
2561+
# https://github.com/theupdateframework/tuf/issues/1005
2562+
# Once the underlying issues in #1005 are resolved, i.e. some combination
2563+
# of the intermediate and long-term fixes, we may simplify here by
2564+
# switching back to just calling self.delegate(), but until that time we
2565+
# queue roledb interactions and perform all updates to the roledb in one
2566+
# operation at the end of the iteration.
2567+
2568+
relative_paths = {}
2569+
targets_directory_length = len(self._targets_directory)
2570+
for path in bin_role['target_paths']:
2571+
relative_paths.update({path[targets_directory_length:]: {}})
2572+
2573+
# Delegate from the "unclaimed" targets role to each 'bin_role'
2574+
target = self._create_delegated_target(bin_role['name'], keyids, 1,
2575+
relative_paths)
2576+
2577+
roleinfo = {'name': bin_role['name'],
2578+
'keyids': keyids,
2579+
'threshold': 1,
2580+
'terminating': False,
2581+
'path_hash_prefixes': bin_role['target_hash_prefixes']}
2582+
delegated_roleinfos.append(roleinfo)
2583+
2584+
for key in keys_of_hashed_bins:
2585+
target.add_verification_key(key)
2586+
2587+
# Add the new delegation to the top-level 'targets' role object (i.e.,
2588+
# 'repository.targets()').
2589+
if self.rolename != 'targets':
2590+
self._parent_targets_object.add_delegated_role(bin_role['name'],
2591+
target)
2592+
2593+
# Add 'target' as a delegated role of this Targets object.
2594+
self.add_delegated_role(bin_role['name'], target)
2595+
logger.debug('Delegated from ' + repr(self.rolename) + ' to ' + repr(bin_role))
2596+
2597+
2598+
self._update_roledb_delegations(keydict, delegated_roleinfos)
25422599

25432600

25442601

@@ -2697,6 +2754,29 @@ def delegations(self):
26972754

26982755

26992756

2757+
def _keys_to_keydict(keys):
2758+
"""
2759+
Iterate over a list of keys and return a list of keyids and a dict mapping
2760+
keyid to key metadata
2761+
"""
2762+
keyids = []
2763+
keydict = {}
2764+
2765+
for key in keys:
2766+
keyid = key['keyid']
2767+
key_metadata_format = securesystemslib.keys.format_keyval_to_metadata(
2768+
key['keytype'], key['scheme'], key['keyval'])
2769+
2770+
new_keydict = {keyid: key_metadata_format}
2771+
keydict.update(new_keydict)
2772+
keyids.append(keyid)
2773+
2774+
return keyids, keydict
2775+
2776+
2777+
2778+
2779+
27002780
def _get_hash(target_filepath):
27012781
"""
27022782
<Purpose>

0 commit comments

Comments
 (0)