Skip to content

Commit 44b1203

Browse files
authored
Bug 1428000 - Migrate: only annotate affected files (#34)
The speed-up is massive: running the bug 1411012 migration on the Italian repo used to take ~25 seconds on my machine; after this patch it's down to 0.7 seconds. Running the migration on all locales (without pull/push) now takes less than 2 minutes, compared to 30 minutes before the patch.

In `hg annotate` data, `path` is the path relative to the CWD and `abspath` is the path relative to the root of the repo. When the CWD is the repo root (CWD == root), the two are identical, so we can drop `abspath` to reduce the number of different kinds of paths used in the code.
1 parent 479af5d commit 44b1203

File tree

2 files changed

+25
-29
lines changed

2 files changed

+25
-29
lines changed

tools/migrate/blame.py

Lines changed: 18 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,22 @@
11
import argparse
22
import json
3+
import os
4+
5+
from compare_locales.parser import getParser, Junk
36
import hglib
47
from hglib.util import b, cmdbuilder
5-
from compare_locales.parser import getParser, Junk
68

79

810
class Blame(object):
9-
def __init__(self, repopath):
10-
self.client = hglib.open(repopath)
11+
def __init__(self, client):
12+
self.client = client
1113
self.users = []
1214
self.blame = {}
1315

14-
def main(self):
16+
def attribution(self, file_paths):
1517
args = cmdbuilder(
16-
b('annotate'), self.client.root(), d=True, u=True, T='json')
18+
b('annotate'), template='json', date=True, user=True,
19+
cwd=self.client.root(), file=map(b, file_paths))
1720
blame_json = ''.join(self.client.rawcommand(args))
1821
file_blames = json.loads(blame_json)
1922

@@ -24,16 +27,16 @@ def main(self):
2427
'blame': self.blame}
2528

2629
def handleFile(self, file_blame):
27-
abspath = file_blame['abspath']
30+
path = file_blame['path']
2831

2932
try:
30-
parser = getParser(abspath)
33+
parser = getParser(path)
3134
except UserWarning:
3235
return
3336

34-
self.blame[abspath] = {}
37+
self.blame[path] = {}
3538

36-
parser.readFile(file_blame['path'])
39+
parser.readFile(os.path.join(self.client.root(), path))
3740
entities, emap = parser.parse()
3841
for e in entities:
3942
if isinstance(e, Junk):
@@ -49,12 +52,13 @@ def handleFile(self, file_blame):
4952
if user not in self.users:
5053
self.users.append(user)
5154
userid = self.users.index(user)
52-
self.blame[abspath][e.key] = [userid, timestamp]
55+
self.blame[path][e.key] = [userid, timestamp]
5356

5457
if __name__ == '__main__':
5558
parser = argparse.ArgumentParser()
56-
parser.add_argument("repopath")
59+
parser.add_argument('repo_path')
60+
parser.add_argument('file_path', nargs='+')
5761
args = parser.parse_args()
58-
blame = Blame(args.repopath)
59-
blimey = blame.main()
60-
print(json.dumps(blimey, indent=4, separators=(',', ': ')))
62+
blame = Blame(hglib.open(args.repo_path))
63+
attrib = blame.attribution(args.file_path)
64+
print(json.dumps(attrib, indent=4, separators=(',', ': ')))

tools/migrate/migrate-l10n.py

Lines changed: 7 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,8 @@
1717
from blame import Blame
1818

1919

20-
def main(lang, reference_dir, localization_dir, blame, migrations, dry_run):
20+
def main(lang, reference_dir, localization_dir, migrations, dry_run):
2121
"""Run migrations and commit files with the result."""
22-
changesets = convert_blame_to_changesets(blame)
2322
client = hglib.open(localization_dir)
2423

2524
for migration in migrations:
@@ -38,6 +37,12 @@ def main(lang, reference_dir, localization_dir, blame, migrations, dry_run):
3837
# Keep track of how many changesets we're committing.
3938
index = 0
4039

40+
# Annotate legacy localization files used as sources by this migration
41+
# to preserve attribution of translations.
42+
files = ctx.localization_resources.keys()
43+
blame = Blame(client).attribution(files)
44+
changesets = convert_blame_to_changesets(blame)
45+
4146
for changeset in changesets:
4247
# Run the migration for the changeset.
4348
snapshot = ctx.serialize_changeset(changeset['changes'])
@@ -91,10 +96,6 @@ def main(lang, reference_dir, localization_dir, blame, migrations, dry_run):
9196
'--localization-dir', type=str,
9297
help='directory for localization files'
9398
)
94-
parser.add_argument(
95-
'--blame', type=argparse.FileType(), default=None,
96-
help='path to a JSON with blame information'
97-
)
9899
parser.add_argument(
99100
'--dry-run', action='store_true',
100101
help='do not write to disk nor commit any changes'
@@ -106,19 +107,10 @@ def main(lang, reference_dir, localization_dir, blame, migrations, dry_run):
106107

107108
args = parser.parse_args()
108109

109-
if args.blame:
110-
# Load pre-computed blame from a JSON file.
111-
blame = json.load(args.blame)
112-
else:
113-
# Compute blame right now.
114-
print('Annotating {}'.format(args.localization_dir))
115-
blame = Blame(args.localization_dir).main()
116-
117110
main(
118111
lang=args.lang,
119112
reference_dir=args.reference_dir,
120113
localization_dir=args.localization_dir,
121-
blame=blame,
122114
migrations=map(importlib.import_module, args.migrations),
123115
dry_run=args.dry_run
124116
)

0 commit comments

Comments
 (0)