26
26
27
27
"""
28
28
29
+ # IMPORTANT NOTE
30
+ #
31
+ # This script should run on pandas versions at least as far back as 0.9.1.
32
+ # devs should be able to use the latest version of this script with
33
+ # any dusty old commit and expect it to "just work".
34
+ # One way in which this is useful is when collecting historical data,
35
+ # where writing some logic around this script may prove easier
36
+ # in some cases than running vbench directly (think perf bisection).
37
+ #
38
+ # *please*, when you modify this script for whatever reason,
39
+ # make sure you do not break its functionality when running under older
40
+ # pandas versions.
41
+ # Note that deprecation warnings are turned off in main(), so there's
42
+ # no need to change the actual code to suppress such warnings.
43
+
29
44
import shutil
30
45
import os
31
46
import sys
37
52
import random
38
53
import numpy as np
39
54
55
+ import pandas as pd
40
56
from pandas import DataFrame , Series
41
57
42
58
from suite import REPO_PATH
43
-
59
+ VB_DIR = os . path . dirname ( os . path . abspath ( __file__ ))
44
60
DEFAULT_MIN_DURATION = 0.01
45
61
HEAD_COL = "head[ms]"
46
62
BASE_COL = "base[ms]"
47
63
64
+ try :
65
+ import git # gitpython
66
+ except Exception :
67
+ print ("Error: Please install the `gitpython` package\n " )
68
+ sys .exit (1 )
48
69
49
70
class RevParseAction(argparse.Action):
    """argparse action that stores the abbreviated hash of a git revision.

    The value given on the command line is resolved with
    ``git rev-parse --short`` (run in the current working directory) and
    the stripped output is stored on the namespace under ``self.dest``.
    """

    def __call__(self, parser, namespace, values, option_string=None):
        """Resolve `values` to a short commit hash and store it.

        Raises
        ------
        subprocess.CalledProcessError
            If ``git rev-parse`` exits with a non-zero status.
        """
        import subprocess
        # Pass an argument list rather than a shell string, so the
        # user-supplied revision is never interpreted by a shell.
        cmd = ['git', 'rev-parse', '--short', values]
        # universal_newlines=True makes check_output return text instead of
        # bytes on Python 3, while remaining valid on Python 2.7.
        rev_parse = subprocess.check_output(cmd, universal_newlines=True)
        setattr(namespace, self.dest, rev_parse.strip())
55
76
@@ -66,6 +87,14 @@ def __call__(self, parser, namespace, values, option_string=None):
66
87
parser .add_argument ('-t' , '--target-commit' ,
67
88
help = 'The commit to compare against the baseline (default: HEAD).' ,
68
89
type = str , action = RevParseAction )
90
+ parser .add_argument ('--base-pickle' ,
91
+ help = 'name of pickle file with timings data generated by a former `-H -d FILE` run. ' \
92
+ 'filename must be of the form <hash>-*.* or specify --base-commit seperately' ,
93
+ type = str )
94
+ parser .add_argument ('--target-pickle' ,
95
+ help = 'name of pickle file with timings data generated by a former `-H -d FILE` run ' \
96
+ 'filename must be of the form <hash>-*.* or specify --target-commit seperately' ,
97
+ type = str )
69
98
parser .add_argument ('-m' , '--min-duration' ,
70
99
help = 'Minimum duration (in ms) of baseline test for inclusion in report (default: %.3f).' % DEFAULT_MIN_DURATION ,
71
100
type = float ,
@@ -187,7 +216,7 @@ def profile_comparative(benchmarks):
187
216
188
217
# ARGH. reparse the repo, without discarding any commits,
189
218
# then overwrite the previous parse results
190
- # prprint ("Slaughtering kittens..." )
219
+ # prprint("Slaughtering kittens...")
191
220
(repo .shas , repo .messages ,
192
221
repo .timestamps , repo .authors ) = _parse_commit_log (None ,REPO_PATH ,
193
222
args .base_commit )
@@ -213,30 +242,82 @@ def profile_comparative(benchmarks):
213
242
214
243
head_res = get_results_df (db , h_head )
215
244
baseline_res = get_results_df (db , h_baseline )
216
- ratio = head_res ['timing' ] / baseline_res ['timing' ]
217
- totals = DataFrame ({HEAD_COL :head_res ['timing' ],
218
- BASE_COL :baseline_res ['timing' ],
219
- 'ratio' :ratio ,
220
- 'name' :baseline_res .name },
221
- columns = [HEAD_COL , BASE_COL , "ratio" , "name" ])
222
- totals = totals .ix [totals [HEAD_COL ] > args .min_duration ]
223
- # ignore below threshold
224
- totals = totals .dropna (
225
- ).sort ("ratio" ).set_index ('name' ) # sort in ascending order
226
-
227
- h_msg = repo .messages .get (h_head , "" )
228
- b_msg = repo .messages .get (h_baseline , "" )
229
-
230
- print_report (totals ,h_head = h_head ,h_msg = h_msg ,
231
- h_baseline = h_baseline ,b_msg = b_msg )
232
-
233
- if args .outdf :
234
- prprint ("The results DataFrame was written to '%s'\n " % args .outdf )
235
- totals .save (args .outdf )
245
+
246
+ report_comparative (head_res ,baseline_res )
247
+
236
248
finally :
237
249
# print("Disposing of TMP_DIR: %s" % TMP_DIR)
238
250
shutil .rmtree (TMP_DIR )
239
251
252
def prep_pickle_for_total(df, agg_name='median'):
    """
    Reduce a pickled timings dataframe to one timing per vbench.

    Accepts a dataframe resulting from an invocation with `-H -d o.pickle`.
    If multiple data columns are present (-N was used), the `agg_name`
    attribute of the dataframe is used to reduce them to a single value
    per vbench; df.median is used by default.

    Parameters
    ----------
    df : DataFrame
        One row per vbench (the index holds the vbench names), one or more
        data columns.
    agg_name : str
        Name of the DataFrame method used for the row-wise reduction.

    Returns
    -------
    DataFrame of the form expected by prep_totals: a 'timing' column with
    the aggregated value and a 'name' column holding the index labels.
    """
    aggregate = getattr(df, agg_name)
    # reduce across the columns, i.e. one value per row/vbench
    result = DataFrame(aggregate(axis=1))
    cols = list(result.columns)
    cols[0] = 'timing'
    result.columns = cols
    result['name'] = list(result.index)
    return result
272
+
273
def prep_totals(head_res, baseline_res):
    """
    Build the 'totals' dataframe comparing head and baseline timings.

    Each argument should be a dataframe with 'timing' and 'name' columns
    where name is the name of the vbench.

    Rows whose head timing is not above the module-level
    `args.min_duration`, and rows with missing values (e.g. a vbench
    present on only one side), are dropped.

    Returns a 'totals' dataframe, suitable as input for print_report:
    indexed by vbench name, with HEAD_COL, BASE_COL and 'ratio'
    (head / baseline) columns, sorted by ascending ratio.
    """
    head_res, baseline_res = head_res.align(baseline_res)
    ratio = head_res['timing'] / baseline_res['timing']
    totals = DataFrame({HEAD_COL: head_res['timing'],
                        BASE_COL: baseline_res['timing'],
                        'ratio': ratio,
                        'name': baseline_res['name']},
                       columns=[HEAD_COL, BASE_COL, "ratio", "name"])

    # ignore below threshold; plain boolean indexing is used instead of
    # `.ix` so this works on both old and current pandas versions.
    # NOTE(review): the -m/--min-duration help text speaks of the
    # *baseline* test, but the filter is applied to the head column --
    # confirm which one is intended.
    totals = totals[totals[HEAD_COL] > args.min_duration]
    totals = totals.dropna()

    # sort in ascending order; DataFrame.sort only exists on old pandas,
    # sort_values only on newer ones.
    if hasattr(totals, 'sort_values'):
        totals = totals.sort_values('ratio')
    else:
        totals = totals.sort('ratio')
    return totals.set_index('name')
292
+
293
def _commit_message(repo, rev):
    """Return the stripped commit message of `rev`, or "Unknown".

    "Unknown" is returned when no repo is available or when the commit
    cannot be found in it.
    """
    if repo is None:
        return "Unknown"
    try:
        return repo.commit(rev).message.strip()
    except git.exc.BadObject:
        return "Unknown"


def report_comparative(head_res, baseline_res):
    """
    Print the comparative report for the head and baseline results.

    Both arguments are dataframes of the form accepted by prep_totals.
    The commits being compared are read from `args.target_commit` and
    `args.base_commit`; their messages are looked up in the git repo at
    VB_DIR and reported as "Unknown" when they cannot be resolved.

    If `args.outdf` is set, the totals dataframe is also pickled there.
    """
    try:
        r = git.Repo(VB_DIR)
    except Exception as e:
        # The commit messages are purely informational, so a repo that
        # cannot be opened must not abort the report (and must not drop
        # into an interactive debugger).
        prprint("Could not open git repo at '%s' (%s)" % (VB_DIR, e))
        r = None

    totals = prep_totals(head_res, baseline_res)

    h_head = args.target_commit
    h_baseline = args.base_commit
    h_msg = _commit_message(r, h_head)
    b_msg = _commit_message(r, h_baseline)

    print_report(totals, h_head=h_head, h_msg=h_msg,
                 h_baseline=h_baseline, b_msg=b_msg)

    if args.outdf:
        # DataFrame.save only exists on old pandas, to_pickle on newer ones.
        if hasattr(totals, 'to_pickle'):
            totals.to_pickle(args.outdf)
        else:
            totals.save(args.outdf)
        prprint("The results DataFrame was written to '%s'\n" % args.outdf)
240
321
241
322
def profile_head_single (benchmark ):
242
323
import gc
@@ -373,11 +454,6 @@ def print_report(df,h_head=None,h_msg="",h_baseline=None,b_msg=""):
373
454
374
455
def main ():
375
456
from suite import benchmarks
376
- # GitRepo wants exactly 7 character hash?
377
- if args .base_commit :
378
- args .base_commit = args .base_commit [:7 ]
379
- if args .target_commit :
380
- args .target_commit = args .target_commit [:7 ]
381
457
382
458
if not args .log_file :
383
459
args .log_file = os .path .abspath (
@@ -395,38 +471,22 @@ def main():
395
471
random .seed (args .seed )
396
472
np .random .seed (args .seed )
397
473
398
- affinity_set = False
474
+ if args .base_pickle and args .target_pickle :
475
+ baseline_res = prep_pickle_for_total (pd .load (args .base_pickle ))
476
+ target_res = prep_pickle_for_total (pd .load (args .target_pickle ))
399
477
400
- # try psutil first since it is more commonly present and better
401
- # maintained. Some people experienced problems with affinity package
402
- # (see https://code.google.com/p/psutil/issues/detail?id=238 for more references)
403
- try :
404
- import psutil
405
- if hasattr (psutil .Process , 'set_cpu_affinity' ):
406
- psutil .Process (os .getpid ()).set_cpu_affinity ([args .affinity ])
407
- affinity_set = True
408
- except ImportError :
409
- pass
478
+ report_comparative (target_res , baseline_res )
479
+ sys .exit (0 )
410
480
411
- if not affinity_set :
412
- try :
413
- import affinity
414
- affinity . set_process_affinity_mask ( 0 , args . affinity )
415
- assert affinity . get_process_affinity_mask ( 0 ) == args .affinity
416
- affinity_set = True
481
+ if args . affinity is not None :
482
+ try : # use psutil rather then stale affinity module. Thanks @yarikoptic
483
+ import psutil
484
+ if hasattr ( psutil . Process , 'set_cpu_affinity' ):
485
+ psutil . Process ( os . getpid ()). set_cpu_affinity ([ args .affinity ])
486
+ print ( "CPU affinity set to %d" % args . affinity )
417
487
except ImportError :
418
- pass
419
-
420
- if not affinity_set :
421
- import warnings
422
- warnings .warn ("\n \n "
423
- "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n "
424
- "The 'affinity' or 'psutil' >= 0.5.0 modules are not available, results may be unreliable\n "
425
- "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n \n "
426
- )
427
- time .sleep (2 )
428
- else :
429
- print ("CPU affinity set to %d" % args .affinity )
488
+ print ("-a/--affinity specified, but the 'psutil' module is not available, aborting.\n " )
489
+ sys .exit (1 )
430
490
431
491
print ("\n " )
432
492
prprint ("LOG_FILE = %s" % args .log_file )
@@ -508,10 +568,39 @@ def inner(repo_path):
508
568
509
569
def commit_from_pickle_name(path):
    """Return the commit hash encoded in a pickle file name.

    The file is expected to be named ``<commit>-foo.*``; everything before
    the first '-' of the *file name* is returned.  Directory components are
    ignored, so a '-' in a parent directory does not truncate the result.
    """
    return os.path.basename(path).split('-')[0]


if __name__ == '__main__':
    args = parser.parse_args()

    both_pickles = args.base_pickle and args.target_pickle
    any_pickle = args.base_pickle or args.target_pickle
    both_commits = args.base_commit and args.target_commit

    # Nothing to do unless -H, a pair of commits or a pair of pickles
    # was specified.
    if not args.head and not both_commits and not both_pickles:
        parser.print_help()
        sys.exit(1)
    elif any_pickle and not both_pickles:
        print("Must specify Both --base-pickle and --target-pickle.")
        sys.exit(1)

    # When comparing pickles, commits that were not given explicitly are
    # derived from the pickle file names.
    if any_pickle and not both_commits:
        if not args.base_commit:
            print("base_commit not specified, Assuming base_pickle is named <commit>-foo.*")
            args.base_commit = commit_from_pickle_name(args.base_pickle)
        if not args.target_commit:
            print("target_commit not specified, Assuming target_pickle is named <commit>-foo.*")
            args.target_commit = commit_from_pickle_name(args.target_pickle)

    # Silenced here so the script's code does not have to be adapted for
    # every pandas version it is run against.
    import warnings
    warnings.filterwarnings('ignore', category=FutureWarning)
    warnings.filterwarnings('ignore', category=DeprecationWarning)

    if args.base_commit and args.target_commit:
        print("Verifying specified commits exist in repo...")
        r = git.Repo(VB_DIR)
        for c in [args.base_commit, args.target_commit]:
            try:
                msg = r.commit(c).message.strip()
            except git.exc.BadObject:
                print("The commit '%s' was not found, aborting..." % c)
                sys.exit(1)
            else:
                print("%s: %s" % (c, msg))

    main()
0 commit comments