Skip to content

Commit 58c3ece

Browse files
author
y-p
committed
Revert "DOC: cleanups"
This reverts commit bd1dbe7.
1 parent bd1dbe7 commit 58c3ece

File tree

3 files changed

+157
-74
lines changed

3 files changed

+157
-74
lines changed

doc/source/basics.rst

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1361,7 +1361,7 @@ and so passing in a substring will work - as long as it is unambiguous :
13611361
get_option("display.max_rows")
13621362
13631363
1364-
The following will **not work** because it matches multiple option names, e.g. ``display.max_colwidth``, ``display.max_rows``, ``display.max_columns``:
1364+
The following will **not work** because it matches multiple option names, e.g. ``display.max_colwidth``, ``display.max_rows``, ``display.max_columns``:
13651365

13661366
.. ipython:: python
13671367
:okexcept:
@@ -1372,18 +1372,12 @@ The following will **not work** because it matches multiple option names, e.g. `
13721372
print(e)
13731373
13741374
1375-
**Note:** Using this form of shorthand may cause your code to break if new options with similar names are added in future versions.
1375+
**Note:** Using this convenient shorthand may cause your code to break if new options with similar names are added in future versions.
13761376

13771377

13781378
You can get a list of available options and their descriptions with ``describe_option``. When called
13791379
with no argument ``describe_option`` will print out the descriptions for all available options.
13801380

1381-
.. ipython:: python
1382-
:suppress:
1383-
1384-
reset_option("all")
1385-
1386-
13871381
.. ipython:: python
13881382
13891383
describe_option()
@@ -1413,11 +1407,11 @@ All options also have a default value, and you can use the ``reset_option`` to d
14131407
get_option("display.max_rows")
14141408
14151409
1416-
It's also possible to reset multiple options at once (using a regex):
1410+
It's also possible to reset multiple options at once (the argument is treated as a regex):
14171411

14181412
.. ipython:: python
14191413
1420-
reset_option("^display")
1414+
reset_option("^display\.")
14211415
14221416
14231417

scripts/use_build_cache.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ class Foo(object):
3939

4040
args = Foo() # for 2.6, no argparse
4141

42-
#print args.accumulate(args.integers)
42+
#print(args.accumulate(args.integers))
4343

4444
shim="""
4545
import os

vb_suite/test_perf.py

Lines changed: 152 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,21 @@
2626
2727
"""
2828

29+
# IMPORTANT NOTE
30+
#
31+
# This script should run on pandas versions at least as far back as 0.9.1.
32+
# devs should be able to use the latest version of this script with
33+
# any dusty old commit and expect it to "just work".
34+
# One way in which this is useful is when collecting historical data,
35+
# where writing some logic around this script may prove easier
36+
# in some cases than running vbench directly (think perf bisection).
37+
#
38+
# *please*, when you modify this script for whatever reason,
39+
# make sure you do not break its functionality when running under older
40+
# pandas versions.
41+
# Note that deprecation warnings are turned off in main(), so there's
42+
# no need to change the actual code to suppress such warnings.
43+
2944
import shutil
3045
import os
3146
import sys
@@ -37,19 +52,25 @@
3752
import random
3853
import numpy as np
3954

55+
import pandas as pd
4056
from pandas import DataFrame, Series
4157

4258
from suite import REPO_PATH
43-
59+
VB_DIR = os.path.dirname(os.path.abspath(__file__))
4460
DEFAULT_MIN_DURATION = 0.01
4561
HEAD_COL="head[ms]"
4662
BASE_COL="base[ms]"
4763

64+
try:
65+
import git # gitpython
66+
except Exception:
67+
print("Error: Please install the `gitpython` package\n")
68+
sys.exit(1)
4869

4970
class RevParseAction(argparse.Action):
    """
    argparse action that resolves a git revision to its abbreviated hash.

    The value given on the command line (branch, tag, ``HEAD~3``, hash, ...)
    is passed to ``git rev-parse --short`` and the command's output, stripped
    of surrounding whitespace, is stored on the namespace under ``self.dest``.

    Raises subprocess.CalledProcessError if git exits with a non-zero status
    (e.g. the revision cannot be resolved).
    """

    def __call__(self, parser, namespace, values, option_string=None):
        import subprocess
        # Use an argument list (no shell) so the user-supplied revision is
        # handed to git verbatim and never interpreted by a shell.
        cmd = ['git', 'rev-parse', '--short', values]
        rev_parse = subprocess.check_output(cmd)
        setattr(namespace, self.dest, rev_parse.strip())
5576

@@ -66,6 +87,14 @@ def __call__(self, parser, namespace, values, option_string=None):
6687
parser.add_argument('-t', '--target-commit',
6788
help='The commit to compare against the baseline (default: HEAD).',
6889
type=str, action=RevParseAction)
90+
parser.add_argument('--base-pickle',
91+
help='name of pickle file with timings data generated by a former `-H -d FILE` run. '\
92+
'filename must be of the form <hash>-*.* or specify --base-commit seperately',
93+
type=str)
94+
parser.add_argument('--target-pickle',
95+
help='name of pickle file with timings data generated by a former `-H -d FILE` run '\
96+
'filename must be of the form <hash>-*.* or specify --target-commit seperately',
97+
type=str)
6998
parser.add_argument('-m', '--min-duration',
7099
help='Minimum duration (in ms) of baseline test for inclusion in report (default: %.3f).' % DEFAULT_MIN_DURATION,
71100
type=float,
@@ -187,7 +216,7 @@ def profile_comparative(benchmarks):
187216

188217
# ARGH. reparse the repo, without discarding any commits,
189218
# then overwrite the previous parse results
190-
# prprint ("Slaughtering kittens..." )
219+
# prprint("Slaughtering kittens...")
191220
(repo.shas, repo.messages,
192221
repo.timestamps, repo.authors) = _parse_commit_log(None,REPO_PATH,
193222
args.base_commit)
@@ -213,30 +242,82 @@ def profile_comparative(benchmarks):
213242

214243
head_res = get_results_df(db, h_head)
215244
baseline_res = get_results_df(db, h_baseline)
216-
ratio = head_res['timing'] / baseline_res['timing']
217-
totals = DataFrame({HEAD_COL:head_res['timing'],
218-
BASE_COL:baseline_res['timing'],
219-
'ratio':ratio,
220-
'name':baseline_res.name},
221-
columns=[HEAD_COL, BASE_COL, "ratio", "name"])
222-
totals = totals.ix[totals[HEAD_COL] > args.min_duration]
223-
# ignore below threshold
224-
totals = totals.dropna(
225-
).sort("ratio").set_index('name') # sort in ascending order
226-
227-
h_msg = repo.messages.get(h_head, "")
228-
b_msg = repo.messages.get(h_baseline, "")
229-
230-
print_report(totals,h_head=h_head,h_msg=h_msg,
231-
h_baseline=h_baseline,b_msg=b_msg)
232-
233-
if args.outdf:
234-
prprint("The results DataFrame was written to '%s'\n" % args.outdf)
235-
totals.save(args.outdf)
245+
246+
report_comparative(head_res,baseline_res)
247+
236248
finally:
237249
# print("Disposing of TMP_DIR: %s" % TMP_DIR)
238250
shutil.rmtree(TMP_DIR)
239251

252+
def prep_pickle_for_total(df, agg_name='median'):
    """
    Reduce a timings dataframe (as produced by an invocation with
    -H -d o.pickle) to a single value per vbench.

    The dataframe method named by `agg_name` (df.median by default) is
    called with 1 as its only argument, which collapses multiple data
    columns (present when -N was used) into one value per row.

    Returns a dataframe of the form expected by prep_totals: the first
    column is renamed to 'timing' and a 'name' column holding the index
    labels is added.
    """
    reducer = getattr(df, agg_name)
    reduced = DataFrame(reducer(1))

    # rename only the first column; any further columns keep their labels
    labels = list(reduced.columns)
    labels[0] = 'timing'
    reduced.columns = labels

    reduced['name'] = list(reduced.index)
    return reduced
272+
273+
def prep_totals(head_res, baseline_res):
    """
    Build the 'totals' dataframe consumed by print_report.

    Both arguments are dataframes carrying 'timing' and 'name' columns,
    where name is the name of the vbench. The two frames are aligned with
    each other, the head/baseline timing ratio is computed, rows whose head
    timing does not exceed args.min_duration are discarded, rows containing
    NaN are dropped, and the result is sorted by ascending ratio and
    indexed by name.
    """
    head_res, baseline_res = head_res.align(baseline_res)

    head_timing = head_res['timing']
    base_timing = baseline_res['timing']
    ratio = head_timing / base_timing

    column_order = [HEAD_COL, BASE_COL, "ratio", "name"]
    totals = DataFrame({HEAD_COL: head_timing,
                        BASE_COL: base_timing,
                        'ratio': ratio,
                        'name': baseline_res.name},
                       columns=column_order)

    # ignore below threshold
    above_threshold = totals[HEAD_COL] > args.min_duration
    totals = totals.ix[above_threshold]

    # sort in ascending order
    totals = totals.dropna().sort("ratio")
    return totals.set_index('name')
292+
293+
def _commit_message(repo, rev, default="Unknown"):
    """Return the stripped message of commit `rev` in `repo`, or `default`
    when there is no repo or the commit cannot be found."""
    if repo is None:
        return default
    try:
        return repo.commit(rev).message.strip()
    except git.exc.BadObject:
        return default


def report_comparative(head_res, baseline_res):
    """
    Print the comparison report for two sets of timings and optionally
    save the resulting dataframe.

    head_res and baseline_res are dataframes of the form accepted by
    prep_totals. The commits reported are args.target_commit and
    args.base_commit; their messages are looked up in the git repo opened
    at VB_DIR and fall back to "Unknown" when they cannot be resolved.
    When args.outdf is set, the totals dataframe is saved to that path.
    """
    try:
        r = git.Repo(VB_DIR)
    except (git.exc.InvalidGitRepositoryError, git.exc.NoSuchPathError):
        # The repo is only needed for commit messages; without it the
        # timings can still be reported, so degrade instead of stopping.
        r = None

    totals = prep_totals(head_res, baseline_res)

    h_head = args.target_commit
    h_baseline = args.base_commit
    h_msg = _commit_message(r, h_head)
    b_msg = _commit_message(r, h_baseline)

    print_report(totals, h_head=h_head, h_msg=h_msg,
                 h_baseline=h_baseline, b_msg=b_msg)

    if args.outdf:
        prprint("The results DataFrame was written to '%s'\n" % args.outdf)
        totals.save(args.outdf)
240321

241322
def profile_head_single(benchmark):
242323
import gc
@@ -373,11 +454,6 @@ def print_report(df,h_head=None,h_msg="",h_baseline=None,b_msg=""):
373454

374455
def main():
375456
from suite import benchmarks
376-
# GitRepo wants exactly 7 character hash?
377-
if args.base_commit:
378-
args.base_commit = args.base_commit[:7]
379-
if args.target_commit:
380-
args.target_commit = args.target_commit[:7]
381457

382458
if not args.log_file:
383459
args.log_file = os.path.abspath(
@@ -395,38 +471,22 @@ def main():
395471
random.seed(args.seed)
396472
np.random.seed(args.seed)
397473

398-
affinity_set = False
474+
if args.base_pickle and args.target_pickle:
475+
baseline_res = prep_pickle_for_total(pd.load(args.base_pickle))
476+
target_res = prep_pickle_for_total(pd.load(args.target_pickle))
399477

400-
# try psutil first since it is more commonly present and better
401-
# maintained. Some people experienced problems with affinity package
402-
# (see https://code.google.com/p/psutil/issues/detail?id=238 for more references)
403-
try:
404-
import psutil
405-
if hasattr(psutil.Process, 'set_cpu_affinity'):
406-
psutil.Process(os.getpid()).set_cpu_affinity([args.affinity])
407-
affinity_set = True
408-
except ImportError:
409-
pass
478+
report_comparative(target_res, baseline_res)
479+
sys.exit(0)
410480

411-
if not affinity_set:
412-
try:
413-
import affinity
414-
affinity.set_process_affinity_mask(0, args.affinity)
415-
assert affinity.get_process_affinity_mask(0) == args.affinity
416-
affinity_set = True
481+
if args.affinity is not None:
482+
try: # use psutil rather then stale affinity module. Thanks @yarikoptic
483+
import psutil
484+
if hasattr(psutil.Process, 'set_cpu_affinity'):
485+
psutil.Process(os.getpid()).set_cpu_affinity([args.affinity])
486+
print("CPU affinity set to %d" % args.affinity)
417487
except ImportError:
418-
pass
419-
420-
if not affinity_set:
421-
import warnings
422-
warnings.warn("\n\n"
423-
"!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n"
424-
"The 'affinity' or 'psutil' >= 0.5.0 modules are not available, results may be unreliable\n"
425-
"!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n\n"
426-
)
427-
time.sleep(2)
428-
else:
429-
print("CPU affinity set to %d" % args.affinity)
488+
print("-a/--affinity specified, but the 'psutil' module is not available, aborting.\n")
489+
sys.exit(1)
430490

431491
print("\n")
432492
prprint("LOG_FILE = %s" % args.log_file)
@@ -508,10 +568,39 @@ def inner(repo_path):
508568

509569
if __name__ == '__main__':
    args = parser.parse_args()

    # A run needs one of: -H, both commits, or both pickle files.
    if (not args.head
            and not (args.base_commit and args.target_commit)
            and not (args.base_pickle and args.target_pickle)):
        parser.print_help()
        sys.exit(1)
    elif ((args.base_pickle or args.target_pickle) and not
            (args.base_pickle and args.target_pickle)):
        print("Must specify Both --base-pickle and --target-pickle.")
        sys.exit(1)

    if ((args.base_pickle or args.target_pickle) and not
            (args.base_commit and args.target_commit)):
        # Derive the missing commit hash from the pickle's file name.
        # Only the basename is used so that a directory prefix (possibly
        # containing '-') does not end up in the hash.
        if not args.base_commit:
            print("base_commit not specified, Assuming base_pickle is named <commit>-foo.*")
            args.base_commit = os.path.basename(args.base_pickle).split('-')[0]
        if not args.target_commit:
            print("target_commit not specified, Assuming target_pickle is named <commit>-foo.*")
            args.target_commit = os.path.basename(args.target_pickle).split('-')[0]

    import warnings
    warnings.filterwarnings('ignore', category=FutureWarning)
    warnings.filterwarnings('ignore', category=DeprecationWarning)

    if args.base_commit and args.target_commit:
        print("Verifying specified commits exist in repo...")
        r = git.Repo(VB_DIR)
        for c in [args.base_commit, args.target_commit]:
            try:
                msg = r.commit(c).message.strip()
            except git.exc.BadObject:
                print("The commit '%s' was not found, aborting..." % c)
                sys.exit(1)
            else:
                print("%s: %s" % (c, msg))

    main()

0 commit comments

Comments
 (0)