diff --git a/asv_bench/benchmarks/algorithms.py b/asv_bench/benchmarks/algorithms.py index 7ffb180b49e09..45d62163ae80b 100644 --- a/asv_bench/benchmarks/algorithms.py +++ b/asv_bench/benchmarks/algorithms.py @@ -11,6 +11,8 @@ except: pass +from .pandas_vb_common import setup # noqa + class Factorize(object): @@ -21,7 +23,6 @@ class Factorize(object): def setup(self, sort): N = 10**5 - np.random.seed(1234) self.int_idx = pd.Int64Index(np.arange(N).repeat(5)) self.float_idx = pd.Float64Index(np.random.randn(N).repeat(5)) self.string_idx = tm.makeStringIndex(N) @@ -45,7 +46,6 @@ class Duplicated(object): def setup(self, keep): N = 10**5 - np.random.seed(1234) self.int_idx = pd.Int64Index(np.arange(N).repeat(5)) self.float_idx = pd.Float64Index(np.random.randn(N).repeat(5)) self.string_idx = tm.makeStringIndex(N) @@ -79,7 +79,6 @@ class Match(object): goal_time = 0.2 def setup(self): - np.random.seed(1234) self.uniques = tm.makeStringIndex(1000).values self.all = self.uniques.repeat(10) @@ -92,7 +91,6 @@ class Hashing(object): goal_time = 0.2 def setup_cache(self): - np.random.seed(1234) N = 10**5 df = pd.DataFrame( diff --git a/asv_bench/benchmarks/binary_ops.py b/asv_bench/benchmarks/binary_ops.py index 14169ced4b71f..cc8766e1fa39c 100644 --- a/asv_bench/benchmarks/binary_ops.py +++ b/asv_bench/benchmarks/binary_ops.py @@ -6,6 +6,8 @@ except ImportError: import pandas.computation.expressions as expr +from .pandas_vb_common import setup # noqa + class Ops(object): @@ -15,7 +17,6 @@ class Ops(object): param_names = ['use_numexpr', 'threads'] def setup(self, use_numexpr, threads): - np.random.seed(1234) self.df = DataFrame(np.random.randn(20000, 100)) self.df2 = DataFrame(np.random.randn(20000, 100)) @@ -47,7 +48,6 @@ class Ops2(object): def setup(self): N = 10**3 - np.random.seed(1234) self.df = DataFrame(np.random.randn(N, N)) self.df2 = DataFrame(np.random.randn(N, N)) @@ -89,14 +89,12 @@ class Timeseries(object): param_names = ['tz'] def setup(self, tz): - self.N = 10**6 - self.halfway = ((self.N // 2) - 1) - self.s = Series(date_range('20010101', periods=self.N, freq='T', - tz=tz)) - self.ts = self.s[self.halfway] + N = 10**6 + halfway = (N // 2) - 1 + self.s = Series(date_range('20010101', periods=N, freq='T', tz=tz)) + self.ts = self.s[halfway] - self.s2 = Series(date_range('20010101', periods=self.N, freq='s', - tz=tz)) + self.s2 = Series(date_range('20010101', periods=N, freq='s', tz=tz)) def time_series_timestamp_compare(self, tz): self.s <= self.ts @@ -131,7 +129,6 @@ class AddOverflowArray(object): goal_time = 0.2 def setup(self): - np.random.seed(1234) N = 10**6 self.arr = np.arange(N) self.arr_rev = np.arange(-N, 0) diff --git a/asv_bench/benchmarks/categoricals.py b/asv_bench/benchmarks/categoricals.py index df41a2afad1f8..1613ca1b97f4b 100644 --- a/asv_bench/benchmarks/categoricals.py +++ b/asv_bench/benchmarks/categoricals.py @@ -9,6 +9,8 @@ except ImportError: pass +from .pandas_vb_common import setup # noqa + class Concat(object): @@ -76,7 +78,6 @@ class ValueCounts(object): def setup(self, dropna): n = 5 * 10**5 - np.random.seed(2718281) arr = ['s%04d' % i for i in np.random.randint(0, n // 10, size=n)] self.ts = pd.Series(arr).astype('category') @@ -101,7 +102,6 @@ class SetCategories(object): def setup(self): n = 5 * 10**5 - np.random.seed(2718281) arr = ['s%04d' % i for i in np.random.randint(0, n // 10, size=n)] self.ts = pd.Series(arr).astype('category') @@ -116,7 +116,6 @@ class Rank(object): def setup(self): N = 10**5 ncats = 100 - np.random.seed(1234) self.s_str = pd.Series(tm.makeCategoricalIndex(N, ncats)).astype(str) self.s_str_cat = self.s_str.astype('category') diff --git a/asv_bench/benchmarks/ctors.py b/asv_bench/benchmarks/ctors.py index 2c9c382e2db86..6276dc324ca0d 100644 --- a/asv_bench/benchmarks/ctors.py +++ b/asv_bench/benchmarks/ctors.py @@ -1,6 +1,8 @@ import numpy as np from pandas import DataFrame, Series, Index, DatetimeIndex, Timestamp +from .pandas_vb_common import setup # noqa + class Constructors(object): @@ -8,7 +10,6 @@ class Constructors(object): def setup(self): N = 10**2 - np.random.seed(1234) self.arr = np.random.randn(N, N) self.arr_str = np.array(['foo', 'bar', 'baz'], dtype=object) diff --git a/asv_bench/benchmarks/eval.py b/asv_bench/benchmarks/eval.py index fd18b3f21cf45..8e581dcf22b4c 100644 --- a/asv_bench/benchmarks/eval.py +++ b/asv_bench/benchmarks/eval.py @@ -5,6 +5,8 @@ except ImportError: import pandas.computation.expressions as expr +from .pandas_vb_common import setup # noqa + class Eval(object): @@ -14,7 +16,6 @@ class Eval(object): param_names = ['engine', 'threads'] def setup(self, engine, threads): - np.random.seed(1234) self.df = pd.DataFrame(np.random.randn(20000, 100)) self.df2 = pd.DataFrame(np.random.randn(20000, 100)) self.df3 = pd.DataFrame(np.random.randn(20000, 100)) @@ -45,17 +46,16 @@ class Query(object): goal_time = 0.2 def setup(self): - np.random.seed(1234) - self.N = 10**6 - self.halfway = (self.N // 2) - 1 - self.index = pd.date_range('20010101', periods=self.N, freq='T') - self.s = pd.Series(self.index) - self.ts = self.s.iloc[self.halfway] - self.df = pd.DataFrame({'a': np.random.randn(self.N), 'dates': self.s}, - index=self.index) - self.data = np.random.randn(self.N) - self.min_val = self.data.min() - self.max_val = self.data.max() + N = 10**6 + halfway = (N // 2) - 1 + index = pd.date_range('20010101', periods=N, freq='T') + s = pd.Series(index) + self.ts = s.iloc[halfway] + self.df = pd.DataFrame({'a': np.random.randn(N), 'dates': s}, + index=index) + data = np.random.randn(N) + self.min_val = data.min() + self.max_val = data.max() def time_query_datetime_index(self): self.df.query('index < @self.ts') diff --git a/asv_bench/benchmarks/frame_ctor.py b/asv_bench/benchmarks/frame_ctor.py index d577ebc20a31c..5f465a91d38d3 100644 --- a/asv_bench/benchmarks/frame_ctor.py +++ b/asv_bench/benchmarks/frame_ctor.py @@ -4,27 +4,23 @@ try: from pandas.tseries import offsets except: - from pandas.core.datetools import * + from pandas.core.datetools import * # noqa +from .pandas_vb_common import setup # noqa -# ---------------------------------------------------------------------- -# Creation from nested dict class FromDicts(object): goal_time = 0.2 def setup(self): - np.random.seed(1234) N, K = 5000, 50 - self.index = tm.makeStringIndex(N) - self.columns = tm.makeStringIndex(K) - self.frame = DataFrame(np.random.randn(N, K), - index=self.index, - columns=self.columns) - self.data = self.frame.to_dict() + index = tm.makeStringIndex(N) + columns = tm.makeStringIndex(K) + frame = DataFrame(np.random.randn(N, K), index=index, columns=columns) + self.data = frame.to_dict() self.some_dict = list(self.data.values())[0] - self.dict_list = self.frame.to_dict(orient='records') + self.dict_list = frame.to_dict(orient='records') self.data2 = {i: {j: float(j) for j in range(100)} for i in range(2000)} @@ -42,14 +38,13 @@ def time_frame_ctor_nested_dict_int64(self): DataFrame(self.data2) -# from a mi-series - class FromSeries(object): + goal_time = 0.2 def setup(self): - self.mi = MultiIndex.from_product([range(100), range(100)]) - self.s = Series(np.random.randn(10000), index=self.mi) + mi = MultiIndex.from_product([range(100), range(100)]) + self.s = Series(np.random.randn(10000), index=mi) def time_frame_from_mi_series(self): DataFrame(self.s) diff --git a/asv_bench/benchmarks/frame_methods.py b/asv_bench/benchmarks/frame_methods.py index 7ed341425e561..2b48168238ee8 100644 --- a/asv_bench/benchmarks/frame_methods.py +++ b/asv_bench/benchmarks/frame_methods.py @@ -3,7 +3,8 @@ import pandas.util.testing as tm from pandas import (DataFrame, Series, MultiIndex, date_range, period_range, isnull, NaT) -from .pandas_vb_common import setup + +from .pandas_vb_common import setup # noqa class GetNumericData(object):