From a55856d01a968027a7729c6ec3a62efefdbd2590 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 4 Nov 2017 15:46:31 -0400 Subject: [PATCH 1/5] TST: add back test_generic.py, accid removed in the big reorg --- pandas/core/generic.py | 22 +- pandas/tests/frame/test_analytics.py | 18 + pandas/tests/frame/test_timeseries.py | 82 +- pandas/tests/generic/__init__.py | 0 pandas/tests/generic/test_frame.py | 268 +++++++ pandas/tests/generic/test_generic.py | 1026 +++++++++++++++++++++++++ pandas/tests/generic/test_panel.py | 95 +++ pandas/tests/generic/test_series.py | 223 ++++++ setup.py | 1 + 9 files changed, 1722 insertions(+), 13 deletions(-) create mode 100644 pandas/tests/generic/__init__.py create mode 100644 pandas/tests/generic/test_frame.py create mode 100644 pandas/tests/generic/test_generic.py create mode 100644 pandas/tests/generic/test_panel.py create mode 100644 pandas/tests/generic/test_series.py diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 14bf9710fca6a..48e6f8d4d50d3 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -352,7 +352,7 @@ def _get_axis_number(self, axis): else: try: return self._AXIS_NUMBERS[axis] - except: + except KeyError: pass raise ValueError('No axis named {0} for object type {1}' .format(axis, type(self))) @@ -365,7 +365,7 @@ def _get_axis_name(self, axis): else: try: return self._AXIS_NAMES[axis] - except: + except KeyError: pass raise ValueError('No axis named {0} for object type {1}' .format(axis, type(self))) @@ -701,7 +701,7 @@ def squeeze(self, axis=None): return self.iloc[ tuple([0 if i in axis and len(a) == 1 else slice(None) for i, a in enumerate(self.axes)])] - except: + except Exception: return self def swaplevel(self, i=-2, j=-1, axis=0): @@ -1021,7 +1021,7 @@ def __invert__(self): try: arr = operator.inv(_values_from_object(self)) return self.__array_wrap__(arr) - except: + except Exception: # inv fails with 0 len if not np.prod(self.shape): @@ -1907,7 +1907,7 @@ def 
_maybe_update_cacher(self, clear=False, verify_is_copy=True): else: try: ref._maybe_cache_changed(cacher[0], self) - except: + except Exception: pass if verify_is_copy: @@ -2016,7 +2016,7 @@ def _check_setitem_copy(self, stacklevel=4, t='setting', force=False): if not gc.get_referents(self.is_copy()): self.is_copy = None return - except: + except Exception: pass # we might be a false positive @@ -2024,7 +2024,7 @@ def _check_setitem_copy(self, stacklevel=4, t='setting', force=False): if self.is_copy().shape == self.shape: self.is_copy = None return - except: + except Exception: pass # a custom message @@ -2999,7 +2999,7 @@ def reindex(self, *args, **kwargs): if self._needs_reindex_multi(axes, method, level): try: return self._reindex_multi(axes, copy, fill_value) - except: + except Exception: pass # perform the reindex on the axes @@ -3715,7 +3715,7 @@ def _check_inplace_setting(self, value): try: if np.isnan(value): return True - except: + except Exception: pass raise TypeError('Cannot do inplace boolean setting on ' @@ -5005,6 +5005,8 @@ def clip(self, lower=None, upper=None, axis=None, inplace=False, inplace = validate_bool_kwarg(inplace, 'inplace') axis = nv.validate_clip_with_axis(axis, args, kwargs) + if axis is not None: + axis = self._get_axis_number(axis) # GH 17276 # numpy doesn't like NaN as a clip value @@ -5916,7 +5918,7 @@ def _where(self, cond, other=np.nan, inplace=False, axis=None, level=None, new_other = _values_from_object(self).copy() new_other[icond] = other other = new_other - except: + except Exception: try_quick = False # let's create a new (if we failed at the above diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 1bac4037e99c9..cfdb18cefee64 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -1822,6 +1822,24 @@ def test_built_in_round(self): {'col1': [1., 2., 3.], 'col2': [1., 2., 3.]}) tm.assert_frame_equal(round(df), expected_rounded) + def 
test_pct_change(self): + # GH 11150 + pnl = DataFrame([np.arange(0, 40, 10), np.arange(0, 40, 10), np.arange( + 0, 40, 10)]).astype(np.float64) + pnl.iat[1, 0] = np.nan + pnl.iat[1, 1] = np.nan + pnl.iat[2, 3] = 60 + + mask = pnl.isnull() + + for axis in range(2): + expected = pnl.ffill(axis=axis) / pnl.ffill(axis=axis).shift( + axis=axis) - 1 + expected[mask] = np.nan + result = pnl.pct_change(axis=axis, fill_method='pad') + + tm.assert_frame_equal(result, expected) + # Clip def test_clip(self): diff --git a/pandas/tests/frame/test_timeseries.py b/pandas/tests/frame/test_timeseries.py index 26a2c6f9a5045..5cd5a3793ab46 100644 --- a/pandas/tests/frame/test_timeseries.py +++ b/pandas/tests/frame/test_timeseries.py @@ -11,12 +11,15 @@ import numpy as np from pandas import (DataFrame, Series, Index, - Timestamp, DatetimeIndex, - to_datetime, date_range) + Timestamp, DatetimeIndex, MultiIndex, + to_datetime, date_range, period_range) import pandas as pd import pandas.tseries.offsets as offsets -from pandas.util.testing import assert_series_equal, assert_frame_equal +from pandas.util.testing import (assert_series_equal, + assert_frame_equal, + assert_index_equal, + assert_raises_regex) import pandas.util.testing as tm from pandas.compat import product @@ -601,3 +604,76 @@ def test_frame_to_period(self): tm.assert_index_equal(pts.columns, exp.columns.asfreq('M')) pytest.raises(ValueError, df.to_period, axis=2) + + @pytest.mark.parametrize("fn", ['tz_localize', 'tz_convert']) + def test_tz_convert_and_localize(self, fn): + l0 = date_range('20140701', periods=5, freq='D') + + # TODO: l1 should be a PeriodIndex for testing + # after GH2106 is addressed + with pytest.raises(NotImplementedError): + period_range('20140701', periods=1).tz_convert('UTC') + with pytest.raises(NotImplementedError): + period_range('20140701', periods=1).tz_localize('UTC') + # l1 = period_range('20140701', periods=5, freq='D') + l1 = date_range('20140701', periods=5, freq='D') + + int_idx = 
Index(range(5)) + + if fn == 'tz_convert': + l0 = l0.tz_localize('UTC') + l1 = l1.tz_localize('UTC') + + for idx in [l0, l1]: + + l0_expected = getattr(idx, fn)('US/Pacific') + l1_expected = getattr(idx, fn)('US/Pacific') + + df1 = DataFrame(np.ones(5), index=l0) + df1 = getattr(df1, fn)('US/Pacific') + assert_index_equal(df1.index, l0_expected) + + # MultiIndex + # GH7846 + df2 = DataFrame(np.ones(5), MultiIndex.from_arrays([l0, l1])) + + df3 = getattr(df2, fn)('US/Pacific', level=0) + assert not df3.index.levels[0].equals(l0) + assert_index_equal(df3.index.levels[0], l0_expected) + assert_index_equal(df3.index.levels[1], l1) + assert not df3.index.levels[1].equals(l1_expected) + + df3 = getattr(df2, fn)('US/Pacific', level=1) + assert_index_equal(df3.index.levels[0], l0) + assert not df3.index.levels[0].equals(l0_expected) + assert_index_equal(df3.index.levels[1], l1_expected) + assert not df3.index.levels[1].equals(l1) + + df4 = DataFrame(np.ones(5), + MultiIndex.from_arrays([int_idx, l0])) + + # TODO: untested + df5 = getattr(df4, fn)('US/Pacific', level=1) # noqa + + assert_index_equal(df3.index.levels[0], l0) + assert not df3.index.levels[0].equals(l0_expected) + assert_index_equal(df3.index.levels[1], l1_expected) + assert not df3.index.levels[1].equals(l1) + + # Bad Inputs + + # Not DatetimeIndex / PeriodIndex + with assert_raises_regex(TypeError, 'DatetimeIndex'): + df = DataFrame(index=int_idx) + df = getattr(df, fn)('US/Pacific') + + # Not DatetimeIndex / PeriodIndex + with assert_raises_regex(TypeError, 'DatetimeIndex'): + df = DataFrame(np.ones(5), + MultiIndex.from_arrays([int_idx, l0])) + df = getattr(df, fn)('US/Pacific', level=0) + + # Invalid level + with assert_raises_regex(ValueError, 'not valid'): + df = DataFrame(index=l0) + df = getattr(df, fn)('US/Pacific', level=1) diff --git a/pandas/tests/generic/__init__.py b/pandas/tests/generic/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git 
a/pandas/tests/generic/test_frame.py b/pandas/tests/generic/test_frame.py new file mode 100644 index 0000000000000..ae73664e224cf --- /dev/null +++ b/pandas/tests/generic/test_frame.py @@ -0,0 +1,268 @@ +# -*- coding: utf-8 -*- +# pylint: disable-msg=E1101,W0612 + +from operator import methodcaller +from copy import deepcopy +from distutils.version import LooseVersion + +import pytest +import numpy as np +import pandas as pd + +from pandas import Series, DataFrame, date_range, MultiIndex + +from pandas.compat import range +from pandas.util.testing import (assert_series_equal, + assert_frame_equal, + assert_almost_equal) + +import pandas.util.testing as tm +from .test_generic import Generic + +try: + import xarray + _XARRAY_INSTALLED = True +except ImportError: + _XARRAY_INSTALLED = False + + +class TestDataFrame(Generic): + _typ = DataFrame + _comparator = lambda self, x, y: assert_frame_equal(x, y) + + def test_rename_mi(self): + df = DataFrame([ + 11, 21, 31 + ], index=MultiIndex.from_tuples([("A", x) for x in ["a", "B", "c"]])) + df.rename(str.lower) + + def test_set_axis_name(self): + df = pd.DataFrame([[1, 2], [3, 4]]) + funcs = ['_set_axis_name', 'rename_axis'] + for func in funcs: + result = methodcaller(func, 'foo')(df) + assert df.index.name is None + assert result.index.name == 'foo' + + result = methodcaller(func, 'cols', axis=1)(df) + assert df.columns.name is None + assert result.columns.name == 'cols' + + def test_set_axis_name_mi(self): + df = DataFrame( + np.empty((3, 3)), + index=MultiIndex.from_tuples([("A", x) for x in list('aBc')]), + columns=MultiIndex.from_tuples([('C', x) for x in list('xyz')]) + ) + + level_names = ['L1', 'L2'] + funcs = ['_set_axis_name', 'rename_axis'] + for func in funcs: + result = methodcaller(func, level_names)(df) + assert result.index.names == level_names + assert result.columns.names == [None, None] + + result = methodcaller(func, level_names, axis=1)(df) + assert result.columns.names == ["L1", "L2"] + assert 
result.index.names == [None, None] + + def test_nonzero_single_element(self): + + # allow single item via bool method + df = DataFrame([[True]]) + assert df.bool() + + df = DataFrame([[False]]) + assert not df.bool() + + df = DataFrame([[False, False]]) + pytest.raises(ValueError, lambda: df.bool()) + pytest.raises(ValueError, lambda: bool(df)) + + def test_get_numeric_data_preserve_dtype(self): + + # get the numeric data + o = DataFrame({'A': [1, '2', 3.]}) + result = o._get_numeric_data() + expected = DataFrame(index=[0, 1, 2], dtype=object) + self._compare(result, expected) + + def test_metadata_propagation_indiv(self): + + # groupby + df = DataFrame( + {'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'], + 'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'], + 'C': np.random.randn(8), + 'D': np.random.randn(8)}) + result = df.groupby('A').sum() + self.check_metadata(df, result) + + # resample + df = DataFrame(np.random.randn(1000, 2), + index=date_range('20130101', periods=1000, freq='s')) + result = df.resample('1T') + self.check_metadata(df, result) + + # merging with override + # GH 6923 + _metadata = DataFrame._metadata + _finalize = DataFrame.__finalize__ + + np.random.seed(10) + df1 = DataFrame(np.random.randint(0, 4, (3, 2)), columns=['a', 'b']) + df2 = DataFrame(np.random.randint(0, 4, (3, 2)), columns=['c', 'd']) + DataFrame._metadata = ['filename'] + df1.filename = 'fname1.csv' + df2.filename = 'fname2.csv' + + def finalize(self, other, method=None, **kwargs): + + for name in self._metadata: + if method == 'merge': + left, right = other.left, other.right + value = getattr(left, name, '') + '|' + getattr(right, + name, '') + object.__setattr__(self, name, value) + else: + object.__setattr__(self, name, getattr(other, name, '')) + + return self + + DataFrame.__finalize__ = finalize + result = df1.merge(df2, left_on=['a'], right_on=['c'], how='inner') + assert result.filename == 'fname1.csv|fname2.csv' + + # concat + # GH 6927 + 
DataFrame._metadata = ['filename'] + df1 = DataFrame(np.random.randint(0, 4, (3, 2)), columns=list('ab')) + df1.filename = 'foo' + + def finalize(self, other, method=None, **kwargs): + for name in self._metadata: + if method == 'concat': + value = '+'.join([getattr( + o, name) for o in other.objs if getattr(o, name, None) + ]) + object.__setattr__(self, name, value) + else: + object.__setattr__(self, name, getattr(other, name, None)) + + return self + + DataFrame.__finalize__ = finalize + + result = pd.concat([df1, df1]) + assert result.filename == 'foo+foo' + + # reset + DataFrame._metadata = _metadata + DataFrame.__finalize__ = _finalize + + def test_set_attribute(self): + # Test for consistent setattr behavior when an attribute and a column + # have the same name (Issue #8994) + df = DataFrame({'x': [1, 2, 3]}) + + df.y = 2 + df['y'] = [2, 4, 6] + df.y = 5 + + assert df.y == 5 + assert_series_equal(df['y'], Series([2, 4, 6], name='y')) + + @pytest.mark.skipif(not _XARRAY_INSTALLED or _XARRAY_INSTALLED and + LooseVersion(xarray.__version__) < '0.10.0', + reason='xarray >= 0.10.0 required') + @pytest.mark.parametrize( + "index", ['FloatIndex', 'IntIndex', + 'StringIndex', 'UnicodeIndex', + 'DateIndex', 'PeriodIndex', + 'CategoricalIndex', 'TimedeltaIndex']) + def test_to_xarray_index_types(self, index): + from xarray import Dataset + + index = getattr(tm, 'make{}'.format(index)) + df = DataFrame({'a': list('abc'), + 'b': list(range(1, 4)), + 'c': np.arange(3, 6).astype('u1'), + 'd': np.arange(4.0, 7.0, dtype='float64'), + 'e': [True, False, True], + 'f': pd.Categorical(list('abc')), + 'g': pd.date_range('20130101', periods=3), + 'h': pd.date_range('20130101', + periods=3, + tz='US/Eastern')} + ) + + df.index = index(3) + df.index.name = 'foo' + df.columns.name = 'bar' + result = df.to_xarray() + assert result.dims['foo'] == 3 + assert len(result.coords) == 1 + assert len(result.data_vars) == 8 + assert_almost_equal(list(result.coords.keys()), ['foo']) + assert 
isinstance(result, Dataset) + + # idempotency + # categoricals are not preserved + # datetimes w/tz are not preserved + # column names are lost + expected = df.copy() + expected['f'] = expected['f'].astype(object) + expected['h'] = expected['h'].astype('datetime64[ns]') + expected.columns.name = None + assert_frame_equal(result.to_dataframe(), expected, + check_index_type=False, check_categorical=False) + + def test_to_xarray(self): + tm._skip_if_no_xarray() + from xarray import Dataset + + df = DataFrame({'a': list('abc'), + 'b': list(range(1, 4)), + 'c': np.arange(3, 6).astype('u1'), + 'd': np.arange(4.0, 7.0, dtype='float64'), + 'e': [True, False, True], + 'f': pd.Categorical(list('abc')), + 'g': pd.date_range('20130101', periods=3), + 'h': pd.date_range('20130101', + periods=3, + tz='US/Eastern')} + ) + + df.index.name = 'foo' + result = df[0:0].to_xarray() + assert result.dims['foo'] == 0 + assert isinstance(result, Dataset) + + # available in 0.7.1 + # MultiIndex + df.index = pd.MultiIndex.from_product([['a'], range(3)], + names=['one', 'two']) + result = df.to_xarray() + assert result.dims['one'] == 1 + assert result.dims['two'] == 3 + assert len(result.coords) == 2 + assert len(result.data_vars) == 8 + assert_almost_equal(list(result.coords.keys()), ['one', 'two']) + assert isinstance(result, Dataset) + + result = result.to_dataframe() + expected = df.copy() + expected['f'] = expected['f'].astype(object) + expected['h'] = expected['h'].astype('datetime64[ns]') + expected.columns.name = None + assert_frame_equal(result, + expected, + check_index_type=False) + + def test_deepcopy_empty(self): + # This test covers empty frame copying with non-empty column sets + # as reported in issue GH15370 + empty_frame = DataFrame(data=[], index=[], columns=['A']) + empty_frame_copy = deepcopy(empty_frame) + + self._compare(empty_frame_copy, empty_frame) diff --git a/pandas/tests/generic/test_generic.py b/pandas/tests/generic/test_generic.py new file mode 100644 index 
0000000000000..a37c1649e5677 --- /dev/null +++ b/pandas/tests/generic/test_generic.py @@ -0,0 +1,1026 @@ +# -*- coding: utf-8 -*- +# pylint: disable-msg=E1101,W0612 + +from copy import copy, deepcopy +from warnings import catch_warnings + +import pytest +import numpy as np +import pandas as pd + +from pandas.core.dtypes.common import is_scalar +from pandas import (Series, DataFrame, Panel, + date_range, Panel4D, + MultiIndex) + +import pandas.io.formats.printing as printing + +from pandas.compat import range, zip, PY3 +from pandas.util.testing import (assert_raises_regex, + assert_series_equal, + assert_panel_equal, + assert_frame_equal) + +import pandas.util.testing as tm + + +# ---------------------------------------------------------------------- +# Generic types test cases + +class Generic(object): + + @property + def _ndim(self): + return self._typ._AXIS_LEN + + def _axes(self): + """ return the axes for my object typ """ + return self._typ._AXIS_ORDERS + + def _construct(self, shape, value=None, dtype=None, **kwargs): + """ construct an object for the given shape + if value is specified use that if its a scalar + if value is an array, repeat it as needed """ + + if isinstance(shape, int): + shape = tuple([shape] * self._ndim) + if value is not None: + if is_scalar(value): + if value == 'empty': + arr = None + + # remove the info axis + kwargs.pop(self._typ._info_axis_name, None) + else: + arr = np.empty(shape, dtype=dtype) + arr.fill(value) + else: + fshape = np.prod(shape) + arr = value.ravel() + new_shape = fshape / arr.shape[0] + if fshape % arr.shape[0] != 0: + raise Exception("invalid value passed in _construct") + + arr = np.repeat(arr, new_shape).reshape(shape) + else: + arr = np.random.randn(*shape) + return self._typ(arr, dtype=dtype, **kwargs) + + def _compare(self, result, expected): + self._comparator(result, expected) + + def test_rename(self): + + # single axis + idx = list('ABCD') + # relabeling values passed into self.rename + args = [ + 
str.lower, + {x: x.lower() for x in idx}, + Series({x: x.lower() for x in idx}), + ] + + for axis in self._axes(): + kwargs = {axis: idx} + obj = self._construct(4, **kwargs) + + for arg in args: + # rename a single axis + result = obj.rename(**{axis: arg}) + expected = obj.copy() + setattr(expected, axis, list('abcd')) + self._compare(result, expected) + + # multiple axes at once + + def test_get_numeric_data(self): + + n = 4 + kwargs = {} + for i in range(self._ndim): + kwargs[self._typ._AXIS_NAMES[i]] = list(range(n)) + + # get the numeric data + o = self._construct(n, **kwargs) + result = o._get_numeric_data() + self._compare(result, o) + + # non-inclusion + result = o._get_bool_data() + expected = self._construct(n, value='empty', **kwargs) + self._compare(result, expected) + + # get the bool data + arr = np.array([True, True, False, True]) + o = self._construct(n, value=arr, **kwargs) + result = o._get_numeric_data() + self._compare(result, o) + + # _get_numeric_data is includes _get_bool_data, so can't test for + # non-inclusion + + def test_get_default(self): + + # GH 7725 + d0 = "a", "b", "c", "d" + d1 = np.arange(4, dtype='int64') + others = "e", 10 + + for data, index in ((d0, d1), (d1, d0)): + s = Series(data, index=index) + for i, d in zip(index, data): + assert s.get(i) == d + assert s.get(i, d) == d + assert s.get(i, "z") == d + for other in others: + assert s.get(other, "z") == "z" + assert s.get(other, other) == other + + def test_nonzero(self): + + # GH 4633 + # look at the boolean/nonzero behavior for objects + obj = self._construct(shape=4) + pytest.raises(ValueError, lambda: bool(obj == 0)) + pytest.raises(ValueError, lambda: bool(obj == 1)) + pytest.raises(ValueError, lambda: bool(obj)) + + obj = self._construct(shape=4, value=1) + pytest.raises(ValueError, lambda: bool(obj == 0)) + pytest.raises(ValueError, lambda: bool(obj == 1)) + pytest.raises(ValueError, lambda: bool(obj)) + + obj = self._construct(shape=4, value=np.nan) + 
pytest.raises(ValueError, lambda: bool(obj == 0)) + pytest.raises(ValueError, lambda: bool(obj == 1)) + pytest.raises(ValueError, lambda: bool(obj)) + + # empty + obj = self._construct(shape=0) + pytest.raises(ValueError, lambda: bool(obj)) + + # invalid behaviors + + obj1 = self._construct(shape=4, value=1) + obj2 = self._construct(shape=4, value=1) + + def f(): + if obj1: + printing.pprint_thing("this works and shouldn't") + + pytest.raises(ValueError, f) + pytest.raises(ValueError, lambda: obj1 and obj2) + pytest.raises(ValueError, lambda: obj1 or obj2) + pytest.raises(ValueError, lambda: not obj1) + + def test_downcast(self): + # test close downcasting + + o = self._construct(shape=4, value=9, dtype=np.int64) + result = o.copy() + result._data = o._data.downcast(dtypes='infer') + self._compare(result, o) + + o = self._construct(shape=4, value=9.) + expected = o.astype(np.int64) + result = o.copy() + result._data = o._data.downcast(dtypes='infer') + self._compare(result, expected) + + o = self._construct(shape=4, value=9.5) + result = o.copy() + result._data = o._data.downcast(dtypes='infer') + self._compare(result, o) + + # are close + o = self._construct(shape=4, value=9.000000000005) + result = o.copy() + result._data = o._data.downcast(dtypes='infer') + expected = o.astype(np.int64) + self._compare(result, expected) + + def test_constructor_compound_dtypes(self): + # GH 5191 + # compound dtypes should raise not-implementederror + + def f(dtype): + return self._construct(shape=3, dtype=dtype) + + pytest.raises(NotImplementedError, f, [("A", "datetime64[h]"), + ("B", "str"), + ("C", "int32")]) + + # these work (though results may be unexpected) + f('int64') + f('float64') + f('M8[ns]') + + def check_metadata(self, x, y=None): + for m in x._metadata: + v = getattr(x, m, None) + if y is None: + assert v is None + else: + assert v == getattr(y, m, None) + + def test_metadata_propagation(self): + # check that the metadata matches up on the resulting ops + + o = 
self._construct(shape=3) + o.name = 'foo' + o2 = self._construct(shape=3) + o2.name = 'bar' + + # TODO + # Once panel can do non-trivial combine operations + # (currently there is an a raise in the Panel arith_ops to prevent + # this, though it actually does work) + # can remove all of these try: except: blocks on the actual operations + + # ---------- + # preserving + # ---------- + + # simple ops with scalars + for op in ['__add__', '__sub__', '__truediv__', '__mul__']: + result = getattr(o, op)(1) + self.check_metadata(o, result) + + # ops with like + for op in ['__add__', '__sub__', '__truediv__', '__mul__']: + try: + result = getattr(o, op)(o) + self.check_metadata(o, result) + except (ValueError, AttributeError): + pass + + # simple boolean + for op in ['__eq__', '__le__', '__ge__']: + v1 = getattr(o, op)(o) + self.check_metadata(o, v1) + + try: + self.check_metadata(o, v1 & v1) + except (ValueError): + pass + + try: + self.check_metadata(o, v1 | v1) + except (ValueError): + pass + + # combine_first + try: + result = o.combine_first(o2) + self.check_metadata(o, result) + except (AttributeError): + pass + + # --------------------------- + # non-preserving (by default) + # --------------------------- + + # add non-like + try: + result = o + o2 + self.check_metadata(result) + except (ValueError, AttributeError): + pass + + # simple boolean + for op in ['__eq__', '__le__', '__ge__']: + + # this is a name matching op + v1 = getattr(o, op)(o) + + v2 = getattr(o, op)(o2) + self.check_metadata(v2) + + try: + self.check_metadata(v1 & v2) + except (ValueError): + pass + + try: + self.check_metadata(v1 | v2) + except (ValueError): + pass + + def test_head_tail(self): + # GH5370 + + o = self._construct(shape=10) + + # check all index types + for index in [tm.makeFloatIndex, tm.makeIntIndex, tm.makeStringIndex, + tm.makeUnicodeIndex, tm.makeDateIndex, + tm.makePeriodIndex]: + axis = o._get_axis_name(0) + setattr(o, axis, index(len(getattr(o, axis)))) + + # Panel + dims + 
try: + o.head() + except (NotImplementedError): + pytest.skip('not implemented on {0}'.format( + o.__class__.__name__)) + + self._compare(o.head(), o.iloc[:5]) + self._compare(o.tail(), o.iloc[-5:]) + + # 0-len + self._compare(o.head(0), o.iloc[0:0]) + self._compare(o.tail(0), o.iloc[0:0]) + + # bounded + self._compare(o.head(len(o) + 1), o) + self._compare(o.tail(len(o) + 1), o) + + # neg index + self._compare(o.head(-3), o.head(7)) + self._compare(o.tail(-3), o.tail(7)) + + def test_sample(self): + # Fixes issue: 2419 + + o = self._construct(shape=10) + + ### + # Check behavior of random_state argument + ### + + # Check for stability when receives seed or random state -- run 10 + # times. + for test in range(10): + seed = np.random.randint(0, 100) + self._compare( + o.sample(n=4, random_state=seed), o.sample(n=4, + random_state=seed)) + self._compare( + o.sample(frac=0.7, random_state=seed), o.sample( + frac=0.7, random_state=seed)) + + self._compare( + o.sample(n=4, random_state=np.random.RandomState(test)), + o.sample(n=4, random_state=np.random.RandomState(test))) + + self._compare( + o.sample(frac=0.7, random_state=np.random.RandomState(test)), + o.sample(frac=0.7, random_state=np.random.RandomState(test))) + + os1, os2 = [], [] + for _ in range(2): + np.random.seed(test) + os1.append(o.sample(n=4)) + os2.append(o.sample(frac=0.7)) + self._compare(*os1) + self._compare(*os2) + + # Check for error when random_state argument invalid. 
+ with pytest.raises(ValueError): + o.sample(random_state='astring!') + + ### + # Check behavior of `frac` and `N` + ### + + # Giving both frac and N throws error + with pytest.raises(ValueError): + o.sample(n=3, frac=0.3) + + # Check that raises right error for negative lengths + with pytest.raises(ValueError): + o.sample(n=-3) + with pytest.raises(ValueError): + o.sample(frac=-0.3) + + # Make sure float values of `n` give error + with pytest.raises(ValueError): + o.sample(n=3.2) + + # Check lengths are right + assert len(o.sample(n=4) == 4) + assert len(o.sample(frac=0.34) == 3) + assert len(o.sample(frac=0.36) == 4) + + ### + # Check weights + ### + + # Weight length must be right + with pytest.raises(ValueError): + o.sample(n=3, weights=[0, 1]) + + with pytest.raises(ValueError): + bad_weights = [0.5] * 11 + o.sample(n=3, weights=bad_weights) + + with pytest.raises(ValueError): + bad_weight_series = Series([0, 0, 0.2]) + o.sample(n=4, weights=bad_weight_series) + + # Check won't accept negative weights + with pytest.raises(ValueError): + bad_weights = [-0.1] * 10 + o.sample(n=3, weights=bad_weights) + + # Check inf and -inf throw errors: + with pytest.raises(ValueError): + weights_with_inf = [0.1] * 10 + weights_with_inf[0] = np.inf + o.sample(n=3, weights=weights_with_inf) + + with pytest.raises(ValueError): + weights_with_ninf = [0.1] * 10 + weights_with_ninf[0] = -np.inf + o.sample(n=3, weights=weights_with_ninf) + + # All zeros raises errors + zero_weights = [0] * 10 + with pytest.raises(ValueError): + o.sample(n=3, weights=zero_weights) + + # All missing weights + nan_weights = [np.nan] * 10 + with pytest.raises(ValueError): + o.sample(n=3, weights=nan_weights) + + # Check np.nan are replaced by zeros. + weights_with_nan = [np.nan] * 10 + weights_with_nan[5] = 0.5 + self._compare( + o.sample(n=1, axis=0, weights=weights_with_nan), o.iloc[5:6]) + + # Check None are also replaced by zeros. 
+ weights_with_None = [None] * 10 + weights_with_None[5] = 0.5 + self._compare( + o.sample(n=1, axis=0, weights=weights_with_None), o.iloc[5:6]) + + def test_size_compat(self): + # GH8846 + # size property should be defined + + o = self._construct(shape=10) + assert o.size == np.prod(o.shape) + assert o.size == 10 ** len(o.axes) + + def test_split_compat(self): + # xref GH8846 + o = self._construct(shape=10) + assert len(np.array_split(o, 5)) == 5 + assert len(np.array_split(o, 2)) == 2 + + def test_unexpected_keyword(self): # GH8597 + df = DataFrame(np.random.randn(5, 2), columns=['jim', 'joe']) + ca = pd.Categorical([0, 0, 2, 2, 3, np.nan]) + ts = df['joe'].copy() + ts[2] = np.nan + + with assert_raises_regex(TypeError, 'unexpected keyword'): + df.drop('joe', axis=1, in_place=True) + + with assert_raises_regex(TypeError, 'unexpected keyword'): + df.reindex([1, 0], inplace=True) + + with assert_raises_regex(TypeError, 'unexpected keyword'): + ca.fillna(0, inplace=True) + + with assert_raises_regex(TypeError, 'unexpected keyword'): + ts.fillna(0, in_place=True) + + # See gh-12301 + def test_stat_unexpected_keyword(self): + obj = self._construct(5) + starwars = 'Star Wars' + errmsg = 'unexpected keyword' + + with assert_raises_regex(TypeError, errmsg): + obj.max(epic=starwars) # stat_function + with assert_raises_regex(TypeError, errmsg): + obj.var(epic=starwars) # stat_function_ddof + with assert_raises_regex(TypeError, errmsg): + obj.sum(epic=starwars) # cum_function + with assert_raises_regex(TypeError, errmsg): + obj.any(epic=starwars) # logical_function + + def test_api_compat(self): + + # GH 12021 + # compat for __name__, __qualname__ + + obj = self._construct(5) + for func in ['sum', 'cumsum', 'any', 'var']: + f = getattr(obj, func) + assert f.__name__ == func + if PY3: + assert f.__qualname__.endswith(func) + + def test_stat_non_defaults_args(self): + obj = self._construct(5) + out = np.array([0]) + errmsg = "the 'out' parameter is not supported" + + with 
assert_raises_regex(ValueError, errmsg): + obj.max(out=out) # stat_function + with assert_raises_regex(ValueError, errmsg): + obj.var(out=out) # stat_function_ddof + with assert_raises_regex(ValueError, errmsg): + obj.sum(out=out) # cum_function + with assert_raises_regex(ValueError, errmsg): + obj.any(out=out) # logical_function + + def test_truncate_out_of_bounds(self): + # GH11382 + + # small + shape = [int(2e3)] + ([1] * (self._ndim - 1)) + small = self._construct(shape, dtype='int8') + self._compare(small.truncate(), small) + self._compare(small.truncate(before=0, after=3e3), small) + self._compare(small.truncate(before=-1, after=2e3), small) + + # big + shape = [int(2e6)] + ([1] * (self._ndim - 1)) + big = self._construct(shape, dtype='int8') + self._compare(big.truncate(), big) + self._compare(big.truncate(before=0, after=3e6), big) + self._compare(big.truncate(before=-1, after=2e6), big) + + def test_validate_bool_args(self): + df = DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}) + invalid_values = [1, "True", [1, 2, 3], 5.0] + + for value in invalid_values: + with pytest.raises(ValueError): + super(DataFrame, df).rename_axis(mapper={'a': 'x', 'b': 'y'}, + axis=1, inplace=value) + + with pytest.raises(ValueError): + super(DataFrame, df).drop('a', axis=1, inplace=value) + + with pytest.raises(ValueError): + super(DataFrame, df).sort_index(inplace=value) + + with pytest.raises(ValueError): + super(DataFrame, df)._consolidate(inplace=value) + + with pytest.raises(ValueError): + super(DataFrame, df).fillna(value=0, inplace=value) + + with pytest.raises(ValueError): + super(DataFrame, df).replace(to_replace=1, value=7, + inplace=value) + + with pytest.raises(ValueError): + super(DataFrame, df).interpolate(inplace=value) + + with pytest.raises(ValueError): + super(DataFrame, df)._where(cond=df.a > 2, inplace=value) + + with pytest.raises(ValueError): + super(DataFrame, df).mask(cond=df.a > 2, inplace=value) + + def test_copy_and_deepcopy(self): + # GH 15444 + for 
shape in [0, 1, 2]: + obj = self._construct(shape) + for func in [copy, + deepcopy, + lambda x: x.copy(deep=False), + lambda x: x.copy(deep=True)]: + obj_copy = func(obj) + assert obj_copy is not obj + self._compare(obj_copy, obj) + + +class TestNDFrame(object): + # tests that don't fit elsewhere + + def test_sample(sel): + # Fixes issue: 2419 + # additional specific object based tests + + # A few dataframe test with degenerate weights. + easy_weight_list = [0] * 10 + easy_weight_list[5] = 1 + + df = pd.DataFrame({'col1': range(10, 20), + 'col2': range(20, 30), + 'colString': ['a'] * 10, + 'easyweights': easy_weight_list}) + sample1 = df.sample(n=1, weights='easyweights') + assert_frame_equal(sample1, df.iloc[5:6]) + + # Ensure proper error if string given as weight for Series, panel, or + # DataFrame with axis = 1. + s = Series(range(10)) + with pytest.raises(ValueError): + s.sample(n=3, weights='weight_column') + + with catch_warnings(record=True): + panel = Panel(items=[0, 1, 2], major_axis=[2, 3, 4], + minor_axis=[3, 4, 5]) + with pytest.raises(ValueError): + panel.sample(n=1, weights='weight_column') + + with pytest.raises(ValueError): + df.sample(n=1, weights='weight_column', axis=1) + + # Check weighting key error + with pytest.raises(KeyError): + df.sample(n=3, weights='not_a_real_column_name') + + # Check that re-normalizes weights that don't sum to one. 
+ weights_less_than_1 = [0] * 10 + weights_less_than_1[0] = 0.5 + tm.assert_frame_equal( + df.sample(n=1, weights=weights_less_than_1), df.iloc[:1]) + + ### + # Test axis argument + ### + + # Test axis argument + df = pd.DataFrame({'col1': range(10), 'col2': ['a'] * 10}) + second_column_weight = [0, 1] + assert_frame_equal( + df.sample(n=1, axis=1, weights=second_column_weight), df[['col2']]) + + # Different axis arg types + assert_frame_equal(df.sample(n=1, axis='columns', + weights=second_column_weight), + df[['col2']]) + + weight = [0] * 10 + weight[5] = 0.5 + assert_frame_equal(df.sample(n=1, axis='rows', weights=weight), + df.iloc[5:6]) + assert_frame_equal(df.sample(n=1, axis='index', weights=weight), + df.iloc[5:6]) + + # Check out of range axis values + with pytest.raises(ValueError): + df.sample(n=1, axis=2) + + with pytest.raises(ValueError): + df.sample(n=1, axis='not_a_name') + + with pytest.raises(ValueError): + s = pd.Series(range(10)) + s.sample(n=1, axis=1) + + # Test weight length compared to correct axis + with pytest.raises(ValueError): + df.sample(n=1, axis=1, weights=[0.5] * 10) + + # Check weights with axis = 1 + easy_weight_list = [0] * 3 + easy_weight_list[2] = 1 + + df = pd.DataFrame({'col1': range(10, 20), + 'col2': range(20, 30), + 'colString': ['a'] * 10}) + sample1 = df.sample(n=1, axis=1, weights=easy_weight_list) + assert_frame_equal(sample1, df[['colString']]) + + # Test default axes + with catch_warnings(record=True): + p = Panel(items=['a', 'b', 'c'], major_axis=[2, 4, 6], + minor_axis=[1, 3, 5]) + assert_panel_equal( + p.sample(n=3, random_state=42), p.sample(n=3, axis=1, + random_state=42)) + assert_frame_equal( + df.sample(n=3, random_state=42), df.sample(n=3, axis=0, + random_state=42)) + + # Test that function aligns weights with frame + df = DataFrame( + {'col1': [5, 6, 7], + 'col2': ['a', 'b', 'c'], }, index=[9, 5, 3]) + s = Series([1, 0, 0], index=[3, 5, 9]) + assert_frame_equal(df.loc[[3]], df.sample(1, weights=s)) + + # 
Weights have index values to be dropped because not in + # sampled DataFrame + s2 = Series([0.001, 0, 10000], index=[3, 5, 10]) + assert_frame_equal(df.loc[[3]], df.sample(1, weights=s2)) + + # Weights have empty values to be filled with zeros + s3 = Series([0.01, 0], index=[3, 5]) + assert_frame_equal(df.loc[[3]], df.sample(1, weights=s3)) + + # No overlap in weight and sampled DataFrame indices + s4 = Series([1, 0], index=[1, 2]) + with pytest.raises(ValueError): + df.sample(1, weights=s4) + + def test_squeeze(self): + # noop + for s in [tm.makeFloatSeries(), tm.makeStringSeries(), + tm.makeObjectSeries()]: + tm.assert_series_equal(s.squeeze(), s) + for df in [tm.makeTimeDataFrame()]: + tm.assert_frame_equal(df.squeeze(), df) + with catch_warnings(record=True): + for p in [tm.makePanel()]: + tm.assert_panel_equal(p.squeeze(), p) + with catch_warnings(record=True): + for p4d in [tm.makePanel4D()]: + tm.assert_panel4d_equal(p4d.squeeze(), p4d) + + # squeezing + df = tm.makeTimeDataFrame().reindex(columns=['A']) + tm.assert_series_equal(df.squeeze(), df['A']) + + with catch_warnings(record=True): + p = tm.makePanel().reindex(items=['ItemA']) + tm.assert_frame_equal(p.squeeze(), p['ItemA']) + + p = tm.makePanel().reindex(items=['ItemA'], minor_axis=['A']) + tm.assert_series_equal(p.squeeze(), p.loc['ItemA', :, 'A']) + + with catch_warnings(record=True): + p4d = tm.makePanel4D().reindex(labels=['label1']) + tm.assert_panel_equal(p4d.squeeze(), p4d['label1']) + + with catch_warnings(record=True): + p4d = tm.makePanel4D().reindex(labels=['label1'], items=['ItemA']) + tm.assert_frame_equal(p4d.squeeze(), p4d.loc['label1', 'ItemA']) + + # don't fail with 0 length dimensions GH11229 & GH8999 + empty_series = Series([], name='five') + empty_frame = DataFrame([empty_series]) + with catch_warnings(record=True): + empty_panel = Panel({'six': empty_frame}) + + [tm.assert_series_equal(empty_series, higher_dim.squeeze()) + for higher_dim in [empty_series, empty_frame, 
empty_panel]] + + # axis argument + df = tm.makeTimeDataFrame(nper=1).iloc[:, :1] + assert df.shape == (1, 1) + tm.assert_series_equal(df.squeeze(axis=0), df.iloc[0]) + tm.assert_series_equal(df.squeeze(axis='index'), df.iloc[0]) + tm.assert_series_equal(df.squeeze(axis=1), df.iloc[:, 0]) + tm.assert_series_equal(df.squeeze(axis='columns'), df.iloc[:, 0]) + assert df.squeeze() == df.iloc[0, 0] + pytest.raises(ValueError, df.squeeze, axis=2) + pytest.raises(ValueError, df.squeeze, axis='x') + + df = tm.makeTimeDataFrame(3) + tm.assert_frame_equal(df.squeeze(axis=0), df) + + def test_numpy_squeeze(self): + s = tm.makeFloatSeries() + tm.assert_series_equal(np.squeeze(s), s) + + df = tm.makeTimeDataFrame().reindex(columns=['A']) + tm.assert_series_equal(np.squeeze(df), df['A']) + + def test_transpose(self): + msg = (r"transpose\(\) got multiple values for " + r"keyword argument 'axes'") + for s in [tm.makeFloatSeries(), tm.makeStringSeries(), + tm.makeObjectSeries()]: + # calls implementation in pandas/core/base.py + tm.assert_series_equal(s.transpose(), s) + for df in [tm.makeTimeDataFrame()]: + tm.assert_frame_equal(df.transpose().transpose(), df) + + with catch_warnings(record=True): + for p in [tm.makePanel()]: + tm.assert_panel_equal(p.transpose(2, 0, 1) + .transpose(1, 2, 0), p) + tm.assert_raises_regex(TypeError, msg, p.transpose, + 2, 0, 1, axes=(2, 0, 1)) + + with catch_warnings(record=True): + for p4d in [tm.makePanel4D()]: + tm.assert_panel4d_equal(p4d.transpose(2, 0, 3, 1) + .transpose(1, 3, 0, 2), p4d) + tm.assert_raises_regex(TypeError, msg, p4d.transpose, + 2, 0, 3, 1, axes=(2, 0, 3, 1)) + + def test_numpy_transpose(self): + msg = "the 'axes' parameter is not supported" + + s = tm.makeFloatSeries() + tm.assert_series_equal( + np.transpose(s), s) + tm.assert_raises_regex(ValueError, msg, + np.transpose, s, axes=1) + + df = tm.makeTimeDataFrame() + tm.assert_frame_equal(np.transpose( + np.transpose(df)), df) + tm.assert_raises_regex(ValueError, msg, + 
np.transpose, df, axes=1) + + with catch_warnings(record=True): + p = tm.makePanel() + tm.assert_panel_equal(np.transpose( + np.transpose(p, axes=(2, 0, 1)), + axes=(1, 2, 0)), p) + + with catch_warnings(record=True): + p4d = tm.makePanel4D() + tm.assert_panel4d_equal(np.transpose( + np.transpose(p4d, axes=(2, 0, 3, 1)), + axes=(1, 3, 0, 2)), p4d) + + def test_take(self): + indices = [1, 5, -2, 6, 3, -1] + for s in [tm.makeFloatSeries(), tm.makeStringSeries(), + tm.makeObjectSeries()]: + out = s.take(indices) + expected = Series(data=s.values.take(indices), + index=s.index.take(indices), dtype=s.dtype) + tm.assert_series_equal(out, expected) + for df in [tm.makeTimeDataFrame()]: + out = df.take(indices) + expected = DataFrame(data=df.values.take(indices, axis=0), + index=df.index.take(indices), + columns=df.columns) + tm.assert_frame_equal(out, expected) + + indices = [-3, 2, 0, 1] + with catch_warnings(record=True): + for p in [tm.makePanel()]: + out = p.take(indices) + expected = Panel(data=p.values.take(indices, axis=0), + items=p.items.take(indices), + major_axis=p.major_axis, + minor_axis=p.minor_axis) + tm.assert_panel_equal(out, expected) + + with catch_warnings(record=True): + for p4d in [tm.makePanel4D()]: + out = p4d.take(indices) + expected = Panel4D(data=p4d.values.take(indices, axis=0), + labels=p4d.labels.take(indices), + major_axis=p4d.major_axis, + minor_axis=p4d.minor_axis, + items=p4d.items) + tm.assert_panel4d_equal(out, expected) + + def test_take_invalid_kwargs(self): + indices = [-3, 2, 0, 1] + s = tm.makeFloatSeries() + df = tm.makeTimeDataFrame() + + with catch_warnings(record=True): + p = tm.makePanel() + p4d = tm.makePanel4D() + + for obj in (s, df, p, p4d): + msg = r"take\(\) got an unexpected keyword argument 'foo'" + tm.assert_raises_regex(TypeError, msg, obj.take, + indices, foo=2) + + msg = "the 'out' parameter is not supported" + tm.assert_raises_regex(ValueError, msg, obj.take, + indices, out=indices) + + msg = "the 'mode' parameter 
is not supported" + tm.assert_raises_regex(ValueError, msg, obj.take, + indices, mode='clip') + + def test_equals(self): + s1 = pd.Series([1, 2, 3], index=[0, 2, 1]) + s2 = s1.copy() + assert s1.equals(s2) + + s1[1] = 99 + assert not s1.equals(s2) + + # NaNs compare as equal + s1 = pd.Series([1, np.nan, 3, np.nan], index=[0, 2, 1, 3]) + s2 = s1.copy() + assert s1.equals(s2) + + s2[0] = 9.9 + assert not s1.equals(s2) + + idx = MultiIndex.from_tuples([(0, 'a'), (1, 'b'), (2, 'c')]) + s1 = Series([1, 2, np.nan], index=idx) + s2 = s1.copy() + assert s1.equals(s2) + + # Add object dtype column with nans + index = np.random.random(10) + df1 = DataFrame( + np.random.random(10, ), index=index, columns=['floats']) + df1['text'] = 'the sky is so blue. we could use more chocolate.'.split( + ) + df1['start'] = date_range('2000-1-1', periods=10, freq='T') + df1['end'] = date_range('2000-1-1', periods=10, freq='D') + df1['diff'] = df1['end'] - df1['start'] + df1['bool'] = (np.arange(10) % 3 == 0) + df1.loc[::2] = np.nan + df2 = df1.copy() + assert df1['text'].equals(df2['text']) + assert df1['start'].equals(df2['start']) + assert df1['end'].equals(df2['end']) + assert df1['diff'].equals(df2['diff']) + assert df1['bool'].equals(df2['bool']) + assert df1.equals(df2) + assert not df1.equals(object) + + # different dtype + different = df1.copy() + different['floats'] = different['floats'].astype('float32') + assert not df1.equals(different) + + # different index + different_index = -index + different = df2.set_index(different_index) + assert not df1.equals(different) + + # different columns + different = df2.copy() + different.columns = df2.columns[::-1] + assert not df1.equals(different) + + # DatetimeIndex + index = pd.date_range('2000-1-1', periods=10, freq='T') + df1 = df1.set_index(index) + df2 = df1.copy() + assert df1.equals(df2) + + # MultiIndex + df3 = df1.set_index(['text'], append=True) + df2 = df1.set_index(['text'], append=True) + assert df3.equals(df2) + + df2 = 
df1.set_index(['floats'], append=True) + assert not df3.equals(df2) + + # NaN in index + df3 = df1.set_index(['floats'], append=True) + df2 = df1.set_index(['floats'], append=True) + assert df3.equals(df2) + + # GH 8437 + a = pd.Series([False, np.nan]) + b = pd.Series([False, np.nan]) + c = pd.Series(index=range(2)) + d = pd.Series(index=range(2)) + e = pd.Series(index=range(2)) + f = pd.Series(index=range(2)) + c[:-1] = d[:-1] = e[0] = f[0] = False + assert a.equals(a) + assert a.equals(b) + assert a.equals(c) + assert a.equals(d) + assert a.equals(e) + assert e.equals(f) + + def test_describe_raises(self): + with catch_warnings(record=True): + with pytest.raises(NotImplementedError): + tm.makePanel().describe() + + def test_pipe(self): + df = DataFrame({'A': [1, 2, 3]}) + f = lambda x, y: x ** y + result = df.pipe(f, 2) + expected = DataFrame({'A': [1, 4, 9]}) + assert_frame_equal(result, expected) + + result = df.A.pipe(f, 2) + assert_series_equal(result, expected.A) + + def test_pipe_tuple(self): + df = DataFrame({'A': [1, 2, 3]}) + f = lambda x, y: y + result = df.pipe((f, 'y'), 0) + assert_frame_equal(result, df) + + result = df.A.pipe((f, 'y'), 0) + assert_series_equal(result, df.A) + + def test_pipe_tuple_error(self): + df = DataFrame({"A": [1, 2, 3]}) + f = lambda x, y: y + with pytest.raises(ValueError): + df.pipe((f, 'y'), x=1, y=0) + + with pytest.raises(ValueError): + df.A.pipe((f, 'y'), x=1, y=0) + + def test_pipe_panel(self): + with catch_warnings(record=True): + wp = Panel({'r1': DataFrame({"A": [1, 2, 3]})}) + f = lambda x, y: x + y + result = wp.pipe(f, 2) + expected = wp + 2 + assert_panel_equal(result, expected) + + result = wp.pipe((f, 'y'), x=1) + expected = wp + 1 + assert_panel_equal(result, expected) + + with pytest.raises(ValueError): + result = wp.pipe((f, 'y'), x=1, y=1) diff --git a/pandas/tests/generic/test_panel.py b/pandas/tests/generic/test_panel.py new file mode 100644 index 0000000000000..9beb160b74a30 --- /dev/null +++ 
b/pandas/tests/generic/test_panel.py @@ -0,0 +1,95 @@ +# -*- coding: utf-8 -*- +# pylint: disable-msg=E1101,W0612 + +from warnings import catch_warnings + +import pytest + +from pandas import Panel, Panel4D +from pandas.util.testing import (assert_panel_equal, + assert_panel4d_equal, + assert_almost_equal) + +import pandas.util.testing as tm +from .test_generic import Generic + + +class TestPanel(Generic): + _typ = Panel + _comparator = lambda self, x, y: assert_panel_equal(x, y, by_blocks=True) + + def test_to_xarray(self): + + tm._skip_if_no_xarray() + from xarray import DataArray + + with catch_warnings(record=True): + p = tm.makePanel() + + result = p.to_xarray() + assert isinstance(result, DataArray) + assert len(result.coords) == 3 + assert_almost_equal(list(result.coords.keys()), + ['items', 'major_axis', 'minor_axis']) + assert len(result.dims) == 3 + + # idempotency + assert_panel_equal(result.to_pandas(), p) + + +class TestPanel4D(Generic): + _typ = Panel4D + _comparator = lambda self, x, y: assert_panel4d_equal(x, y, by_blocks=True) + + def test_sample(self): + pytest.skip("sample on Panel4D") + + def test_to_xarray(self): + + tm._skip_if_no_xarray() + from xarray import DataArray + + with catch_warnings(record=True): + p = tm.makePanel4D() + + result = p.to_xarray() + assert isinstance(result, DataArray) + assert len(result.coords) == 4 + assert_almost_equal(list(result.coords.keys()), + ['labels', 'items', 'major_axis', + 'minor_axis']) + assert len(result.dims) == 4 + + # non-convertible + pytest.raises(ValueError, lambda: result.to_pandas()) + + +# run all the tests, but wrap each in a warning catcher +for t in ['test_rename', 'test_rename_axis', 'test_get_numeric_data', + 'test_get_default', 'test_nonzero', + 'test_numpy_1_7_compat_numeric_methods', + 'test_downcast', 'test_constructor_compound_dtypes', + 'test_head_tail', + 'test_size_compat', 'test_split_compat', + 'test_unexpected_keyword', + 'test_stat_unexpected_keyword', 'test_api_compat', + 
'test_stat_non_defaults_args', + 'test_clip', 'test_truncate_out_of_bounds', 'test_numpy_clip', + 'test_metadata_propagation', 'test_copy_and_deepcopy', + 'test_sample']: + + def f(): + def tester(self): + with catch_warnings(record=True): + return getattr(super(TestPanel, self), t)() + return tester + + setattr(TestPanel, t, f()) + + def f(): + def tester(self): + with catch_warnings(record=True): + return getattr(super(TestPanel4D, self), t)() + return tester + + setattr(TestPanel4D, t, f()) diff --git a/pandas/tests/generic/test_series.py b/pandas/tests/generic/test_series.py new file mode 100644 index 0000000000000..4773ff69e0982 --- /dev/null +++ b/pandas/tests/generic/test_series.py @@ -0,0 +1,223 @@ +# -*- coding: utf-8 -*- +# pylint: disable-msg=E1101,W0612 + +from operator import methodcaller + +import pytest +import numpy as np +import pandas as pd + +from distutils.version import LooseVersion +from pandas import Series, date_range, MultiIndex + +from pandas.compat import range +from pandas.util.testing import (assert_series_equal, + assert_almost_equal) + +import pandas.util.testing as tm +from .test_generic import Generic + +try: + import xarray + _XARRAY_INSTALLED = True +except ImportError: + _XARRAY_INSTALLED = False + + +class TestSeries(Generic): + _typ = Series + _comparator = lambda self, x, y: assert_series_equal(x, y) + + def setup_method(self): + self.ts = tm.makeTimeSeries() # Was at top level in test_series + self.ts.name = 'ts' + + self.series = tm.makeStringSeries() + self.series.name = 'series' + + def test_rename_mi(self): + s = Series([11, 21, 31], + index=MultiIndex.from_tuples( + [("A", x) for x in ["a", "B", "c"]])) + s.rename(str.lower) + + def test_set_axis_name(self): + s = Series([1, 2, 3], index=['a', 'b', 'c']) + funcs = ['rename_axis', '_set_axis_name'] + name = 'foo' + for func in funcs: + result = methodcaller(func, name)(s) + assert s.index.name is None + assert result.index.name == name + + def test_set_axis_name_mi(self): 
+ s = Series([11, 21, 31], index=MultiIndex.from_tuples( + [("A", x) for x in ["a", "B", "c"]], + names=['l1', 'l2']) + ) + funcs = ['rename_axis', '_set_axis_name'] + for func in funcs: + result = methodcaller(func, ['L1', 'L2'])(s) + assert s.index.name is None + assert s.index.names == ['l1', 'l2'] + assert result.index.name is None + assert result.index.names == ['L1', 'L2'] + + def test_set_axis_name_raises(self): + s = pd.Series([1]) + with pytest.raises(ValueError): + s._set_axis_name(name='a', axis=1) + + def test_get_numeric_data_preserve_dtype(self): + + # get the numeric data + o = Series([1, 2, 3]) + result = o._get_numeric_data() + self._compare(result, o) + + o = Series([1, '2', 3.]) + result = o._get_numeric_data() + expected = Series([], dtype=object, index=pd.Index([], dtype=object)) + self._compare(result, expected) + + o = Series([True, False, True]) + result = o._get_numeric_data() + self._compare(result, o) + + o = Series([True, False, True]) + result = o._get_bool_data() + self._compare(result, o) + + o = Series(date_range('20130101', periods=3)) + result = o._get_numeric_data() + expected = Series([], dtype='M8[ns]', index=pd.Index([], dtype=object)) + self._compare(result, expected) + + def test_nonzero_single_element(self): + + # allow single item via bool method + s = Series([True]) + assert s.bool() + + s = Series([False]) + assert not s.bool() + + # single item nan to raise + for s in [Series([np.nan]), Series([pd.NaT]), Series([True]), + Series([False])]: + pytest.raises(ValueError, lambda: bool(s)) + + for s in [Series([np.nan]), Series([pd.NaT])]: + pytest.raises(ValueError, lambda: s.bool()) + + # multiple bool are still an error + for s in [Series([True, True]), Series([False, False])]: + pytest.raises(ValueError, lambda: bool(s)) + pytest.raises(ValueError, lambda: s.bool()) + + # single non-bool are an error + for s in [Series([1]), Series([0]), Series(['a']), Series([0.0])]: + pytest.raises(ValueError, lambda: bool(s)) + 
pytest.raises(ValueError, lambda: s.bool()) + + def test_metadata_propagation_indiv(self): + # check that the metadata matches up on the resulting ops + + o = Series(range(3), range(3)) + o.name = 'foo' + o2 = Series(range(3), range(3)) + o2.name = 'bar' + + result = o.T + self.check_metadata(o, result) + + # resample + ts = Series(np.random.rand(1000), + index=date_range('20130101', periods=1000, freq='s'), + name='foo') + result = ts.resample('1T').mean() + self.check_metadata(ts, result) + + result = ts.resample('1T').min() + self.check_metadata(ts, result) + + result = ts.resample('1T').apply(lambda x: x.sum()) + self.check_metadata(ts, result) + + _metadata = Series._metadata + _finalize = Series.__finalize__ + Series._metadata = ['name', 'filename'] + o.filename = 'foo' + o2.filename = 'bar' + + def finalize(self, other, method=None, **kwargs): + for name in self._metadata: + if method == 'concat' and name == 'filename': + value = '+'.join([getattr( + o, name) for o in other.objs if getattr(o, name, None) + ]) + object.__setattr__(self, name, value) + else: + object.__setattr__(self, name, getattr(other, name, None)) + + return self + + Series.__finalize__ = finalize + + result = pd.concat([o, o2]) + assert result.filename == 'foo+bar' + assert result.name is None + + # reset + Series._metadata = _metadata + Series.__finalize__ = _finalize + + @pytest.mark.skipif(not _XARRAY_INSTALLED or _XARRAY_INSTALLED and + LooseVersion(xarray.__version__) < '0.10.0', + reason='xarray >= 0.10.0 required') + @pytest.mark.parametrize( + "index", + ['FloatIndex', 'IntIndex', + 'StringIndex', 'UnicodeIndex', + 'DateIndex', 'PeriodIndex', + 'TimedeltaIndex', 'CategoricalIndex']) + def test_to_xarray_index_types(self, index): + from xarray import DataArray + + index = getattr(tm, 'make{}'.format(index)) + s = Series(range(6), index=index(6)) + s.index.name = 'foo' + result = s.to_xarray() + repr(result) + assert len(result) == 6 + assert len(result.coords) == 1 + 
assert_almost_equal(list(result.coords.keys()), ['foo']) + assert isinstance(result, DataArray) + + # idempotency + assert_series_equal(result.to_series(), s, + check_index_type=False, + check_categorical=True) + + def test_to_xarray(self): + + tm._skip_if_no_xarray() + from xarray import DataArray + + s = Series([]) + s.index.name = 'foo' + result = s.to_xarray() + assert len(result) == 0 + assert len(result.coords) == 1 + assert_almost_equal(list(result.coords.keys()), ['foo']) + assert isinstance(result, DataArray) + + s = Series(range(6)) + s.index.name = 'foo' + s.index = pd.MultiIndex.from_product([['a', 'b'], range(3)], + names=['one', 'two']) + result = s.to_xarray() + assert len(result) == 2 + assert_almost_equal(list(result.coords.keys()), ['one', 'two']) + assert isinstance(result, DataArray) + assert_series_equal(result.to_series(), s) diff --git a/setup.py b/setup.py index bd7c8f175607c..5531256387e49 100755 --- a/setup.py +++ b/setup.py @@ -740,6 +740,7 @@ def pxd(name): 'pandas.tests.computation', 'pandas.tests.sparse', 'pandas.tests.frame', + 'pandas.tests.generic', 'pandas.tests.indexing', 'pandas.tests.indexes', 'pandas.tests.indexes.datetimes', From 08753a8fe35b335228888786e28136bef9fca684 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sun, 5 Nov 2017 08:31:10 -0500 Subject: [PATCH 2/5] remove deprecations --- pandas/tests/generic/test_panel.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/tests/generic/test_panel.py b/pandas/tests/generic/test_panel.py index 9beb160b74a30..1dc8f43fbbdda 100644 --- a/pandas/tests/generic/test_panel.py +++ b/pandas/tests/generic/test_panel.py @@ -67,14 +67,13 @@ def test_to_xarray(self): # run all the tests, but wrap each in a warning catcher for t in ['test_rename', 'test_rename_axis', 'test_get_numeric_data', 'test_get_default', 'test_nonzero', - 'test_numpy_1_7_compat_numeric_methods', 'test_downcast', 'test_constructor_compound_dtypes', 'test_head_tail', 'test_size_compat', 
'test_split_compat', 'test_unexpected_keyword', 'test_stat_unexpected_keyword', 'test_api_compat', 'test_stat_non_defaults_args', - 'test_clip', 'test_truncate_out_of_bounds', 'test_numpy_clip', + 'test_truncate_out_of_bounds', 'test_metadata_propagation', 'test_copy_and_deepcopy', 'test_sample']: From 69c57f9af5798fe3f93f21d60a06e8adee1dec3f Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 7 Nov 2017 08:33:21 -0500 Subject: [PATCH 3/5] more deprecation --- pandas/util/testing.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/pandas/util/testing.py b/pandas/util/testing.py index dec67bbea854f..43a62e6d53296 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -526,7 +526,7 @@ def get_locales(prefix=None, normalize=True, """ try: raw_locales = locale_getter() - except: + except Exception: return None try: @@ -757,7 +757,7 @@ def set_trace(): from IPython.core.debugger import Pdb try: Pdb(color_scheme='Linux').set_trace(sys._getframe().f_back) - except: + except Exception: from pdb import Pdb as OldPdb OldPdb().set_trace(sys._getframe().f_back) @@ -1265,9 +1265,9 @@ def assert_series_equal(left, right, check_dtype=True, check_dtype=check_dtype) elif is_interval_dtype(left) or is_interval_dtype(right): # TODO: big hack here - l = pd.IntervalIndex(left) - r = pd.IntervalIndex(right) - assert_index_equal(l, r, obj='{obj}.index'.format(obj=obj)) + left = pd.IntervalIndex(left) + right = pd.IntervalIndex(right) + assert_index_equal(left, right, obj='{obj}.index'.format(obj=obj)) else: _testing.assert_almost_equal(left.get_values(), right.get_values(), @@ -1439,8 +1439,9 @@ def assert_panelnd_equal(left, right, assert_index_equal(left_ind, right_ind, check_names=check_names) if by_blocks: - rblocks = right.blocks - lblocks = left.blocks + with warnings.catch_warnings(record=True): + rblocks = right.blocks + lblocks = left.blocks for dtype in list(set(list(lblocks.keys()) + list(rblocks.keys()))): assert dtype 
in lblocks assert dtype in rblocks @@ -2345,7 +2346,7 @@ def wrapper(*args, **kwargs): try: e_str = traceback.format_exc(e) - except: + except Exception: e_str = str(e) if any([m.lower() in e_str.lower() for m in _skip_on_messages]): @@ -2582,7 +2583,7 @@ def assert_produces_warning(expected_warning=Warning, filter_level="always", for m in clear: try: m.__warningregistry__.clear() - except: + except Exception: pass saw_warning = False @@ -2849,7 +2850,7 @@ def setTZ(tz): if tz is None: try: del os.environ['TZ'] - except: + except KeyError: pass else: os.environ['TZ'] = tz From 1ad2aae58afc547ee3bf9ff43de6af5a2363c864 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 7 Nov 2017 10:13:31 -0500 Subject: [PATCH 4/5] moar --- pandas/util/testing.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 43a62e6d53296..a13ecef5dd1bf 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -1439,9 +1439,8 @@ def assert_panelnd_equal(left, right, assert_index_equal(left_ind, right_ind, check_names=check_names) if by_blocks: - with warnings.catch_warnings(record=True): - rblocks = right.blocks - lblocks = left.blocks + rblocks = right._to_dict_of_blocks() + lblocks = left._to_dict_of_blocks() for dtype in list(set(list(lblocks.keys()) + list(rblocks.keys()))): assert dtype in lblocks assert dtype in rblocks From a63f66dd9fe804094f64cbe127bccd5cf350f99d Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 7 Nov 2017 10:17:28 -0500 Subject: [PATCH 5/5] try more --- pandas/tests/generic/test_panel.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pandas/tests/generic/test_panel.py b/pandas/tests/generic/test_panel.py index 1dc8f43fbbdda..b1d9af9c8b0af 100644 --- a/pandas/tests/generic/test_panel.py +++ b/pandas/tests/generic/test_panel.py @@ -65,7 +65,7 @@ def test_to_xarray(self): # run all the tests, but wrap each in a warning catcher -for t in ['test_rename', 
'test_rename_axis', 'test_get_numeric_data', +for t in ['test_rename', 'test_get_numeric_data', 'test_get_default', 'test_nonzero', 'test_downcast', 'test_constructor_compound_dtypes', 'test_head_tail', @@ -79,16 +79,18 @@ def test_to_xarray(self): def f(): def tester(self): + f = getattr(super(TestPanel, self), t) with catch_warnings(record=True): - return getattr(super(TestPanel, self), t)() + f() return tester setattr(TestPanel, t, f()) def f(): def tester(self): + f = getattr(super(TestPanel4D, self), t) with catch_warnings(record=True): - return getattr(super(TestPanel4D, self), t)() + f() return tester setattr(TestPanel4D, t, f())