diff --git a/pandas/tests/frame/test_missing.py b/pandas/tests/frame/test_missing.py index 0411fb4d96e82..f7214751f29a2 100644 --- a/pandas/tests/frame/test_missing.py +++ b/pandas/tests/frame/test_missing.py @@ -2,6 +2,7 @@ from __future__ import print_function +from distutils.version import LooseVersion from numpy import nan, random import numpy as np @@ -18,6 +19,13 @@ from pandas.tests.frame.common import TestData, _check_mixed_float +def _skip_if_no_pchip(): + try: + from scipy.interpolate import pchip_interpolate # noqa + except ImportError: + raise nose.SkipTest('scipy.interpolate.pchip missing') + + class TestDataFrameMissingData(tm.TestCase, TestData): _multiprocess_can_split_ = True @@ -436,6 +444,218 @@ def test_fill_value_when_combine_const(self): assert_frame_equal(res, exp) +class TestDataFrameInterpolate(tm.TestCase, TestData): + + def test_interp_basic(self): + df = DataFrame({'A': [1, 2, np.nan, 4], + 'B': [1, 4, 9, np.nan], + 'C': [1, 2, 3, 5], + 'D': list('abcd')}) + expected = DataFrame({'A': [1., 2., 3., 4.], + 'B': [1., 4., 9., 9.], + 'C': [1, 2, 3, 5], + 'D': list('abcd')}) + result = df.interpolate() + assert_frame_equal(result, expected) + + result = df.set_index('C').interpolate() + expected = df.set_index('C') + expected.loc[3, 'A'] = 3 + expected.loc[5, 'B'] = 9 + assert_frame_equal(result, expected) + + def test_interp_bad_method(self): + df = DataFrame({'A': [1, 2, np.nan, 4], + 'B': [1, 4, 9, np.nan], + 'C': [1, 2, 3, 5], + 'D': list('abcd')}) + with tm.assertRaises(ValueError): + df.interpolate(method='not_a_method') + + def test_interp_combo(self): + df = DataFrame({'A': [1., 2., np.nan, 4.], + 'B': [1, 4, 9, np.nan], + 'C': [1, 2, 3, 5], + 'D': list('abcd')}) + + result = df['A'].interpolate() + expected = Series([1., 2., 3., 4.], name='A') + assert_series_equal(result, expected) + + result = df['A'].interpolate(downcast='infer') + expected = Series([1, 2, 3, 4], name='A') + assert_series_equal(result, expected) + + def 
test_interp_nan_idx(self): + df = DataFrame({'A': [1, 2, np.nan, 4], 'B': [np.nan, 2, 3, 4]}) + df = df.set_index('A') + with tm.assertRaises(NotImplementedError): + df.interpolate(method='values') + + def test_interp_various(self): + tm._skip_if_no_scipy() + df = DataFrame({'A': [1, 2, np.nan, 4, 5, np.nan, 7], + 'C': [1, 2, 3, 5, 8, 13, 21]}) + df = df.set_index('C') + expected = df.copy() + result = df.interpolate(method='polynomial', order=1) + + expected.A.loc[3] = 2.66666667 + expected.A.loc[13] = 5.76923076 + assert_frame_equal(result, expected) + + result = df.interpolate(method='cubic') + expected.A.loc[3] = 2.81621174 + expected.A.loc[13] = 5.64146581 + assert_frame_equal(result, expected) + + result = df.interpolate(method='nearest') + expected.A.loc[3] = 2 + expected.A.loc[13] = 5 + assert_frame_equal(result, expected, check_dtype=False) + + result = df.interpolate(method='quadratic') + expected.A.loc[3] = 2.82533638 + expected.A.loc[13] = 6.02817974 + assert_frame_equal(result, expected) + + result = df.interpolate(method='slinear') + expected.A.loc[3] = 2.66666667 + expected.A.loc[13] = 5.76923077 + assert_frame_equal(result, expected) + + result = df.interpolate(method='zero') + expected.A.loc[3] = 2. 
+ expected.A.loc[13] = 5 + assert_frame_equal(result, expected, check_dtype=False) + + result = df.interpolate(method='quadratic') + expected.A.loc[3] = 2.82533638 + expected.A.loc[13] = 6.02817974 + assert_frame_equal(result, expected) + + def test_interp_alt_scipy(self): + tm._skip_if_no_scipy() + df = DataFrame({'A': [1, 2, np.nan, 4, 5, np.nan, 7], + 'C': [1, 2, 3, 5, 8, 13, 21]}) + result = df.interpolate(method='barycentric') + expected = df.copy() + expected.ix[2, 'A'] = 3 + expected.ix[5, 'A'] = 6 + assert_frame_equal(result, expected) + + result = df.interpolate(method='barycentric', downcast='infer') + assert_frame_equal(result, expected.astype(np.int64)) + + result = df.interpolate(method='krogh') + expectedk = df.copy() + expectedk['A'] = expected['A'] + assert_frame_equal(result, expectedk) + + _skip_if_no_pchip() + import scipy + result = df.interpolate(method='pchip') + expected.ix[2, 'A'] = 3 + + if LooseVersion(scipy.__version__) >= '0.17.0': + expected.ix[5, 'A'] = 6.0 + else: + expected.ix[5, 'A'] = 6.125 + + assert_frame_equal(result, expected) + + def test_interp_rowwise(self): + df = DataFrame({0: [1, 2, np.nan, 4], + 1: [2, 3, 4, np.nan], + 2: [np.nan, 4, 5, 6], + 3: [4, np.nan, 6, 7], + 4: [1, 2, 3, 4]}) + result = df.interpolate(axis=1) + expected = df.copy() + expected.loc[3, 1] = 5 + expected.loc[0, 2] = 3 + expected.loc[1, 3] = 3 + expected[4] = expected[4].astype(np.float64) + assert_frame_equal(result, expected) + + # scipy route + tm._skip_if_no_scipy() + result = df.interpolate(axis=1, method='values') + assert_frame_equal(result, expected) + + result = df.interpolate(axis=0) + expected = df.interpolate() + assert_frame_equal(result, expected) + + def test_rowwise_alt(self): + df = DataFrame({0: [0, .5, 1., np.nan, 4, 8, np.nan, np.nan, 64], + 1: [1, 2, 3, 4, 3, 2, 1, 0, -1]}) + df.interpolate(axis=0) + + def test_interp_leading_nans(self): + df = DataFrame({"A": [np.nan, np.nan, .5, .25, 0], + "B": [np.nan, -3, -3.5, np.nan, -4]}) + 
result = df.interpolate() + expected = df.copy() + expected['B'].loc[3] = -3.75 + assert_frame_equal(result, expected) + + tm._skip_if_no_scipy() + result = df.interpolate(method='polynomial', order=1) + assert_frame_equal(result, expected) + + def test_interp_raise_on_only_mixed(self): + df = DataFrame({'A': [1, 2, np.nan, 4], + 'B': ['a', 'b', 'c', 'd'], + 'C': [np.nan, 2, 5, 7], + 'D': [np.nan, np.nan, 9, 9], + 'E': [1, 2, 3, 4]}) + with tm.assertRaises(TypeError): + df.interpolate(axis=1) + + def test_interp_inplace(self): + df = DataFrame({'a': [1., 2., np.nan, 4.]}) + expected = DataFrame({'a': [1., 2., 3., 4.]}) + result = df.copy() + result['a'].interpolate(inplace=True) + assert_frame_equal(result, expected) + + result = df.copy() + result['a'].interpolate(inplace=True, downcast='infer') + assert_frame_equal(result, expected.astype('int64')) + + def test_interp_inplace_row(self): + # GH 10395 + result = DataFrame({'a': [1., 2., 3., 4.], + 'b': [np.nan, 2., 3., 4.], + 'c': [3, 2, 2, 2]}) + expected = result.interpolate(method='linear', axis=1, inplace=False) + result.interpolate(method='linear', axis=1, inplace=True) + assert_frame_equal(result, expected) + + def test_interp_ignore_all_good(self): + # GH + df = DataFrame({'A': [1, 2, np.nan, 4], + 'B': [1, 2, 3, 4], + 'C': [1., 2., np.nan, 4.], + 'D': [1., 2., 3., 4.]}) + expected = DataFrame({'A': np.array( + [1, 2, 3, 4], dtype='float64'), + 'B': np.array( + [1, 2, 3, 4], dtype='int64'), + 'C': np.array( + [1., 2., 3, 4.], dtype='float64'), + 'D': np.array( + [1., 2., 3., 4.], dtype='float64')}) + + result = df.interpolate(downcast=None) + assert_frame_equal(result, expected) + + # all good + result = df[['B', 'D']].interpolate(downcast=None) + assert_frame_equal(result, df[['B', 'D']]) + + if __name__ == '__main__': import nose nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py index 
302e05ef3ae8a..f0f28f692f3c2 100644 --- a/pandas/tests/series/test_missing.py +++ b/pandas/tests/series/test_missing.py @@ -7,7 +7,8 @@ import numpy as np import pandas as pd -from pandas import Series, isnull +from pandas import (Series, isnull, date_range, + MultiIndex, Index) from pandas.tseries.index import Timestamp from pandas.compat import range @@ -17,6 +18,20 @@ from .common import TestData +def _skip_if_no_pchip(): + try: + from scipy.interpolate import pchip_interpolate # noqa + except ImportError: + raise nose.SkipTest('scipy.interpolate.pchip missing') + + +def _skip_if_no_akima(): + try: + from scipy.interpolate import Akima1DInterpolator # noqa + except ImportError: + raise nose.SkipTest('scipy.interpolate.Akima1DInterpolator missing') + + class TestSeriesMissingData(TestData, tm.TestCase): _multiprocess_can_split_ = True @@ -462,6 +477,361 @@ def test_fill_value_when_combine_const(self): assert_series_equal(res, exp) +class TestSeriesInterpolateData(TestData, tm.TestCase): + + def test_interpolate(self): + ts = Series(np.arange(len(self.ts), dtype=float), self.ts.index) + + ts_copy = ts.copy() + ts_copy[5:10] = np.NaN + + linear_interp = ts_copy.interpolate(method='linear') + self.assert_numpy_array_equal(linear_interp, ts) + + ord_ts = Series([d.toordinal() for d in self.ts.index], + index=self.ts.index).astype(float) + + ord_ts_copy = ord_ts.copy() + ord_ts_copy[5:10] = np.NaN + + time_interp = ord_ts_copy.interpolate(method='time') + self.assert_numpy_array_equal(time_interp, ord_ts) + + # try time interpolation on a non-TimeSeries + # Only raises ValueError if there are NaNs. 
+ non_ts = self.series.copy() + non_ts[0] = np.NaN + self.assertRaises(ValueError, non_ts.interpolate, method='time') + + def test_interpolate_pchip(self): + tm._skip_if_no_scipy() + _skip_if_no_pchip() + + ser = Series(np.sort(np.random.uniform(size=100))) + + # interpolate at new_index + new_index = ser.index.union(Index([49.25, 49.5, 49.75, 50.25, 50.5, + 50.75])) + interp_s = ser.reindex(new_index).interpolate(method='pchip') + # does not blow up, GH5977 + interp_s[49:51] + + def test_interpolate_akima(self): + tm._skip_if_no_scipy() + _skip_if_no_akima() + + ser = Series([10, 11, 12, 13]) + + expected = Series([11.00, 11.25, 11.50, 11.75, + 12.00, 12.25, 12.50, 12.75, 13.00], + index=Index([1.0, 1.25, 1.5, 1.75, + 2.0, 2.25, 2.5, 2.75, 3.0])) + # interpolate at new_index + new_index = ser.index.union(Index([1.25, 1.5, 1.75, 2.25, 2.5, 2.75])) + interp_s = ser.reindex(new_index).interpolate(method='akima') + assert_series_equal(interp_s[1:3], expected) + + def test_interpolate_corners(self): + s = Series([np.nan, np.nan]) + assert_series_equal(s.interpolate(), s) + + s = Series([]).interpolate() + assert_series_equal(s.interpolate(), s) + + tm._skip_if_no_scipy() + s = Series([np.nan, np.nan]) + assert_series_equal(s.interpolate(method='polynomial', order=1), s) + + s = Series([]).interpolate() + assert_series_equal(s.interpolate(method='polynomial', order=1), s) + + def test_interpolate_index_values(self): + s = Series(np.nan, index=np.sort(np.random.rand(30))) + s[::3] = np.random.randn(10) + + vals = s.index.values.astype(float) + + result = s.interpolate(method='index') + + expected = s.copy() + bad = isnull(expected.values) + good = ~bad + expected = Series(np.interp(vals[bad], vals[good], + s.values[good]), + index=s.index[bad]) + + assert_series_equal(result[bad], expected) + + # 'values' is synonymous with 'index' for the method kwarg + other_result = s.interpolate(method='values') + + assert_series_equal(other_result, result) + 
assert_series_equal(other_result[bad], expected) + + def test_interpolate_non_ts(self): + s = Series([1, 3, np.nan, np.nan, np.nan, 11]) + with tm.assertRaises(ValueError): + s.interpolate(method='time') + + # New interpolation tests + def test_nan_interpolate(self): + s = Series([0, 1, np.nan, 3]) + result = s.interpolate() + expected = Series([0., 1., 2., 3.]) + assert_series_equal(result, expected) + + tm._skip_if_no_scipy() + result = s.interpolate(method='polynomial', order=1) + assert_series_equal(result, expected) + + def test_nan_irregular_index(self): + s = Series([1, 2, np.nan, 4], index=[1, 3, 5, 9]) + result = s.interpolate() + expected = Series([1., 2., 3., 4.], index=[1, 3, 5, 9]) + assert_series_equal(result, expected) + + def test_nan_str_index(self): + s = Series([0, 1, 2, np.nan], index=list('abcd')) + result = s.interpolate() + expected = Series([0., 1., 2., 2.], index=list('abcd')) + assert_series_equal(result, expected) + + def test_interp_quad(self): + tm._skip_if_no_scipy() + sq = Series([1, 4, np.nan, 16], index=[1, 2, 3, 4]) + result = sq.interpolate(method='quadratic') + expected = Series([1., 4., 9., 16.], index=[1, 2, 3, 4]) + assert_series_equal(result, expected) + + def test_interp_scipy_basic(self): + tm._skip_if_no_scipy() + s = Series([1, 3, np.nan, 12, np.nan, 25]) + # slinear + expected = Series([1., 3., 7.5, 12., 18.5, 25.]) + result = s.interpolate(method='slinear') + assert_series_equal(result, expected) + + result = s.interpolate(method='slinear', downcast='infer') + assert_series_equal(result, expected) + # nearest + expected = Series([1, 3, 3, 12, 12, 25]) + result = s.interpolate(method='nearest') + assert_series_equal(result, expected.astype('float')) + + result = s.interpolate(method='nearest', downcast='infer') + assert_series_equal(result, expected) + # zero + expected = Series([1, 3, 3, 12, 12, 25]) + result = s.interpolate(method='zero') + assert_series_equal(result, expected.astype('float')) + + result = 
s.interpolate(method='zero', downcast='infer') + assert_series_equal(result, expected) + # quadratic + expected = Series([1, 3., 6.769231, 12., 18.230769, 25.]) + result = s.interpolate(method='quadratic') + assert_series_equal(result, expected) + + result = s.interpolate(method='quadratic', downcast='infer') + assert_series_equal(result, expected) + # cubic + expected = Series([1., 3., 6.8, 12., 18.2, 25.]) + result = s.interpolate(method='cubic') + assert_series_equal(result, expected) + + def test_interp_limit(self): + s = Series([1, 3, np.nan, np.nan, np.nan, 11]) + + expected = Series([1., 3., 5., 7., np.nan, 11.]) + result = s.interpolate(method='linear', limit=2) + assert_series_equal(result, expected) + + def test_interp_limit_forward(self): + s = Series([1, 3, np.nan, np.nan, np.nan, 11]) + + # Provide 'forward' (the default) explicitly here. + expected = Series([1., 3., 5., 7., np.nan, 11.]) + + result = s.interpolate(method='linear', limit=2, + limit_direction='forward') + assert_series_equal(result, expected) + + result = s.interpolate(method='linear', limit=2, + limit_direction='FORWARD') + assert_series_equal(result, expected) + + def test_interp_limit_bad_direction(self): + s = Series([1, 3, np.nan, np.nan, np.nan, 11]) + + self.assertRaises(ValueError, s.interpolate, method='linear', limit=2, + limit_direction='abc') + + # raises an error even if no limit is specified. + self.assertRaises(ValueError, s.interpolate, method='linear', + limit_direction='abc') + + def test_interp_limit_direction(self): + # These tests are for issue #9218 -- fill NaNs in both directions. 
+ s = Series([1, 3, np.nan, np.nan, np.nan, 11]) + + expected = Series([1., 3., np.nan, 7., 9., 11.]) + result = s.interpolate(method='linear', limit=2, + limit_direction='backward') + assert_series_equal(result, expected) + + expected = Series([1., 3., 5., np.nan, 9., 11.]) + result = s.interpolate(method='linear', limit=1, + limit_direction='both') + assert_series_equal(result, expected) + + # Check that this works on a longer series of nans. + s = Series([1, 3, np.nan, np.nan, np.nan, 7, 9, np.nan, np.nan, 12, + np.nan]) + + expected = Series([1., 3., 4., 5., 6., 7., 9., 10., 11., 12., 12.]) + result = s.interpolate(method='linear', limit=2, + limit_direction='both') + assert_series_equal(result, expected) + + expected = Series([1., 3., 4., np.nan, 6., 7., 9., 10., 11., 12., 12.]) + result = s.interpolate(method='linear', limit=1, + limit_direction='both') + assert_series_equal(result, expected) + + def test_interp_limit_to_ends(self): + # These tests are for issue #10420 -- flow back to beginning. + s = Series([np.nan, np.nan, 5, 7, 9, np.nan]) + + expected = Series([5., 5., 5., 7., 9., np.nan]) + result = s.interpolate(method='linear', limit=2, + limit_direction='backward') + assert_series_equal(result, expected) + + expected = Series([5., 5., 5., 7., 9., 9.]) + result = s.interpolate(method='linear', limit=2, + limit_direction='both') + assert_series_equal(result, expected) + + def test_interp_limit_before_ends(self): + # These tests are for issue #11115 -- limit ends properly. 
+ s = Series([np.nan, np.nan, 5, 7, np.nan, np.nan]) + + expected = Series([np.nan, np.nan, 5., 7., 7., np.nan]) + result = s.interpolate(method='linear', limit=1, + limit_direction='forward') + assert_series_equal(result, expected) + + expected = Series([np.nan, 5., 5., 7., np.nan, np.nan]) + result = s.interpolate(method='linear', limit=1, + limit_direction='backward') + assert_series_equal(result, expected) + + expected = Series([np.nan, 5., 5., 7., 7., np.nan]) + result = s.interpolate(method='linear', limit=1, + limit_direction='both') + assert_series_equal(result, expected) + + def test_interp_all_good(self): + # scipy + tm._skip_if_no_scipy() + s = Series([1, 2, 3]) + result = s.interpolate(method='polynomial', order=1) + assert_series_equal(result, s) + + # non-scipy + result = s.interpolate() + assert_series_equal(result, s) + + def test_interp_multiIndex(self): + idx = MultiIndex.from_tuples([(0, 'a'), (1, 'b'), (2, 'c')]) + s = Series([1, 2, np.nan], index=idx) + + expected = s.copy() + expected.loc[2] = 2 + result = s.interpolate() + assert_series_equal(result, expected) + + tm._skip_if_no_scipy() + with tm.assertRaises(ValueError): + s.interpolate(method='polynomial', order=1) + + def test_interp_nonmono_raise(self): + tm._skip_if_no_scipy() + s = Series([1, np.nan, 3], index=[0, 2, 1]) + with tm.assertRaises(ValueError): + s.interpolate(method='krogh') + + def test_interp_datetime64(self): + tm._skip_if_no_scipy() + df = Series([1, np.nan, 3], index=date_range('1/1/2000', periods=3)) + result = df.interpolate(method='nearest') + expected = Series([1., 1., 3.], + index=date_range('1/1/2000', periods=3)) + assert_series_equal(result, expected) + + def test_interp_limit_no_nans(self): + # GH 7173 + s = pd.Series([1., 2., 3.]) + result = s.interpolate(limit=1) + expected = s + assert_series_equal(result, expected) + + def test_no_order(self): + tm._skip_if_no_scipy() + s = Series([0, 1, np.nan, 3]) + with tm.assertRaises(ValueError): + 
s.interpolate(method='polynomial') + with tm.assertRaises(ValueError): + s.interpolate(method='spline') + + def test_spline(self): + tm._skip_if_no_scipy() + s = Series([1, 2, np.nan, 4, 5, np.nan, 7]) + result = s.interpolate(method='spline', order=1) + expected = Series([1., 2., 3., 4., 5., 6., 7.]) + assert_series_equal(result, expected) + + def test_spline_extrapolate(self): + tm.skip_if_no_package( + 'scipy', '0.15', + 'setting ext on scipy.interpolate.UnivariateSpline') + s = Series([1, 2, 3, 4, np.nan, 6, np.nan]) + result3 = s.interpolate(method='spline', order=1, ext=3) + expected3 = Series([1., 2., 3., 4., 5., 6., 6.]) + assert_series_equal(result3, expected3) + + result1 = s.interpolate(method='spline', order=1, ext=0) + expected1 = Series([1., 2., 3., 4., 5., 6., 7.]) + assert_series_equal(result1, expected1) + + def test_spline_smooth(self): + tm._skip_if_no_scipy() + s = Series([1, 2, np.nan, 4, 5.1, np.nan, 7]) + self.assertNotEqual(s.interpolate(method='spline', order=3, s=0)[5], + s.interpolate(method='spline', order=3)[5]) + + def test_spline_interpolation(self): + tm._skip_if_no_scipy() + + s = Series(np.arange(10) ** 2) + s[np.random.randint(0, 9, 3)] = np.nan + result1 = s.interpolate(method='spline', order=1) + expected1 = s.interpolate(method='spline', order=1) + assert_series_equal(result1, expected1) + + # GH #10633 + def test_spline_error(self): + tm._skip_if_no_scipy() + + s = pd.Series(np.arange(10) ** 2) + s[np.random.randint(0, 9, 3)] = np.nan + with tm.assertRaises(ValueError): + s.interpolate(method='spline') + + with tm.assertRaises(ValueError): + s.interpolate(method='spline', order=0) + + if __name__ == '__main__': import nose nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], diff --git a/pandas/tests/test_generic.py b/pandas/tests/test_generic.py index 56838184a3670..1c2494e7d6b09 100644 --- a/pandas/tests/test_generic.py +++ b/pandas/tests/test_generic.py @@ -7,7 +7,6 @@ from numpy import nan import pandas 
as pd -from distutils.version import LooseVersion from pandas import (Index, Series, DataFrame, Panel, isnull, date_range, period_range, Panel4D) from pandas.core.index import MultiIndex @@ -28,19 +27,6 @@ import pandas.util.testing as tm -def _skip_if_no_pchip(): - try: - from scipy.interpolate import pchip_interpolate # noqa - except ImportError: - raise nose.SkipTest('scipy.interpolate.pchip missing') - - -def _skip_if_no_akima(): - try: - from scipy.interpolate import Akima1DInterpolator # noqa - except ImportError: - raise nose.SkipTest('scipy.interpolate.Akima1DInterpolator missing') - # ---------------------------------------------------------------------- # Generic types test cases @@ -781,303 +767,6 @@ def finalize(self, other, method=None, **kwargs): Series._metadata = _metadata Series.__finalize__ = _finalize - def test_interpolate(self): - ts = Series(np.arange(len(self.ts), dtype=float), self.ts.index) - - ts_copy = ts.copy() - ts_copy[5:10] = np.NaN - - linear_interp = ts_copy.interpolate(method='linear') - self.assert_numpy_array_equal(linear_interp, ts) - - ord_ts = Series([d.toordinal() for d in self.ts.index], - index=self.ts.index).astype(float) - - ord_ts_copy = ord_ts.copy() - ord_ts_copy[5:10] = np.NaN - - time_interp = ord_ts_copy.interpolate(method='time') - self.assert_numpy_array_equal(time_interp, ord_ts) - - # try time interpolation on a non-TimeSeries - # Only raises ValueError if there are NaNs. 
- non_ts = self.series.copy() - non_ts[0] = np.NaN - self.assertRaises(ValueError, non_ts.interpolate, method='time') - - def test_interpolate_pchip(self): - tm._skip_if_no_scipy() - _skip_if_no_pchip() - - ser = Series(np.sort(np.random.uniform(size=100))) - - # interpolate at new_index - new_index = ser.index.union(Index([49.25, 49.5, 49.75, 50.25, 50.5, - 50.75])) - interp_s = ser.reindex(new_index).interpolate(method='pchip') - # does not blow up, GH5977 - interp_s[49:51] - - def test_interpolate_akima(self): - tm._skip_if_no_scipy() - _skip_if_no_akima() - - ser = Series([10, 11, 12, 13]) - - expected = Series([11.00, 11.25, 11.50, 11.75, - 12.00, 12.25, 12.50, 12.75, 13.00], - index=Index([1.0, 1.25, 1.5, 1.75, - 2.0, 2.25, 2.5, 2.75, 3.0])) - # interpolate at new_index - new_index = ser.index.union(Index([1.25, 1.5, 1.75, 2.25, 2.5, 2.75])) - interp_s = ser.reindex(new_index).interpolate(method='akima') - assert_series_equal(interp_s[1:3], expected) - - def test_interpolate_corners(self): - s = Series([np.nan, np.nan]) - assert_series_equal(s.interpolate(), s) - - s = Series([]).interpolate() - assert_series_equal(s.interpolate(), s) - - tm._skip_if_no_scipy() - s = Series([np.nan, np.nan]) - assert_series_equal(s.interpolate(method='polynomial', order=1), s) - - s = Series([]).interpolate() - assert_series_equal(s.interpolate(method='polynomial', order=1), s) - - def test_interpolate_index_values(self): - s = Series(np.nan, index=np.sort(np.random.rand(30))) - s[::3] = np.random.randn(10) - - vals = s.index.values.astype(float) - - result = s.interpolate(method='index') - - expected = s.copy() - bad = isnull(expected.values) - good = ~bad - expected = Series(np.interp(vals[bad], vals[good], - s.values[good]), - index=s.index[bad]) - - assert_series_equal(result[bad], expected) - - # 'values' is synonymous with 'index' for the method kwarg - other_result = s.interpolate(method='values') - - assert_series_equal(other_result, result) - 
assert_series_equal(other_result[bad], expected) - - def test_interpolate_non_ts(self): - s = Series([1, 3, np.nan, np.nan, np.nan, 11]) - with tm.assertRaises(ValueError): - s.interpolate(method='time') - - # New interpolation tests - def test_nan_interpolate(self): - s = Series([0, 1, np.nan, 3]) - result = s.interpolate() - expected = Series([0., 1., 2., 3.]) - assert_series_equal(result, expected) - - tm._skip_if_no_scipy() - result = s.interpolate(method='polynomial', order=1) - assert_series_equal(result, expected) - - def test_nan_irregular_index(self): - s = Series([1, 2, np.nan, 4], index=[1, 3, 5, 9]) - result = s.interpolate() - expected = Series([1., 2., 3., 4.], index=[1, 3, 5, 9]) - assert_series_equal(result, expected) - - def test_nan_str_index(self): - s = Series([0, 1, 2, np.nan], index=list('abcd')) - result = s.interpolate() - expected = Series([0., 1., 2., 2.], index=list('abcd')) - assert_series_equal(result, expected) - - def test_interp_quad(self): - tm._skip_if_no_scipy() - sq = Series([1, 4, np.nan, 16], index=[1, 2, 3, 4]) - result = sq.interpolate(method='quadratic') - expected = Series([1., 4., 9., 16.], index=[1, 2, 3, 4]) - assert_series_equal(result, expected) - - def test_interp_scipy_basic(self): - tm._skip_if_no_scipy() - s = Series([1, 3, np.nan, 12, np.nan, 25]) - # slinear - expected = Series([1., 3., 7.5, 12., 18.5, 25.]) - result = s.interpolate(method='slinear') - assert_series_equal(result, expected) - - result = s.interpolate(method='slinear', downcast='infer') - assert_series_equal(result, expected) - # nearest - expected = Series([1, 3, 3, 12, 12, 25]) - result = s.interpolate(method='nearest') - assert_series_equal(result, expected.astype('float')) - - result = s.interpolate(method='nearest', downcast='infer') - assert_series_equal(result, expected) - # zero - expected = Series([1, 3, 3, 12, 12, 25]) - result = s.interpolate(method='zero') - assert_series_equal(result, expected.astype('float')) - - result = 
s.interpolate(method='zero', downcast='infer') - assert_series_equal(result, expected) - # quadratic - expected = Series([1, 3., 6.769231, 12., 18.230769, 25.]) - result = s.interpolate(method='quadratic') - assert_series_equal(result, expected) - - result = s.interpolate(method='quadratic', downcast='infer') - assert_series_equal(result, expected) - # cubic - expected = Series([1., 3., 6.8, 12., 18.2, 25.]) - result = s.interpolate(method='cubic') - assert_series_equal(result, expected) - - def test_interp_limit(self): - s = Series([1, 3, np.nan, np.nan, np.nan, 11]) - - expected = Series([1., 3., 5., 7., np.nan, 11.]) - result = s.interpolate(method='linear', limit=2) - assert_series_equal(result, expected) - - def test_interp_limit_forward(self): - s = Series([1, 3, np.nan, np.nan, np.nan, 11]) - - # Provide 'forward' (the default) explicitly here. - expected = Series([1., 3., 5., 7., np.nan, 11.]) - - result = s.interpolate(method='linear', limit=2, - limit_direction='forward') - assert_series_equal(result, expected) - - result = s.interpolate(method='linear', limit=2, - limit_direction='FORWARD') - assert_series_equal(result, expected) - - def test_interp_limit_bad_direction(self): - s = Series([1, 3, np.nan, np.nan, np.nan, 11]) - - self.assertRaises(ValueError, s.interpolate, method='linear', limit=2, - limit_direction='abc') - - # raises an error even if no limit is specified. - self.assertRaises(ValueError, s.interpolate, method='linear', - limit_direction='abc') - - def test_interp_limit_direction(self): - # These tests are for issue #9218 -- fill NaNs in both directions. 
- s = Series([1, 3, np.nan, np.nan, np.nan, 11]) - - expected = Series([1., 3., np.nan, 7., 9., 11.]) - result = s.interpolate(method='linear', limit=2, - limit_direction='backward') - assert_series_equal(result, expected) - - expected = Series([1., 3., 5., np.nan, 9., 11.]) - result = s.interpolate(method='linear', limit=1, - limit_direction='both') - assert_series_equal(result, expected) - - # Check that this works on a longer series of nans. - s = Series([1, 3, np.nan, np.nan, np.nan, 7, 9, np.nan, np.nan, 12, - np.nan]) - - expected = Series([1., 3., 4., 5., 6., 7., 9., 10., 11., 12., 12.]) - result = s.interpolate(method='linear', limit=2, - limit_direction='both') - assert_series_equal(result, expected) - - expected = Series([1., 3., 4., np.nan, 6., 7., 9., 10., 11., 12., 12.]) - result = s.interpolate(method='linear', limit=1, - limit_direction='both') - assert_series_equal(result, expected) - - def test_interp_limit_to_ends(self): - # These test are for issue #10420 -- flow back to beginning. - s = Series([np.nan, np.nan, 5, 7, 9, np.nan]) - - expected = Series([5., 5., 5., 7., 9., np.nan]) - result = s.interpolate(method='linear', limit=2, - limit_direction='backward') - assert_series_equal(result, expected) - - expected = Series([5., 5., 5., 7., 9., 9.]) - result = s.interpolate(method='linear', limit=2, - limit_direction='both') - assert_series_equal(result, expected) - - def test_interp_limit_before_ends(self): - # These test are for issue #11115 -- limit ends properly. 
- s = Series([np.nan, np.nan, 5, 7, np.nan, np.nan]) - - expected = Series([np.nan, np.nan, 5., 7., 7., np.nan]) - result = s.interpolate(method='linear', limit=1, - limit_direction='forward') - assert_series_equal(result, expected) - - expected = Series([np.nan, 5., 5., 7., np.nan, np.nan]) - result = s.interpolate(method='linear', limit=1, - limit_direction='backward') - assert_series_equal(result, expected) - - expected = Series([np.nan, 5., 5., 7., 7., np.nan]) - result = s.interpolate(method='linear', limit=1, - limit_direction='both') - assert_series_equal(result, expected) - - def test_interp_all_good(self): - # scipy - tm._skip_if_no_scipy() - s = Series([1, 2, 3]) - result = s.interpolate(method='polynomial', order=1) - assert_series_equal(result, s) - - # non-scipy - result = s.interpolate() - assert_series_equal(result, s) - - def test_interp_multiIndex(self): - idx = MultiIndex.from_tuples([(0, 'a'), (1, 'b'), (2, 'c')]) - s = Series([1, 2, np.nan], index=idx) - - expected = s.copy() - expected.loc[2] = 2 - result = s.interpolate() - assert_series_equal(result, expected) - - tm._skip_if_no_scipy() - with tm.assertRaises(ValueError): - s.interpolate(method='polynomial', order=1) - - def test_interp_nonmono_raise(self): - tm._skip_if_no_scipy() - s = Series([1, np.nan, 3], index=[0, 2, 1]) - with tm.assertRaises(ValueError): - s.interpolate(method='krogh') - - def test_interp_datetime64(self): - tm._skip_if_no_scipy() - df = Series([1, np.nan, 3], index=date_range('1/1/2000', periods=3)) - result = df.interpolate(method='nearest') - expected = Series([1., 1., 3.], - index=date_range('1/1/2000', periods=3)) - assert_series_equal(result, expected) - - def test_interp_limit_no_nans(self): - # GH 7173 - s = pd.Series([1., 2., 3.]) - result = s.interpolate(limit=1) - expected = s - assert_series_equal(result, expected) - def test_describe(self): self.series.describe() self.ts.describe() @@ -1228,215 +917,6 @@ def test_get_numeric_data_preserve_dtype(self): 
expected = DataFrame(index=[0, 1, 2], dtype=object) self._compare(result, expected) - def test_interp_basic(self): - df = DataFrame({'A': [1, 2, np.nan, 4], - 'B': [1, 4, 9, np.nan], - 'C': [1, 2, 3, 5], - 'D': list('abcd')}) - expected = DataFrame({'A': [1., 2., 3., 4.], - 'B': [1., 4., 9., 9.], - 'C': [1, 2, 3, 5], - 'D': list('abcd')}) - result = df.interpolate() - assert_frame_equal(result, expected) - - result = df.set_index('C').interpolate() - expected = df.set_index('C') - expected.loc[3, 'A'] = 3 - expected.loc[5, 'B'] = 9 - assert_frame_equal(result, expected) - - def test_interp_bad_method(self): - df = DataFrame({'A': [1, 2, np.nan, 4], - 'B': [1, 4, 9, np.nan], - 'C': [1, 2, 3, 5], - 'D': list('abcd')}) - with tm.assertRaises(ValueError): - df.interpolate(method='not_a_method') - - def test_interp_combo(self): - df = DataFrame({'A': [1., 2., np.nan, 4.], - 'B': [1, 4, 9, np.nan], - 'C': [1, 2, 3, 5], - 'D': list('abcd')}) - - result = df['A'].interpolate() - expected = Series([1., 2., 3., 4.], name='A') - assert_series_equal(result, expected) - - result = df['A'].interpolate(downcast='infer') - expected = Series([1, 2, 3, 4], name='A') - assert_series_equal(result, expected) - - def test_interp_nan_idx(self): - df = DataFrame({'A': [1, 2, np.nan, 4], 'B': [np.nan, 2, 3, 4]}) - df = df.set_index('A') - with tm.assertRaises(NotImplementedError): - df.interpolate(method='values') - - def test_interp_various(self): - tm._skip_if_no_scipy() - df = DataFrame({'A': [1, 2, np.nan, 4, 5, np.nan, 7], - 'C': [1, 2, 3, 5, 8, 13, 21]}) - df = df.set_index('C') - expected = df.copy() - result = df.interpolate(method='polynomial', order=1) - - expected.A.loc[3] = 2.66666667 - expected.A.loc[13] = 5.76923076 - assert_frame_equal(result, expected) - - result = df.interpolate(method='cubic') - expected.A.loc[3] = 2.81621174 - expected.A.loc[13] = 5.64146581 - assert_frame_equal(result, expected) - - result = df.interpolate(method='nearest') - expected.A.loc[3] = 2 - 
expected.A.loc[13] = 5 - assert_frame_equal(result, expected, check_dtype=False) - - result = df.interpolate(method='quadratic') - expected.A.loc[3] = 2.82533638 - expected.A.loc[13] = 6.02817974 - assert_frame_equal(result, expected) - - result = df.interpolate(method='slinear') - expected.A.loc[3] = 2.66666667 - expected.A.loc[13] = 5.76923077 - assert_frame_equal(result, expected) - - result = df.interpolate(method='zero') - expected.A.loc[3] = 2. - expected.A.loc[13] = 5 - assert_frame_equal(result, expected, check_dtype=False) - - result = df.interpolate(method='quadratic') - expected.A.loc[3] = 2.82533638 - expected.A.loc[13] = 6.02817974 - assert_frame_equal(result, expected) - - def test_interp_alt_scipy(self): - tm._skip_if_no_scipy() - df = DataFrame({'A': [1, 2, np.nan, 4, 5, np.nan, 7], - 'C': [1, 2, 3, 5, 8, 13, 21]}) - result = df.interpolate(method='barycentric') - expected = df.copy() - expected.ix[2, 'A'] = 3 - expected.ix[5, 'A'] = 6 - assert_frame_equal(result, expected) - - result = df.interpolate(method='barycentric', downcast='infer') - assert_frame_equal(result, expected.astype(np.int64)) - - result = df.interpolate(method='krogh') - expectedk = df.copy() - expectedk['A'] = expected['A'] - assert_frame_equal(result, expectedk) - - _skip_if_no_pchip() - import scipy - result = df.interpolate(method='pchip') - expected.ix[2, 'A'] = 3 - - if LooseVersion(scipy.__version__) >= '0.17.0': - expected.ix[5, 'A'] = 6.0 - else: - expected.ix[5, 'A'] = 6.125 - - assert_frame_equal(result, expected) - - def test_interp_rowwise(self): - df = DataFrame({0: [1, 2, np.nan, 4], - 1: [2, 3, 4, np.nan], - 2: [np.nan, 4, 5, 6], - 3: [4, np.nan, 6, 7], - 4: [1, 2, 3, 4]}) - result = df.interpolate(axis=1) - expected = df.copy() - expected.loc[3, 1] = 5 - expected.loc[0, 2] = 3 - expected.loc[1, 3] = 3 - expected[4] = expected[4].astype(np.float64) - assert_frame_equal(result, expected) - - # scipy route - tm._skip_if_no_scipy() - result = df.interpolate(axis=1, 
method='values') - assert_frame_equal(result, expected) - - result = df.interpolate(axis=0) - expected = df.interpolate() - assert_frame_equal(result, expected) - - def test_rowwise_alt(self): - df = DataFrame({0: [0, .5, 1., np.nan, 4, 8, np.nan, np.nan, 64], - 1: [1, 2, 3, 4, 3, 2, 1, 0, -1]}) - df.interpolate(axis=0) - - def test_interp_leading_nans(self): - df = DataFrame({"A": [np.nan, np.nan, .5, .25, 0], - "B": [np.nan, -3, -3.5, np.nan, -4]}) - result = df.interpolate() - expected = df.copy() - expected['B'].loc[3] = -3.75 - assert_frame_equal(result, expected) - - tm._skip_if_no_scipy() - result = df.interpolate(method='polynomial', order=1) - assert_frame_equal(result, expected) - - def test_interp_raise_on_only_mixed(self): - df = DataFrame({'A': [1, 2, np.nan, 4], - 'B': ['a', 'b', 'c', 'd'], - 'C': [np.nan, 2, 5, 7], - 'D': [np.nan, np.nan, 9, 9], - 'E': [1, 2, 3, 4]}) - with tm.assertRaises(TypeError): - df.interpolate(axis=1) - - def test_interp_inplace(self): - df = DataFrame({'a': [1., 2., np.nan, 4.]}) - expected = DataFrame({'a': [1., 2., 3., 4.]}) - result = df.copy() - result['a'].interpolate(inplace=True) - assert_frame_equal(result, expected) - - result = df.copy() - result['a'].interpolate(inplace=True, downcast='infer') - assert_frame_equal(result, expected.astype('int64')) - - def test_interp_inplace_row(self): - # GH 10395 - result = DataFrame({'a': [1., 2., 3., 4.], - 'b': [np.nan, 2., 3., 4.], - 'c': [3, 2, 2, 2]}) - expected = result.interpolate(method='linear', axis=1, inplace=False) - result.interpolate(method='linear', axis=1, inplace=True) - assert_frame_equal(result, expected) - - def test_interp_ignore_all_good(self): - # GH - df = DataFrame({'A': [1, 2, np.nan, 4], - 'B': [1, 2, 3, 4], - 'C': [1., 2., np.nan, 4.], - 'D': [1., 2., 3., 4.]}) - expected = DataFrame({'A': np.array( - [1, 2, 3, 4], dtype='float64'), - 'B': np.array( - [1, 2, 3, 4], dtype='int64'), - 'C': np.array( - [1., 2., 3, 4.], dtype='float64'), - 'D': np.array( 
- [1., 2., 3., 4.], dtype='float64')}) - - result = df.interpolate(downcast=None) - assert_frame_equal(result, expected) - - # all good - result = df[['B', 'D']].interpolate(downcast=None) - assert_frame_equal(result, df[['B', 'D']]) - def test_describe(self): tm.makeDataFrame().describe() tm.makeMixedDataFrame().describe() @@ -1683,61 +1163,6 @@ def test_describe_multi_index_df_column_names(self): self.assertTrue(non_hierarchical_index_df.describe().columns.names == ['A']) - def test_no_order(self): - tm._skip_if_no_scipy() - s = Series([0, 1, np.nan, 3]) - with tm.assertRaises(ValueError): - s.interpolate(method='polynomial') - with tm.assertRaises(ValueError): - s.interpolate(method='spline') - - def test_spline(self): - tm._skip_if_no_scipy() - s = Series([1, 2, np.nan, 4, 5, np.nan, 7]) - result = s.interpolate(method='spline', order=1) - expected = Series([1., 2., 3., 4., 5., 6., 7.]) - assert_series_equal(result, expected) - - def test_spline_extrapolate(self): - tm.skip_if_no_package( - 'scipy', '0.15', - 'setting ext on scipy.interpolate.UnivariateSpline') - s = Series([1, 2, 3, 4, np.nan, 6, np.nan]) - result3 = s.interpolate(method='spline', order=1, ext=3) - expected3 = Series([1., 2., 3., 4., 5., 6., 6.]) - assert_series_equal(result3, expected3) - - result1 = s.interpolate(method='spline', order=1, ext=0) - expected1 = Series([1., 2., 3., 4., 5., 6., 7.]) - assert_series_equal(result1, expected1) - - def test_spline_smooth(self): - tm._skip_if_no_scipy() - s = Series([1, 2, np.nan, 4, 5.1, np.nan, 7]) - self.assertNotEqual(s.interpolate(method='spline', order=3, s=0)[5], - s.interpolate(method='spline', order=3)[5]) - - def test_spline_interpolation(self): - tm._skip_if_no_scipy() - - s = Series(np.arange(10) ** 2) - s[np.random.randint(0, 9, 3)] = np.nan - result1 = s.interpolate(method='spline', order=1) - expected1 = s.interpolate(method='spline', order=1) - assert_series_equal(result1, expected1) - - # GH #10633 - def test_spline_error(self): - 
tm._skip_if_no_scipy() - - s = pd.Series(np.arange(10) ** 2) - s[np.random.randint(0, 9, 3)] = np.nan - with tm.assertRaises(ValueError): - s.interpolate(method='spline') - - with tm.assertRaises(ValueError): - s.interpolate(method='spline', order=0) - def test_metadata_propagation_indiv(self): # groupby