From da891dea902d90e51e705df69ad6d350b585aea0 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Wed, 20 Oct 2021 22:20:02 -0700 Subject: [PATCH] TST: Move some window/moments tests to test_ewm --- .../moments/test_moments_consistency_ewm.py | 52 --- .../tests/window/moments/test_moments_ewm.py | 291 --------------- pandas/tests/window/test_ewm.py | 340 ++++++++++++++++++ 3 files changed, 340 insertions(+), 343 deletions(-) diff --git a/pandas/tests/window/moments/test_moments_consistency_ewm.py b/pandas/tests/window/moments/test_moments_consistency_ewm.py index c79d02fd3237e..b41d2ec23a52d 100644 --- a/pandas/tests/window/moments/test_moments_consistency_ewm.py +++ b/pandas/tests/window/moments/test_moments_consistency_ewm.py @@ -18,58 +18,6 @@ def test_ewm_pairwise_cov_corr(func, frame): tm.assert_series_equal(result, expected, check_names=False) -@pytest.mark.parametrize("name", ["cov", "corr"]) -def test_ewm_corr_cov(name): - A = Series(np.random.randn(50), index=np.arange(50)) - B = A[2:] + np.random.randn(48) - - A[:10] = np.NaN - B[-10:] = np.NaN - - result = getattr(A.ewm(com=20, min_periods=5), name)(B) - assert np.isnan(result.values[:14]).all() - assert not np.isnan(result.values[14:]).any() - - -@pytest.mark.parametrize("min_periods", [0, 1, 2]) -@pytest.mark.parametrize("name", ["cov", "corr"]) -def test_ewm_corr_cov_min_periods(name, min_periods): - # GH 7898 - A = Series(np.random.randn(50), index=np.arange(50)) - B = A[2:] + np.random.randn(48) - - A[:10] = np.NaN - B[-10:] = np.NaN - - result = getattr(A.ewm(com=20, min_periods=min_periods), name)(B) - # binary functions (ewmcov, ewmcorr) with bias=False require at - # least two values - assert np.isnan(result.values[:11]).all() - assert not np.isnan(result.values[11:]).any() - - # check series of length 0 - empty = Series([], dtype=np.float64) - result = getattr(empty.ewm(com=50, min_periods=min_periods), name)(empty) - tm.assert_series_equal(result, empty) - - # check series of length 1 - result = getattr(Series([1.0]).ewm(com=50, min_periods=min_periods), name)( - Series([1.0]) - ) - tm.assert_series_equal(result, Series([np.NaN])) - - -@pytest.mark.parametrize("name", ["cov", "corr"]) -def test_different_input_array_raise_exception(name): - A = Series(np.random.randn(50), index=np.arange(50)) - A[:10] = np.NaN - - msg = "other must be a DataFrame or Series" - # exception raised is Exception - with pytest.raises(ValueError, match=msg): - getattr(A.ewm(com=20, min_periods=5), name)(np.random.randn(50)) - - def create_mock_weights(obj, com, adjust, ignore_na): if isinstance(obj, DataFrame): if not len(obj.columns): diff --git a/pandas/tests/window/moments/test_moments_ewm.py b/pandas/tests/window/moments/test_moments_ewm.py index a7b1d3fbca3fb..f87ff654e554a 100644 --- a/pandas/tests/window/moments/test_moments_ewm.py +++ b/pandas/tests/window/moments/test_moments_ewm.py @@ -1,4 +1,3 @@ -import numpy as np import pytest from pandas import ( @@ -20,187 +19,6 @@ def test_ewma_frame(frame, name): assert isinstance(frame_result, DataFrame) -def test_ewma_adjust(): - vals = Series(np.zeros(1000)) - vals[5] = 1 - result = vals.ewm(span=100, adjust=False).mean().sum() - assert np.abs(result - 1) < 1e-2 - - -@pytest.mark.parametrize("adjust", [True, False]) -@pytest.mark.parametrize("ignore_na", [True, False]) -def test_ewma_cases(adjust, ignore_na): - # try adjust/ignore_na args matrix - - s = Series([1.0, 2.0, 4.0, 8.0]) - - if adjust: - expected = Series([1.0, 1.6, 2.736842, 4.923077]) - else: - expected = Series([1.0, 1.333333, 2.222222, 4.148148]) - - result = s.ewm(com=2.0, adjust=adjust, ignore_na=ignore_na).mean() - tm.assert_series_equal(result, expected) - - -def test_ewma_nan_handling(): - s = Series([1.0] + [np.nan] * 5 + [1.0]) - result = s.ewm(com=5).mean() - tm.assert_series_equal(result, Series([1.0] * len(s))) - - s = Series([np.nan] * 2 + [1.0] + [np.nan] * 2 + [1.0]) - result = s.ewm(com=5).mean() - tm.assert_series_equal(result, Series([np.nan] * 2 + [1.0] * 4)) - - -@pytest.mark.parametrize( - "s, adjust, ignore_na, w", - [ - ( - Series([np.nan, 1.0, 101.0]), - True, - False, - [np.nan, (1.0 - (1.0 / (1.0 + 2.0))), 1.0], - ), - ( - Series([np.nan, 1.0, 101.0]), - True, - True, - [np.nan, (1.0 - (1.0 / (1.0 + 2.0))), 1.0], - ), - ( - Series([np.nan, 1.0, 101.0]), - False, - False, - [np.nan, (1.0 - (1.0 / (1.0 + 2.0))), (1.0 / (1.0 + 2.0))], - ), - ( - Series([np.nan, 1.0, 101.0]), - False, - True, - [np.nan, (1.0 - (1.0 / (1.0 + 2.0))), (1.0 / (1.0 + 2.0))], - ), - ( - Series([1.0, np.nan, 101.0]), - True, - False, - [(1.0 - (1.0 / (1.0 + 2.0))) ** 2, np.nan, 1.0], - ), - ( - Series([1.0, np.nan, 101.0]), - True, - True, - [(1.0 - (1.0 / (1.0 + 2.0))), np.nan, 1.0], - ), - ( - Series([1.0, np.nan, 101.0]), - False, - False, - [(1.0 - (1.0 / (1.0 + 2.0))) ** 2, np.nan, (1.0 / (1.0 + 2.0))], - ), - ( - Series([1.0, np.nan, 101.0]), - False, - True, - [(1.0 - (1.0 / (1.0 + 2.0))), np.nan, (1.0 / (1.0 + 2.0))], - ), - ( - Series([np.nan, 1.0, np.nan, np.nan, 101.0, np.nan]), - True, - False, - [np.nan, (1.0 - (1.0 / (1.0 + 2.0))) ** 3, np.nan, np.nan, 1.0, np.nan], - ), - ( - Series([np.nan, 1.0, np.nan, np.nan, 101.0, np.nan]), - True, - True, - [np.nan, (1.0 - (1.0 / (1.0 + 2.0))), np.nan, np.nan, 1.0, np.nan], - ), - ( - Series([np.nan, 1.0, np.nan, np.nan, 101.0, np.nan]), - False, - False, - [ - np.nan, - (1.0 - (1.0 / (1.0 + 2.0))) ** 3, - np.nan, - np.nan, - (1.0 / (1.0 + 2.0)), - np.nan, - ], - ), - ( - Series([np.nan, 1.0, np.nan, np.nan, 101.0, np.nan]), - False, - True, - [ - np.nan, - (1.0 - (1.0 / (1.0 + 2.0))), - np.nan, - np.nan, - (1.0 / (1.0 + 2.0)), - np.nan, - ], - ), - ( - Series([1.0, np.nan, 101.0, 50.0]), - True, - False, - [ - (1.0 - (1.0 / (1.0 + 2.0))) ** 3, - np.nan, - (1.0 - (1.0 / (1.0 + 2.0))), - 1.0, - ], - ), - ( - Series([1.0, np.nan, 101.0, 50.0]), - True, - True, - [ - (1.0 - (1.0 / (1.0 + 2.0))) ** 2, - np.nan, - (1.0 - (1.0 / (1.0 + 2.0))), - 1.0, - ], - ), - ( - Series([1.0, np.nan, 101.0, 50.0]), - False, - False, - [ - (1.0 - (1.0 / (1.0 + 2.0))) ** 3, - np.nan, - (1.0 - (1.0 / (1.0 + 2.0))) * (1.0 / (1.0 + 2.0)), - (1.0 / (1.0 + 2.0)) - * ((1.0 - (1.0 / (1.0 + 2.0))) ** 2 + (1.0 / (1.0 + 2.0))), - ], - ), - ( - Series([1.0, np.nan, 101.0, 50.0]), - False, - True, - [ - (1.0 - (1.0 / (1.0 + 2.0))) ** 2, - np.nan, - (1.0 - (1.0 / (1.0 + 2.0))) * (1.0 / (1.0 + 2.0)), - (1.0 / (1.0 + 2.0)), - ], - ), - ], -) -def test_ewma_nan_handling_cases(s, adjust, ignore_na, w): - # GH 7603 - expected = (s.multiply(w).cumsum() / Series(w).cumsum()).fillna(method="ffill") - result = s.ewm(com=2.0, adjust=adjust, ignore_na=ignore_na).mean() - - tm.assert_series_equal(result, expected) - if ignore_na is False: - # check that ignore_na defaults to False - result = s.ewm(com=2.0, adjust=adjust).mean() - tm.assert_series_equal(result, expected) - - def test_ewma_span_com_args(series): A = series.ewm(com=9.5).mean() B = series.ewm(span=20).mean() @@ -230,22 +48,6 @@ def test_ewma_halflife_arg(series): series.ewm() -def test_ewm_alpha(): - # GH 10789 - arr = np.random.randn(100) - locs = np.arange(20, 40) - arr[locs] = np.NaN - - s = Series(arr) - a = s.ewm(alpha=0.61722699889169674).mean() - b = s.ewm(com=0.62014947789973052).mean() - c = s.ewm(span=2.240298955799461).mean() - d = s.ewm(halflife=0.721792864318).mean() - tm.assert_series_equal(a, b) - tm.assert_series_equal(a, c) - tm.assert_series_equal(a, d) - - def test_ewm_alpha_arg(series): # GH 10789 s = series @@ -260,96 +62,3 @@ def test_ewm_alpha_arg(series): s.ewm(span=10.0, alpha=0.5) with pytest.raises(ValueError, match=msg): s.ewm(halflife=10.0, alpha=0.5) - - -def test_ewm_domain_checks(): - # GH 12492 - arr = np.random.randn(100) - locs = np.arange(20, 40) - arr[locs] = np.NaN - - s = Series(arr) - msg = "comass must satisfy: comass >= 0" - with pytest.raises(ValueError, match=msg): - s.ewm(com=-0.1) - s.ewm(com=0.0) - s.ewm(com=0.1) - - msg = "span must satisfy: span >= 1" - with pytest.raises(ValueError, match=msg): - s.ewm(span=-0.1) - with pytest.raises(ValueError, match=msg): - s.ewm(span=0.0) - with pytest.raises(ValueError, match=msg): - s.ewm(span=0.9) - s.ewm(span=1.0) - s.ewm(span=1.1) - - msg = "halflife must satisfy: halflife > 0" - with pytest.raises(ValueError, match=msg): - s.ewm(halflife=-0.1) - with pytest.raises(ValueError, match=msg): - s.ewm(halflife=0.0) - s.ewm(halflife=0.1) - - msg = "alpha must satisfy: 0 < alpha <= 1" - with pytest.raises(ValueError, match=msg): - s.ewm(alpha=-0.1) - with pytest.raises(ValueError, match=msg): - s.ewm(alpha=0.0) - s.ewm(alpha=0.1) - s.ewm(alpha=1.0) - with pytest.raises(ValueError, match=msg): - s.ewm(alpha=1.1) - - -@pytest.mark.parametrize("method", ["mean", "std", "var"]) -def test_ew_empty_series(method): - vals = Series([], dtype=np.float64) - - ewm = vals.ewm(3) - result = getattr(ewm, method)() - tm.assert_almost_equal(result, vals) - - -@pytest.mark.parametrize("min_periods", [0, 1]) -@pytest.mark.parametrize("name", ["mean", "var", "std"]) -def test_ew_min_periods(min_periods, name): - # excluding NaNs correctly - arr = np.random.randn(50) - arr[:10] = np.NaN - arr[-10:] = np.NaN - s = Series(arr) - - # check min_periods - # GH 7898 - result = getattr(s.ewm(com=50, min_periods=2), name)() - assert result[:11].isna().all() - assert not result[11:].isna().any() - - result = getattr(s.ewm(com=50, min_periods=min_periods), name)() - if name == "mean": - assert result[:10].isna().all() - assert not result[10:].isna().any() - else: - # ewm.std, ewm.var (with bias=False) require at least - # two values - assert result[:11].isna().all() - assert not result[11:].isna().any() - - # check series of length 0 - result = getattr(Series(dtype=object).ewm(com=50, min_periods=min_periods), name)() - tm.assert_series_equal(result, Series(dtype="float64")) - - # check series of length 1 - result = getattr(Series([1.0]).ewm(50, min_periods=min_periods), name)() - if name == "mean": - tm.assert_series_equal(result, Series([1.0])) - else: - # ewm.std, ewm.var with bias=False require at least - # two values - tm.assert_series_equal(result, Series([np.NaN])) - - # pass in ints - result2 = getattr(Series(np.arange(50)).ewm(span=10), name)() - assert result2.dtype == np.float_ diff --git a/pandas/tests/window/test_ewm.py b/pandas/tests/window/test_ewm.py index 4cb5d0342572b..4ca090fba4955 100644 --- a/pandas/tests/window/test_ewm.py +++ b/pandas/tests/window/test_ewm.py @@ -260,3 +260,343 @@ def test_ewm_sum(expected_data, ignore): result = data.ewm(alpha=0.5, ignore_na=ignore).sum() expected = Series(expected_data) tm.assert_series_equal(result, expected) + + +def test_ewma_adjust(): + vals = Series(np.zeros(1000)) + vals[5] = 1 + result = vals.ewm(span=100, adjust=False).mean().sum() + assert np.abs(result - 1) < 1e-2 + + +def test_ewma_cases(adjust, ignore_na): + # try adjust/ignore_na args matrix + + s = Series([1.0, 2.0, 4.0, 8.0]) + + if adjust: + expected = Series([1.0, 1.6, 2.736842, 4.923077]) + else: + expected = Series([1.0, 1.333333, 2.222222, 4.148148]) + + result = s.ewm(com=2.0, adjust=adjust, ignore_na=ignore_na).mean() + tm.assert_series_equal(result, expected) + + +def test_ewma_nan_handling(): + s = Series([1.0] + [np.nan] * 5 + [1.0]) + result = s.ewm(com=5).mean() + tm.assert_series_equal(result, Series([1.0] * len(s))) + + s = Series([np.nan] * 2 + [1.0] + [np.nan] * 2 + [1.0]) + result = s.ewm(com=5).mean() + tm.assert_series_equal(result, Series([np.nan] * 2 + [1.0] * 4)) + + +@pytest.mark.parametrize( + "s, adjust, ignore_na, w", + [ + ( + Series([np.nan, 1.0, 101.0]), + True, + False, + [np.nan, (1.0 - (1.0 / (1.0 + 2.0))), 1.0], + ), + ( + Series([np.nan, 1.0, 101.0]), + True, + True, + [np.nan, (1.0 - (1.0 / (1.0 + 2.0))), 1.0], + ), + ( + Series([np.nan, 1.0, 101.0]), + False, + False, + [np.nan, (1.0 - (1.0 / (1.0 + 2.0))), (1.0 / (1.0 + 2.0))], + ), + ( + Series([np.nan, 1.0, 101.0]), + False, + True, + [np.nan, (1.0 - (1.0 / (1.0 + 2.0))), (1.0 / (1.0 + 2.0))], + ), + ( + Series([1.0, np.nan, 101.0]), + True, + False, + [(1.0 - (1.0 / (1.0 + 2.0))) ** 2, np.nan, 1.0], + ), + ( + Series([1.0, np.nan, 101.0]), + True, + True, + [(1.0 - (1.0 / (1.0 + 2.0))), np.nan, 1.0], + ), + ( + Series([1.0, np.nan, 101.0]), + False, + False, + [(1.0 - (1.0 / (1.0 + 2.0))) ** 2, np.nan, (1.0 / (1.0 + 2.0))], + ), + ( + Series([1.0, np.nan, 101.0]), + False, + True, + [(1.0 - (1.0 / (1.0 + 2.0))), np.nan, (1.0 / (1.0 + 2.0))], + ), + ( + Series([np.nan, 1.0, np.nan, np.nan, 101.0, np.nan]), + True, + False, + [np.nan, (1.0 - (1.0 / (1.0 + 2.0))) ** 3, np.nan, np.nan, 1.0, np.nan], + ), + ( + Series([np.nan, 1.0, np.nan, np.nan, 101.0, np.nan]), + True, + True, + [np.nan, (1.0 - (1.0 / (1.0 + 2.0))), np.nan, np.nan, 1.0, np.nan], + ), + ( + Series([np.nan, 1.0, np.nan, np.nan, 101.0, np.nan]), + False, + False, + [ + np.nan, + (1.0 - (1.0 / (1.0 + 2.0))) ** 3, + np.nan, + np.nan, + (1.0 / (1.0 + 2.0)), + np.nan, + ], + ), + ( + Series([np.nan, 1.0, np.nan, np.nan, 101.0, np.nan]), + False, + True, + [ + np.nan, + (1.0 - (1.0 / (1.0 + 2.0))), + np.nan, + np.nan, + (1.0 / (1.0 + 2.0)), + np.nan, + ], + ), + ( + Series([1.0, np.nan, 101.0, 50.0]), + True, + False, + [ + (1.0 - (1.0 / (1.0 + 2.0))) ** 3, + np.nan, + (1.0 - (1.0 / (1.0 + 2.0))), + 1.0, + ], + ), + ( + Series([1.0, np.nan, 101.0, 50.0]), + True, + True, + [ + (1.0 - (1.0 / (1.0 + 2.0))) ** 2, + np.nan, + (1.0 - (1.0 / (1.0 + 2.0))), + 1.0, + ], + ), + ( + Series([1.0, np.nan, 101.0, 50.0]), + False, + False, + [ + (1.0 - (1.0 / (1.0 + 2.0))) ** 3, + np.nan, + (1.0 - (1.0 / (1.0 + 2.0))) * (1.0 / (1.0 + 2.0)), + (1.0 / (1.0 + 2.0)) + * ((1.0 - (1.0 / (1.0 + 2.0))) ** 2 + (1.0 / (1.0 + 2.0))), + ], + ), + ( + Series([1.0, np.nan, 101.0, 50.0]), + False, + True, + [ + (1.0 - (1.0 / (1.0 + 2.0))) ** 2, + np.nan, + (1.0 - (1.0 / (1.0 + 2.0))) * (1.0 / (1.0 + 2.0)), + (1.0 / (1.0 + 2.0)), + ], + ), + ], +) +def test_ewma_nan_handling_cases(s, adjust, ignore_na, w): + # GH 7603 + expected = (s.multiply(w).cumsum() / Series(w).cumsum()).fillna(method="ffill") + result = s.ewm(com=2.0, adjust=adjust, ignore_na=ignore_na).mean() + + tm.assert_series_equal(result, expected) + if ignore_na is False: + # check that ignore_na defaults to False + result = s.ewm(com=2.0, adjust=adjust).mean() + tm.assert_series_equal(result, expected) + + +def test_ewm_alpha(): + # GH 10789 + arr = np.random.randn(100) + locs = np.arange(20, 40) + arr[locs] = np.NaN + + s = Series(arr) + a = s.ewm(alpha=0.61722699889169674).mean() + b = s.ewm(com=0.62014947789973052).mean() + c = s.ewm(span=2.240298955799461).mean() + d = s.ewm(halflife=0.721792864318).mean() + tm.assert_series_equal(a, b) + tm.assert_series_equal(a, c) + tm.assert_series_equal(a, d) + + +def test_ewm_domain_checks(): + # GH 12492 + arr = np.random.randn(100) + locs = np.arange(20, 40) + arr[locs] = np.NaN + + s = Series(arr) + msg = "comass must satisfy: comass >= 0" + with pytest.raises(ValueError, match=msg): + s.ewm(com=-0.1) + s.ewm(com=0.0) + s.ewm(com=0.1) + + msg = "span must satisfy: span >= 1" + with pytest.raises(ValueError, match=msg): + s.ewm(span=-0.1) + with pytest.raises(ValueError, match=msg): + s.ewm(span=0.0) + with pytest.raises(ValueError, match=msg): + s.ewm(span=0.9) + s.ewm(span=1.0) + s.ewm(span=1.1) + + msg = "halflife must satisfy: halflife > 0" + with pytest.raises(ValueError, match=msg): + s.ewm(halflife=-0.1) + with pytest.raises(ValueError, match=msg): + s.ewm(halflife=0.0) + s.ewm(halflife=0.1) + + msg = "alpha must satisfy: 0 < alpha <= 1" + with pytest.raises(ValueError, match=msg): + s.ewm(alpha=-0.1) + with pytest.raises(ValueError, match=msg): + s.ewm(alpha=0.0) + s.ewm(alpha=0.1) + s.ewm(alpha=1.0) + with pytest.raises(ValueError, match=msg): + s.ewm(alpha=1.1) + + +@pytest.mark.parametrize("method", ["mean", "std", "var"]) +def test_ew_empty_series(method): + vals = Series([], dtype=np.float64) + + ewm = vals.ewm(3) + result = getattr(ewm, method)() + tm.assert_almost_equal(result, vals) + + +@pytest.mark.parametrize("min_periods", [0, 1]) +@pytest.mark.parametrize("name", ["mean", "var", "std"]) +def test_ew_min_periods(min_periods, name): + # excluding NaNs correctly + arr = np.random.randn(50) + arr[:10] = np.NaN + arr[-10:] = np.NaN + s = Series(arr) + + # check min_periods + # GH 7898 + result = getattr(s.ewm(com=50, min_periods=2), name)() + assert result[:11].isna().all() + assert not result[11:].isna().any() + + result = getattr(s.ewm(com=50, min_periods=min_periods), name)() + if name == "mean": + assert result[:10].isna().all() + assert not result[10:].isna().any() + else: + # ewm.std, ewm.var (with bias=False) require at least + # two values + assert result[:11].isna().all() + assert not result[11:].isna().any() + + # check series of length 0 + result = getattr(Series(dtype=object).ewm(com=50, min_periods=min_periods), name)() + tm.assert_series_equal(result, Series(dtype="float64")) + + # check series of length 1 + result = getattr(Series([1.0]).ewm(50, min_periods=min_periods), name)() + if name == "mean": + tm.assert_series_equal(result, Series([1.0])) + else: + # ewm.std, ewm.var with bias=False require at least + # two values + tm.assert_series_equal(result, Series([np.NaN])) + + # pass in ints + result2 = getattr(Series(np.arange(50)).ewm(span=10), name)() + assert result2.dtype == np.float_ + + +@pytest.mark.parametrize("name", ["cov", "corr"]) +def test_ewm_corr_cov(name): + A = Series(np.random.randn(50), index=np.arange(50)) + B = A[2:] + np.random.randn(48) + + A[:10] = np.NaN + B[-10:] = np.NaN + + result = getattr(A.ewm(com=20, min_periods=5), name)(B) + assert np.isnan(result.values[:14]).all() + assert not np.isnan(result.values[14:]).any() + + +@pytest.mark.parametrize("min_periods", [0, 1, 2]) +@pytest.mark.parametrize("name", ["cov", "corr"]) +def test_ewm_corr_cov_min_periods(name, min_periods): + # GH 7898 + A = Series(np.random.randn(50), index=np.arange(50)) + B = A[2:] + np.random.randn(48) + + A[:10] = np.NaN + B[-10:] = np.NaN + + result = getattr(A.ewm(com=20, min_periods=min_periods), name)(B) + # binary functions (ewmcov, ewmcorr) with bias=False require at + # least two values + assert np.isnan(result.values[:11]).all() + assert not np.isnan(result.values[11:]).any() + + # check series of length 0 + empty = Series([], dtype=np.float64) + result = getattr(empty.ewm(com=50, min_periods=min_periods), name)(empty) + tm.assert_series_equal(result, empty) + + # check series of length 1 + result = getattr(Series([1.0]).ewm(com=50, min_periods=min_periods), name)( + Series([1.0]) + ) + tm.assert_series_equal(result, Series([np.NaN])) + + +@pytest.mark.parametrize("name", ["cov", "corr"]) +def test_different_input_array_raise_exception(name): + A = Series(np.random.randn(50), index=np.arange(50)) + A[:10] = np.NaN + + msg = "other must be a DataFrame or Series" + # exception raised is Exception + with pytest.raises(ValueError, match=msg): + getattr(A.ewm(com=20, min_periods=5), name)(np.random.randn(50))