From 6151181c2cc0a2019d17f74cf85a09802bc39c4a Mon Sep 17 00:00:00 2001 From: KalyanGokhale <4734245+KalyanGokhale@users.noreply.github.com> Date: Fri, 15 Jun 2018 17:42:16 +0530 Subject: [PATCH 01/20] Initial commit GH21441 --- doc/source/whatsnew/v0.23.2.txt | 6 +++++- pandas/core/generic.py | 14 +++++++------- pandas/tests/generic/test_generic.py | 10 ++++++++++ 3 files changed, 22 insertions(+), 8 deletions(-) diff --git a/doc/source/whatsnew/v0.23.2.txt b/doc/source/whatsnew/v0.23.2.txt index 79a4c3da2ffa4..1edeb3662fc82 100644 --- a/doc/source/whatsnew/v0.23.2.txt +++ b/doc/source/whatsnew/v0.23.2.txt @@ -16,8 +16,12 @@ and bug fixes. We recommend that all users upgrade to this version. Fixed Regressions ~~~~~~~~~~~~~~~~~ +- - -- + +**Other Fixes** + +- Bug in :meth:`first_valid_index` that raised for row index with duplicate values (:issue:`21441`) .. _whatsnew_0232.performance: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 32f64b1d3e05c..a060e5b2850c9 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -8970,17 +8970,17 @@ def _find_valid_index(self, how): if how == 'first': # First valid value case - i = is_valid.idxmax() - if not is_valid[i]: - return None - return i + i = is_valid.values[::].argmin() + idxpos = i elif how == 'last': # Last valid value case i = is_valid.values[::-1].argmax() - if not is_valid.iat[len(self) - i - 1]: - return None - return self.index[len(self) - i - 1] + idxpos = len(self) - i - 1 + + if not is_valid.iat[idxpos]: + return None + return self.index[idxpos] @Appender(_shared_docs['valid_index'] % {'position': 'first', 'klass': 'NDFrame'}) diff --git a/pandas/tests/generic/test_generic.py b/pandas/tests/generic/test_generic.py index 311c71f734945..618b2e8206baf 100644 --- a/pandas/tests/generic/test_generic.py +++ b/pandas/tests/generic/test_generic.py @@ -612,6 +612,16 @@ def test_pct_change(self, periods, fill_method, limit, exp): else: tm.assert_series_equal(res, Series(exp)) + @pytest.mark.parametrize("DF,idx,first_idx,last_idx", [ + ({'A': [1, 2, 3]}, [1, 1, 2], 1, 2), + ({'A': [1, 2, 3]}, [1, 2, 2], 1, 2), + ({'A': [1, 2, 3, 4]}, ['d', 'd', 'd', 'd'], 'd', 'd')]) + def test_valid_index(self, DF, idx, first_idx, last_idx): + # GH 21441 + df1 = pd.DataFrame(DF, index=idx) + assert first_idx == df1.first_valid_index() + assert last_idx == df1.last_valid_index() + class TestNDFrame(object): # tests that don't fit elsewhere From 003f801bb23e4ce6840817893aa4843c2245e456 Mon Sep 17 00:00:00 2001 From: KalyanGokhale <4734245+KalyanGokhale@users.noreply.github.com> Date: Sat, 16 Jun 2018 11:32:58 +0530 Subject: [PATCH 02/20] Removed failing test from closed PR GH21441 --- doc/source/whatsnew/v0.23.2.txt | 6 +----- pandas/core/generic.py | 7 ++----- pandas/tests/generic/test_generic.py | 10 +++++----- pandas/tests/test_resample.py | 7 ------- 4 files changed, 8 insertions(+), 22 deletions(-) diff --git a/doc/source/whatsnew/v0.23.2.txt b/doc/source/whatsnew/v0.23.2.txt index 1edeb3662fc82..0646c8c2d6d17 100644 --- a/doc/source/whatsnew/v0.23.2.txt +++ b/doc/source/whatsnew/v0.23.2.txt @@ -16,13 +16,9 @@ and bug fixes. We recommend that all users upgrade to this version. Fixed Regressions ~~~~~~~~~~~~~~~~~ -- +- Bug in :meth:`first_valid_index` raised for a row index with duplicate values (:issue:`21441`) - -**Other Fixes** - -- Bug in :meth:`first_valid_index` that raised for row index with duplicate values (:issue:`21441`) - .. _whatsnew_0232.performance: Performance Improvements diff --git a/pandas/core/generic.py b/pandas/core/generic.py index a060e5b2850c9..9356461951899 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -8970,13 +8970,10 @@ def _find_valid_index(self, how): if how == 'first': # First valid value case - i = is_valid.values[::].argmin() - idxpos = i - + idxpos = is_valid.values[::].argmin() elif how == 'last': # Last valid value case - i = is_valid.values[::-1].argmax() - idxpos = len(self) - i - 1 + idxpos = len(self) - 1 - is_valid.values[::-1].argmax() if not is_valid.iat[idxpos]: return None diff --git a/pandas/tests/generic/test_generic.py b/pandas/tests/generic/test_generic.py index 618b2e8206baf..9e28fd7af8a8c 100644 --- a/pandas/tests/generic/test_generic.py +++ b/pandas/tests/generic/test_generic.py @@ -612,15 +612,15 @@ def test_pct_change(self, periods, fill_method, limit, exp): else: tm.assert_series_equal(res, Series(exp)) - @pytest.mark.parametrize("DF,idx,first_idx,last_idx", [ + @pytest.mark.parametrize("data,index,expected_first,expected_last", [ ({'A': [1, 2, 3]}, [1, 1, 2], 1, 2), ({'A': [1, 2, 3]}, [1, 2, 2], 1, 2), ({'A': [1, 2, 3, 4]}, ['d', 'd', 'd', 'd'], 'd', 'd')]) - def test_valid_index(self, DF, idx, first_idx, last_idx): + def test_valid_index(self, data, index, expected_first, expected_last): # GH 21441 - df1 = pd.DataFrame(DF, index=idx) - assert first_idx == df1.first_valid_index() - assert last_idx == df1.last_valid_index() + df = DataFrame(data, index=index) + assert expected_first == df.first_valid_index() + assert expected_last == df.last_valid_index() class TestNDFrame(object): diff --git a/pandas/tests/test_resample.py b/pandas/tests/test_resample.py index c1257cce9a9a4..00610db795954 100644 --- a/pandas/tests/test_resample.py +++ b/pandas/tests/test_resample.py @@ -649,13 +649,6 @@ def test_asfreq_fill_value(self): expected = frame.reindex(new_index, fill_value=4.0) assert_frame_equal(result, expected) - def test_resample_interpolate(self): - # # 12925 - df = self.create_series().to_frame('value') - assert_frame_equal( - df.resample('1T').asfreq().interpolate(), - df.resample('1T').interpolate()) - def test_raises_on_non_datetimelike_index(self): # this is a non datetimelike index xp = DataFrame() From 952758ab726f2f738bc54d1bf5011f083493ceed Mon Sep 17 00:00:00 2001 From: KalyanGokhale <4734245+KalyanGokhale@users.noreply.github.com> Date: Sun, 17 Jun 2018 21:21:15 +0530 Subject: [PATCH 03/20] Updated logic GH21441 --- pandas/core/generic.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 9356461951899..b00a1b15e8303 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -8968,16 +8968,20 @@ def _find_valid_index(self, how): if self.ndim == 2: is_valid = is_valid.any(1) # reduce axis 1 - if how == 'first': - # First valid value case - idxpos = is_valid.values[::].argmin() - elif how == 'last': - # Last valid value case - idxpos = len(self) - 1 - is_valid.values[::-1].argmax() - - if not is_valid.iat[idxpos]: + if how == 'last': + is_valid.sort_index(ascending=False, inplace=True) + + idxpos = is_valid.idxmax() + + if isinstance(is_valid[idxpos], ABCSeries): + for chk_notna in is_valid[idxpos]: + chk_notna = True and chk_notna + else: + chk_notna = is_valid[idxpos] + + if not chk_notna: return None - return self.index[idxpos] + return idxpos @Appender(_shared_docs['valid_index'] % {'position': 'first', 'klass': 'NDFrame'}) From 1f4beb0c109fea466969d89caad544f097b18f7b Mon Sep 17 00:00:00 2001 From: KalyanGokhale <4734245+KalyanGokhale@users.noreply.github.com> Date: Sun, 17 Jun 2018 21:37:55 +0530 Subject: [PATCH 04/20] Changed logic from AND to OR for chk_notna GH21441 --- pandas/core/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index b00a1b15e8303..db8a5577c2cfb 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -8975,7 +8975,7 @@ def _find_valid_index(self, how): if isinstance(is_valid[idxpos], ABCSeries): for chk_notna in is_valid[idxpos]: - chk_notna = True and chk_notna + chk_notna = False or chk_notna else: chk_notna = is_valid[idxpos] From 675201d9a0c14e81f206cb35f78fc10230e5dc39 Mon Sep 17 00:00:00 2001 From: KalyanGokhale <4734245+KalyanGokhale@users.noreply.github.com> Date: Sun, 17 Jun 2018 22:37:11 +0530 Subject: [PATCH 05/20] Series reverse logic for how == last, edits to whatsnew GH21441 --- doc/source/whatsnew/v0.23.2.txt | 2 +- pandas/core/generic.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.23.2.txt b/doc/source/whatsnew/v0.23.2.txt index 0646c8c2d6d17..1c7b64befdbb7 100644 --- a/doc/source/whatsnew/v0.23.2.txt +++ b/doc/source/whatsnew/v0.23.2.txt @@ -16,7 +16,7 @@ and bug fixes. We recommend that all users upgrade to this version. Fixed Regressions ~~~~~~~~~~~~~~~~~ -- Bug in :meth:`first_valid_index` raised for a row index with duplicate values (:issue:`21441`) +- Bug in both :meth:`DataFrame.first_valid_index` and :meth:`Series.first_valid_index` raised for a row index with duplicate values (:issue:`21441`) - .. _whatsnew_0232.performance: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index db8a5577c2cfb..8fbc03a9563ae 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -8969,7 +8969,7 @@ def _find_valid_index(self, how): is_valid = is_valid.any(1) # reduce axis 1 if how == 'last': - is_valid.sort_index(ascending=False, inplace=True) + is_valid = is_valid[::-1] idxpos = is_valid.idxmax() From 06402799673ba432b6f5219bfc89b4b67801eaa8 Mon Sep 17 00:00:00 2001 From: KalyanGokhale <4734245+KalyanGokhale@users.noreply.github.com> Date: Mon, 18 Jun 2018 07:37:24 +0530 Subject: [PATCH 06/20] Reverting how==last logic to original, restoring deleted test GH21441 --- pandas/core/generic.py | 22 ++++++++++++---------- pandas/tests/test_resample.py | 7 +++++++ 2 files changed, 19 insertions(+), 10 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 8fbc03a9563ae..3ff1b1da1fdfb 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -8968,20 +8968,22 @@ def _find_valid_index(self, how): if self.ndim == 2: is_valid = is_valid.any(1) # reduce axis 1 - if how == 'last': - is_valid = is_valid[::-1] - - idxpos = is_valid.idxmax() + if how == 'first': + idx = is_valid.idxmax() + if isinstance(is_valid[idx], ABCSeries): + for chk_notna in is_valid[idx]: + chk_notna = False or chk_notna + else: + chk_notna = is_valid[idx] - if isinstance(is_valid[idxpos], ABCSeries): - for chk_notna in is_valid[idxpos]: - chk_notna = False or chk_notna - else: - chk_notna = is_valid[idxpos] + if how == 'last': + idxpos = len(self) - 1 - is_valid.values[::-1].argmax() + chk_notna = is_valid.iat[idxpos] + idx = self.index[idxpos] if not chk_notna: return None - return idxpos + return idx @Appender(_shared_docs['valid_index'] % {'position': 'first', 'klass': 'NDFrame'}) diff --git a/pandas/tests/test_resample.py b/pandas/tests/test_resample.py index 00610db795954..c1257cce9a9a4 100644 --- a/pandas/tests/test_resample.py +++ b/pandas/tests/test_resample.py @@ -649,6 +649,13 @@ def test_asfreq_fill_value(self): expected = frame.reindex(new_index, fill_value=4.0) assert_frame_equal(result, expected) + def test_resample_interpolate(self): + # # 12925 + df = self.create_series().to_frame('value') + assert_frame_equal( + df.resample('1T').asfreq().interpolate(), + df.resample('1T').interpolate()) + def test_raises_on_non_datetimelike_index(self): # this is a non datetimelike index xp = DataFrame() From 177a3f4e830a730a0a332c88d6239c042f86e891 Mon Sep 17 00:00:00 2001 From: KalyanGokhale <4734245+KalyanGokhale@users.noreply.github.com> Date: Tue, 19 Jun 2018 07:56:26 +0530 Subject: [PATCH 07/20] Fixed the if / for for chk_notna, added test cases for NA values in duplicate index GH21441 --- pandas/core/generic.py | 5 +++-- pandas/tests/generic/test_generic.py | 5 ++++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 3ff1b1da1fdfb..d572cf5e955ca 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -8971,8 +8971,9 @@ def _find_valid_index(self, how): if how == 'first': idx = is_valid.idxmax() if isinstance(is_valid[idx], ABCSeries): - for chk_notna in is_valid[idx]: - chk_notna = False or chk_notna + chk_notna = False + for idxinstance in is_valid[idx]: + chk_notna = chk_notna or idxinstance else: chk_notna = is_valid[idx] diff --git a/pandas/tests/generic/test_generic.py b/pandas/tests/generic/test_generic.py index 9e28fd7af8a8c..20472e06e2153 100644 --- a/pandas/tests/generic/test_generic.py +++ b/pandas/tests/generic/test_generic.py @@ -615,7 +615,10 @@ def test_pct_change(self, periods, fill_method, limit, exp): @pytest.mark.parametrize("data,index,expected_first,expected_last", [ ({'A': [1, 2, 3]}, [1, 1, 2], 1, 2), ({'A': [1, 2, 3]}, [1, 2, 2], 1, 2), - ({'A': [1, 2, 3, 4]}, ['d', 'd', 'd', 'd'], 'd', 'd')]) + ({'A': [1, 2, 3, 4]}, ['d', 'd', 'd', 'd'], 'd', 'd'), + ({'A': [1, np.nan, 3]}, [1, 1, 2], 1, 2), + ({'A': [np.nan, np.nan, 3]}, [1, 1, 2], 2, 2), + ({'A': [1, np.nan, 3]}, [1, 2, 2], 1, 2)]) def test_valid_index(self, data, index, expected_first, expected_last): # GH 21441 df = DataFrame(data, index=index) From e94aad5e170ae9e06a5a41297875826db88b3775 Mon Sep 17 00:00:00 2001 From: KalyanGokhale <4734245+KalyanGokhale@users.noreply.github.com> Date: Fri, 15 Jun 2018 17:42:16 +0530 Subject: [PATCH 08/20] Initial commit GH21441 --- doc/source/whatsnew/v0.23.2.txt | 3 ++- pandas/core/generic.py | 14 +++++++------- pandas/tests/generic/test_generic.py | 10 ++++++++++ 3 files changed, 19 insertions(+), 8 deletions(-) diff --git a/doc/source/whatsnew/v0.23.2.txt b/doc/source/whatsnew/v0.23.2.txt index 0f2c9c4756987..154f998b01c1a 100644 --- a/doc/source/whatsnew/v0.23.2.txt +++ b/doc/source/whatsnew/v0.23.2.txt @@ -16,8 +16,9 @@ and bug fixes. We recommend that all users upgrade to this version. Fixed Regressions ~~~~~~~~~~~~~~~~~ + - Fixed regression in :meth:`to_csv` when handling file-like object incorrectly (:issue:`21471`) -- +- .. _whatsnew_0232.performance: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 32f64b1d3e05c..a060e5b2850c9 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -8970,17 +8970,17 @@ def _find_valid_index(self, how): if how == 'first': # First valid value case - i = is_valid.idxmax() - if not is_valid[i]: - return None - return i + i = is_valid.values[::].argmin() + idxpos = i elif how == 'last': # Last valid value case i = is_valid.values[::-1].argmax() - if not is_valid.iat[len(self) - i - 1]: - return None - return self.index[len(self) - i - 1] + idxpos = len(self) - i - 1 + + if not is_valid.iat[idxpos]: + return None + return self.index[idxpos] @Appender(_shared_docs['valid_index'] % {'position': 'first', 'klass': 'NDFrame'}) diff --git a/pandas/tests/generic/test_generic.py b/pandas/tests/generic/test_generic.py index 311c71f734945..618b2e8206baf 100644 --- a/pandas/tests/generic/test_generic.py +++ b/pandas/tests/generic/test_generic.py @@ -612,6 +612,16 @@ def test_pct_change(self, periods, fill_method, limit, exp): else: tm.assert_series_equal(res, Series(exp)) + @pytest.mark.parametrize("DF,idx,first_idx,last_idx", [ + ({'A': [1, 2, 3]}, [1, 1, 2], 1, 2), + ({'A': [1, 2, 3]}, [1, 2, 2], 1, 2), + ({'A': [1, 2, 3, 4]}, ['d', 'd', 'd', 'd'], 'd', 'd')]) + def test_valid_index(self, DF, idx, first_idx, last_idx): + # GH 21441 + df1 = pd.DataFrame(DF, index=idx) + assert first_idx == df1.first_valid_index() + assert last_idx == df1.last_valid_index() + class TestNDFrame(object): # tests that don't fit elsewhere From ff58ffd0e77832e298fbda7183ca0666dba7e81b Mon Sep 17 00:00:00 2001 From: KalyanGokhale <4734245+KalyanGokhale@users.noreply.github.com> Date: Sat, 16 Jun 2018 11:32:58 +0530 Subject: [PATCH 09/20] Removed failing test from closed PR GH21441 --- doc/source/whatsnew/v0.23.2.txt | 1 - pandas/core/generic.py | 7 ++----- pandas/tests/generic/test_generic.py | 10 +++++----- pandas/tests/test_resample.py | 7 ------- 4 files changed, 7 insertions(+), 18 deletions(-) diff --git a/doc/source/whatsnew/v0.23.2.txt b/doc/source/whatsnew/v0.23.2.txt index 154f998b01c1a..7629b86e26bf7 100644 --- a/doc/source/whatsnew/v0.23.2.txt +++ b/doc/source/whatsnew/v0.23.2.txt @@ -16,7 +16,6 @@ and bug fixes. We recommend that all users upgrade to this version. Fixed Regressions ~~~~~~~~~~~~~~~~~ - - Fixed regression in :meth:`to_csv` when handling file-like object incorrectly (:issue:`21471`) - diff --git a/pandas/core/generic.py b/pandas/core/generic.py index a060e5b2850c9..9356461951899 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -8970,13 +8970,10 @@ def _find_valid_index(self, how): if how == 'first': # First valid value case - i = is_valid.values[::].argmin() - idxpos = i - + idxpos = is_valid.values[::].argmin() elif how == 'last': # Last valid value case - i = is_valid.values[::-1].argmax() - idxpos = len(self) - i - 1 + idxpos = len(self) - 1 - is_valid.values[::-1].argmax() if not is_valid.iat[idxpos]: return None diff --git a/pandas/tests/generic/test_generic.py b/pandas/tests/generic/test_generic.py index 618b2e8206baf..9e28fd7af8a8c 100644 --- a/pandas/tests/generic/test_generic.py +++ b/pandas/tests/generic/test_generic.py @@ -612,15 +612,15 @@ def test_pct_change(self, periods, fill_method, limit, exp): else: tm.assert_series_equal(res, Series(exp)) - @pytest.mark.parametrize("DF,idx,first_idx,last_idx", [ + @pytest.mark.parametrize("data,index,expected_first,expected_last", [ ({'A': [1, 2, 3]}, [1, 1, 2], 1, 2), ({'A': [1, 2, 3]}, [1, 2, 2], 1, 2), ({'A': [1, 2, 3, 4]}, ['d', 'd', 'd', 'd'], 'd', 'd')]) - def test_valid_index(self, DF, idx, first_idx, last_idx): + def test_valid_index(self, data, index, expected_first, expected_last): # GH 21441 - df1 = pd.DataFrame(DF, index=idx) - assert first_idx == df1.first_valid_index() - assert last_idx == df1.last_valid_index() + df = DataFrame(data, index=index) + assert expected_first == df.first_valid_index() + assert expected_last == df.last_valid_index() class TestNDFrame(object): diff --git a/pandas/tests/test_resample.py b/pandas/tests/test_resample.py index 6f0ad0535c6b4..42c7051437044 100644 --- a/pandas/tests/test_resample.py +++ b/pandas/tests/test_resample.py @@ -649,13 +649,6 @@ def test_asfreq_fill_value(self): expected = frame.reindex(new_index, fill_value=4.0) assert_frame_equal(result, expected) - def test_resample_interpolate(self): - # # 12925 - df = self.create_series().to_frame('value') - assert_frame_equal( - df.resample('1T').asfreq().interpolate(), - df.resample('1T').interpolate()) - def test_raises_on_non_datetimelike_index(self): # this is a non datetimelike index xp = DataFrame() From d326b0a778359be2d297f6c52ef6caad66b949cc Mon Sep 17 00:00:00 2001 From: KalyanGokhale <4734245+KalyanGokhale@users.noreply.github.com> Date: Sun, 17 Jun 2018 21:21:15 +0530 Subject: [PATCH 10/20] Updated logic GH21441 --- pandas/core/generic.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 9356461951899..b00a1b15e8303 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -8968,16 +8968,20 @@ def _find_valid_index(self, how): if self.ndim == 2: is_valid = is_valid.any(1) # reduce axis 1 - if how == 'first': - # First valid value case - idxpos = is_valid.values[::].argmin() - elif how == 'last': - # Last valid value case - idxpos = len(self) - 1 - is_valid.values[::-1].argmax() - - if not is_valid.iat[idxpos]: + if how == 'last': + is_valid.sort_index(ascending=False, inplace=True) + + idxpos = is_valid.idxmax() + + if isinstance(is_valid[idxpos], ABCSeries): + for chk_notna in is_valid[idxpos]: + chk_notna = True and chk_notna + else: + chk_notna = is_valid[idxpos] + + if not chk_notna: return None - return self.index[idxpos] + return idxpos @Appender(_shared_docs['valid_index'] % {'position': 'first', 'klass': 'NDFrame'}) From b53bb117550bddb1c95911eedd5086a1bf9669bd Mon Sep 17 00:00:00 2001 From: KalyanGokhale <4734245+KalyanGokhale@users.noreply.github.com> Date: Sun, 17 Jun 2018 21:37:55 +0530 Subject: [PATCH 11/20] Changed logic from AND to OR for chk_notna GH21441 --- pandas/core/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index b00a1b15e8303..db8a5577c2cfb 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -8975,7 +8975,7 @@ def _find_valid_index(self, how): if isinstance(is_valid[idxpos], ABCSeries): for chk_notna in is_valid[idxpos]: - chk_notna = True and chk_notna + chk_notna = False or chk_notna else: chk_notna = is_valid[idxpos] From 0cb340577ba22a64555d75f25fab635deedb6505 Mon Sep 17 00:00:00 2001 From: KalyanGokhale <4734245+KalyanGokhale@users.noreply.github.com> Date: Sun, 17 Jun 2018 22:37:11 +0530 Subject: [PATCH 12/20] Series reverse logic for how == last, edits to whatsnew GH21441 --- pandas/core/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index db8a5577c2cfb..8fbc03a9563ae 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -8969,7 +8969,7 @@ def _find_valid_index(self, how): is_valid = is_valid.any(1) # reduce axis 1 if how == 'last': - is_valid.sort_index(ascending=False, inplace=True) + is_valid = is_valid[::-1] idxpos = is_valid.idxmax() From 11edb51892c2fd106537841bc1fa881c88fe2322 Mon Sep 17 00:00:00 2001 From: KalyanGokhale <4734245+KalyanGokhale@users.noreply.github.com> Date: Mon, 18 Jun 2018 07:37:24 +0530 Subject: [PATCH 13/20] Reverting how==last logic to original, restoring deleted test GH21441 --- pandas/core/generic.py | 22 ++++++++++++---------- pandas/tests/test_resample.py | 7 +++++++ 2 files changed, 19 insertions(+), 10 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 8fbc03a9563ae..3ff1b1da1fdfb 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -8968,20 +8968,22 @@ def _find_valid_index(self, how): if self.ndim == 2: is_valid = is_valid.any(1) # reduce axis 1 - if how == 'last': - is_valid = is_valid[::-1] - - idxpos = is_valid.idxmax() + if how == 'first': + idx = is_valid.idxmax() + if isinstance(is_valid[idx], ABCSeries): + for chk_notna in is_valid[idx]: + chk_notna = False or chk_notna + else: + chk_notna = is_valid[idx] - if isinstance(is_valid[idxpos], ABCSeries): - for chk_notna in is_valid[idxpos]: - chk_notna = False or chk_notna - else: - chk_notna = is_valid[idxpos] + if how == 'last': + idxpos = len(self) - 1 - is_valid.values[::-1].argmax() + chk_notna = is_valid.iat[idxpos] + idx = self.index[idxpos] if not chk_notna: return None - return idxpos + return idx @Appender(_shared_docs['valid_index'] % {'position': 'first', 'klass': 'NDFrame'}) diff --git a/pandas/tests/test_resample.py b/pandas/tests/test_resample.py index 42c7051437044..6f0ad0535c6b4 100644 --- a/pandas/tests/test_resample.py +++ b/pandas/tests/test_resample.py @@ -649,6 +649,13 @@ def test_asfreq_fill_value(self): expected = frame.reindex(new_index, fill_value=4.0) assert_frame_equal(result, expected) + def test_resample_interpolate(self): + # # 12925 + df = self.create_series().to_frame('value') + assert_frame_equal( + df.resample('1T').asfreq().interpolate(), + df.resample('1T').interpolate()) + def test_raises_on_non_datetimelike_index(self): # this is a non datetimelike index xp = DataFrame() From ed410e1565da8964e79adffbcbb9bb5a275b0c4e Mon Sep 17 00:00:00 2001 From: KalyanGokhale <4734245+KalyanGokhale@users.noreply.github.com> Date: Tue, 19 Jun 2018 07:56:26 +0530 Subject: [PATCH 14/20] Fixed the if / for for chk_notna, added test cases for NA values in duplicate index GH21441 --- pandas/core/generic.py | 5 +++-- pandas/tests/generic/test_generic.py | 5 ++++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 3ff1b1da1fdfb..d572cf5e955ca 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -8971,8 +8971,9 @@ def _find_valid_index(self, how): if how == 'first': idx = is_valid.idxmax() if isinstance(is_valid[idx], ABCSeries): - for chk_notna in is_valid[idx]: - chk_notna = False or chk_notna + chk_notna = False + for idxinstance in is_valid[idx]: + chk_notna = chk_notna or idxinstance else: chk_notna = is_valid[idx] diff --git a/pandas/tests/generic/test_generic.py b/pandas/tests/generic/test_generic.py index 9e28fd7af8a8c..20472e06e2153 100644 --- a/pandas/tests/generic/test_generic.py +++ b/pandas/tests/generic/test_generic.py @@ -615,7 +615,10 @@ def test_pct_change(self, periods, fill_method, limit, exp): @pytest.mark.parametrize("data,index,expected_first,expected_last", [ ({'A': [1, 2, 3]}, [1, 1, 2], 1, 2), ({'A': [1, 2, 3]}, [1, 2, 2], 1, 2), - ({'A': [1, 2, 3, 4]}, ['d', 'd', 'd', 'd'], 'd', 'd')]) + ({'A': [1, 2, 3, 4]}, ['d', 'd', 'd', 'd'], 'd', 'd'), + ({'A': [1, np.nan, 3]}, [1, 1, 2], 1, 2), + ({'A': [np.nan, np.nan, 3]}, [1, 1, 2], 2, 2), + ({'A': [1, np.nan, 3]}, [1, 2, 2], 1, 2)]) def test_valid_index(self, data, index, expected_first, expected_last): # GH 21441 df = DataFrame(data, index=index) From 05e8a9968d1acb8a5f9ee13400e3ec8894c72702 Mon Sep 17 00:00:00 2001 From: KalyanGokhale <4734245+KalyanGokhale@users.noreply.github.com> Date: Tue, 19 Jun 2018 08:06:36 +0530 Subject: [PATCH 15/20] Rebased and updated whatsnew GH21441 --- doc/source/whatsnew/v0.23.2.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v0.23.2.txt b/doc/source/whatsnew/v0.23.2.txt index 7629b86e26bf7..09bf337bf0938 100644 --- a/doc/source/whatsnew/v0.23.2.txt +++ b/doc/source/whatsnew/v0.23.2.txt @@ -17,6 +17,7 @@ Fixed Regressions ~~~~~~~~~~~~~~~~~ - Fixed regression in :meth:`to_csv` when handling file-like object incorrectly (:issue:`21471`) +- Bug in both :meth:`DataFrame.first_valid_index` and :meth:`Series.first_valid_index` raised for a row index with duplicate values (:issue:`21441`) - .. _whatsnew_0232.performance: From 01a9f7ee8b07efdc314c76b1579fe652d3ba5b5c Mon Sep 17 00:00:00 2001 From: KalyanGokhale <4734245+KalyanGokhale@users.noreply.github.com> Date: Tue, 19 Jun 2018 19:01:21 +0530 Subject: [PATCH 16/20] Moved tests to test_timeseries GH21441 --- pandas/tests/frame/test_timeseries.py | 15 ++++++++++++++- pandas/tests/generic/test_generic.py | 13 ------------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/pandas/tests/frame/test_timeseries.py b/pandas/tests/frame/test_timeseries.py index 90fbc6e628369..4c9c2a8ae1557 100644 --- a/pandas/tests/frame/test_timeseries.py +++ b/pandas/tests/frame/test_timeseries.py @@ -506,7 +506,15 @@ def test_asfreq_fillvalue(self): actual_series = ts.asfreq(freq='1S', fill_value=9.0) assert_series_equal(expected_series, actual_series) - def test_first_last_valid(self): + @pytest.mark.parametrize("data,index,expected_first,expected_last", [ + ({'A': [1, 2, 3]}, [1, 1, 2], 1, 2), + ({'A': [1, 2, 3]}, [1, 2, 2], 1, 2), + ({'A': [1, 2, 3, 4]}, ['d', 'd', 'd', 'd'], 'd', 'd'), + ({'A': [1, np.nan, 3]}, [1, 1, 2], 1, 2), + ({'A': [np.nan, np.nan, 3]}, [1, 1, 2], 2, 2), + ({'A': [1, np.nan, 3]}, [1, 2, 2], 1, 2)]) + def test_first_last_valid(self, data, index, + expected_first, expected_last): N = len(self.frame.index) mat = randn(N) mat[:5] = nan @@ -539,6 +547,11 @@ def test_first_last_valid(self): assert frame.first_valid_index().freq == frame.index.freq assert frame.last_valid_index().freq == frame.index.freq + # GH 21441 + df = DataFrame(data, index=index) + assert expected_first == df.first_valid_index() + assert expected_last == df.last_valid_index() + def test_first_subset(self): ts = tm.makeTimeDataFrame(freq='12h') result = ts.first('10d') diff --git a/pandas/tests/generic/test_generic.py b/pandas/tests/generic/test_generic.py index 20472e06e2153..311c71f734945 100644 --- a/pandas/tests/generic/test_generic.py +++ b/pandas/tests/generic/test_generic.py @@ -612,19 +612,6 @@ def test_pct_change(self, periods, fill_method, limit, exp): else: tm.assert_series_equal(res, Series(exp)) - @pytest.mark.parametrize("data,index,expected_first,expected_last", [ - ({'A': [1, 2, 3]}, [1, 1, 2], 1, 2), - ({'A': [1, 2, 3]}, [1, 2, 2], 1, 2), - ({'A': [1, 2, 3, 4]}, ['d', 'd', 'd', 'd'], 'd', 'd'), - ({'A': [1, np.nan, 3]}, [1, 1, 2], 1, 2), - ({'A': [np.nan, np.nan, 3]}, [1, 1, 2], 2, 2), - ({'A': [1, np.nan, 3]}, [1, 2, 2], 1, 2)]) - def test_valid_index(self, data, index, expected_first, expected_last): - # GH 21441 - df = DataFrame(data, index=index) - assert expected_first == df.first_valid_index() - assert expected_last == df.last_valid_index() - class TestNDFrame(object): # tests that don't fit elsewhere From 111efb01d97446bc5f72a2ed4d5f26ac3e0ea314 Mon Sep 17 00:00:00 2001 From: KalyanGokhale <4734245+KalyanGokhale@users.noreply.github.com> Date: Tue, 19 Jun 2018 19:16:42 +0530 Subject: [PATCH 17/20] Removed tests from test_generic GH21441 --- pandas/tests/generic/test_generic.py | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/pandas/tests/generic/test_generic.py b/pandas/tests/generic/test_generic.py index 20472e06e2153..311c71f734945 100644 --- a/pandas/tests/generic/test_generic.py +++ b/pandas/tests/generic/test_generic.py @@ -612,19 +612,6 @@ def test_pct_change(self, periods, fill_method, limit, exp): else: tm.assert_series_equal(res, Series(exp)) - @pytest.mark.parametrize("data,index,expected_first,expected_last", [ - ({'A': [1, 2, 3]}, [1, 1, 2], 1, 2), - ({'A': [1, 2, 3]}, [1, 2, 2], 1, 2), - ({'A': [1, 2, 3, 4]}, ['d', 'd', 'd', 'd'], 'd', 'd'), - ({'A': [1, np.nan, 3]}, [1, 1, 2], 1, 2), - ({'A': [np.nan, np.nan, 3]}, [1, 1, 2], 2, 2), - ({'A': [1, np.nan, 3]}, [1, 2, 2], 1, 2)]) - def test_valid_index(self, data, index, expected_first, expected_last): - # GH 21441 - df = DataFrame(data, index=index) - assert expected_first == df.first_valid_index() - assert expected_last == df.last_valid_index() - class TestNDFrame(object): # tests that don't fit elsewhere From 608c09e3346d758c19912b3ede9330dbfd2c5d7a Mon Sep 17 00:00:00 2001 From: KalyanGokhale <4734245+KalyanGokhale@users.noreply.github.com> Date: Tue, 19 Jun 2018 20:50:57 +0530 Subject: [PATCH 18/20] Updated test parameter name GH21441 --- pandas/tests/frame/test_timeseries.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/frame/test_timeseries.py b/pandas/tests/frame/test_timeseries.py index 4c9c2a8ae1557..fb9bd74d9876d 100644 --- a/pandas/tests/frame/test_timeseries.py +++ b/pandas/tests/frame/test_timeseries.py @@ -506,14 +506,14 @@ def test_asfreq_fillvalue(self): actual_series = ts.asfreq(freq='1S', fill_value=9.0) assert_series_equal(expected_series, actual_series) - @pytest.mark.parametrize("data,index,expected_first,expected_last", [ + @pytest.mark.parametrize("data,idx,expected_first,expected_last", [ ({'A': [1, 2, 3]}, [1, 1, 2], 1, 2), ({'A': [1, 2, 3]}, [1, 2, 2], 1, 2), ({'A': [1, 2, 3, 4]}, ['d', 'd', 'd', 'd'], 'd', 'd'), ({'A': [1, np.nan, 3]}, [1, 1, 2], 1, 2), ({'A': [np.nan, np.nan, 3]}, [1, 1, 2], 2, 2), ({'A': [1, np.nan, 3]}, [1, 2, 2], 1, 2)]) - def test_first_last_valid(self, data, index, + def test_first_last_valid(self, data, idx, expected_first, expected_last): N = len(self.frame.index) mat = randn(N) @@ -548,7 +548,7 @@ def test_first_last_valid(self, data, index, assert frame.last_valid_index().freq == frame.index.freq # GH 21441 - df = DataFrame(data, index=index) + df = DataFrame(data, index=idx) assert expected_first == df.first_valid_index() assert expected_last == df.last_valid_index() From d8fface58e37dd1baad1fd3daea56e7420f185e7 Mon Sep 17 00:00:00 2001 From: KalyanGokhale <4734245+KalyanGokhale@users.noreply.github.com> Date: Tue, 19 Jun 2018 22:19:39 +0530 Subject: [PATCH 19/20] Minor update to whatsnew to force TravisCI build GH21441 --- doc/source/whatsnew/v0.23.2.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.23.2.txt b/doc/source/whatsnew/v0.23.2.txt index 09bf337bf0938..2112b68c32bae 100644 --- a/doc/source/whatsnew/v0.23.2.txt +++ b/doc/source/whatsnew/v0.23.2.txt @@ -17,7 +17,7 @@ Fixed Regressions ~~~~~~~~~~~~~~~~~ - Fixed regression in :meth:`to_csv` when handling file-like object incorrectly (:issue:`21471`) -- Bug in both :meth:`DataFrame.first_valid_index` and :meth:`Series.first_valid_index` raised for a row index with duplicate values (:issue:`21441`) +- Bug in both :meth:`DataFrame.first_valid_index` and :meth:`Series.first_valid_index` raised for a row index having duplicate values (:issue:`21441`) - .. _whatsnew_0232.performance: From 751046d1cfbc3f832394703ef6cc994e2cabc686 Mon Sep 17 00:00:00 2001 From: KalyanGokhale <4734245+KalyanGokhale@users.noreply.github.com> Date: Wed, 20 Jun 2018 07:41:31 +0530 Subject: [PATCH 20/20] Mirrored logic for how == first and last GH21441 --- pandas/core/generic.py | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index d572cf5e955ca..c37516d478d84 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -8969,18 +8969,13 @@ def _find_valid_index(self, how): is_valid = is_valid.any(1) # reduce axis 1 if how == 'first': - idx = is_valid.idxmax() - if isinstance(is_valid[idx], ABCSeries): - chk_notna = False - for idxinstance in is_valid[idx]: - chk_notna = chk_notna or idxinstance - else: - chk_notna = is_valid[idx] + idxpos = is_valid.values[::].argmax() if how == 'last': idxpos = len(self) - 1 - is_valid.values[::-1].argmax() - chk_notna = is_valid.iat[idxpos] - idx = self.index[idxpos] + + chk_notna = is_valid.iat[idxpos] + idx = self.index[idxpos] if not chk_notna: return None