From 665a522067ee92d3616ed8d96025fe5ff6bd9249 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 25 Sep 2020 19:45:30 -0700 Subject: [PATCH 1/4] CLN: don't special-case DatetimeArray indexing --- pandas/core/arrays/datetimelike.py | 5 ++++- pandas/core/indexes/datetimelike.py | 8 +++++--- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index c90610bdd920c..2990a1997f30c 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -530,7 +530,6 @@ def _validate_getitem_key(self, key): key = np.asarray(key, dtype=bool) key = check_array_indexer(self, key) - key = lib.maybe_booleans_to_slice(key.view(np.uint8)) elif isinstance(key, list) and len(key) == 1 and isinstance(key[0], slice): # see https://github.com/pandas-dev/pandas/issues/31299, need to allow # this for now (would otherwise raise in check_array_indexer) @@ -560,6 +559,10 @@ def _get_getitem_freq(self, key): # GH#21282 indexing with Ellipsis is similar to a full slice, # should preserve `freq` attribute freq = self.freq + elif com.is_bool_indexer(key): + new_key = lib.maybe_booleans_to_slice(key.view(np.uint8)) + if isinstance(new_key, slice): + return self._get_getitem_freq(new_key) return freq def __setitem__( diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index e2f59ceb41db5..922af8810a5ca 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -167,12 +167,14 @@ def take(self, indices, axis=0, allow_fill=True, fill_value=None, **kwargs): indices = ensure_int64(indices) maybe_slice = lib.maybe_indices_to_slice(indices, len(self)) - if isinstance(maybe_slice, slice): - return self[maybe_slice] - return ExtensionIndex.take( + result = ExtensionIndex.take( self, indices, axis, allow_fill, fill_value, **kwargs ) + if isinstance(maybe_slice, slice): + freq = self._data._get_getitem_freq(maybe_slice) + result._data._freq 
= freq + return result @doc(IndexOpsMixin.searchsorted, klass="Datetime-like Index") def searchsorted(self, value, side="left", sorter=None): From bdea70bd4c9b03ef50e2db51eda00677c85eea3e Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 26 Sep 2020 10:30:28 -0700 Subject: [PATCH 2/4] use parent class _validate_getitem_key --- pandas/core/arrays/datetimelike.py | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 2990a1997f30c..081ae069a19d8 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -55,7 +55,7 @@ from pandas.core.arrays.base import ExtensionOpsMixin import pandas.core.common as com from pandas.core.construction import array, extract_array -from pandas.core.indexers import check_array_indexer, check_setitem_lengths +from pandas.core.indexers import check_setitem_lengths from pandas.core.ops.common import unpack_zerodim_and_defer from pandas.core.ops.invalid import invalid_comparison, make_invalid_op @@ -522,22 +522,6 @@ def __getitem__(self, key): result._freq = self._get_getitem_freq(key) return result - def _validate_getitem_key(self, key): - if com.is_bool_indexer(key): - # first convert to boolean, because check_array_indexer doesn't - # allow object dtype - if is_object_dtype(key): - key = np.asarray(key, dtype=bool) - - key = check_array_indexer(self, key) - elif isinstance(key, list) and len(key) == 1 and isinstance(key[0], slice): - # see https://github.com/pandas-dev/pandas/issues/31299, need to allow - # this for now (would otherwise raise in check_array_indexer) - pass - else: - key = super()._validate_getitem_key(key) - return key - def _get_getitem_freq(self, key): """ Find the `freq` attribute to assign to the result of a __getitem__ lookup. 
From 42e8e1573c6ee0966484a33363acb4770fe90487 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 2 Oct 2020 19:59:44 -0700 Subject: [PATCH 3/4] test, whatsnew --- doc/source/whatsnew/v1.2.0.rst | 1 + pandas/tests/series/indexing/test_boolean.py | 13 ++++++++++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index cb0858fd678f8..aad21dfb19a74 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -358,6 +358,7 @@ Indexing - Bug in :meth:`Index.sort_values` where, when empty values were passed, the method would break by trying to compare missing values instead of pushing them to the end of the sort order. (:issue:`35584`) - Bug in :meth:`Index.get_indexer` and :meth:`Index.get_indexer_non_unique` where int64 arrays are returned instead of intp. (:issue:`36359`) - Bug in :meth:`DataFrame.sort_index` where parameter ascending passed as a list on a single level index gives wrong result. 
(:issue:`32334`) +- Bug in indexing with boolean masks on datetime-like values sometimes returning a view instead of a copy (:issue:`36210`) Missing ^^^^^^^ diff --git a/pandas/tests/series/indexing/test_boolean.py b/pandas/tests/series/indexing/test_boolean.py index e2b71b1f2f412..28bebd764a866 100644 --- a/pandas/tests/series/indexing/test_boolean.py +++ b/pandas/tests/series/indexing/test_boolean.py @@ -1,7 +1,7 @@ import numpy as np import pytest -from pandas import Index, Series +from pandas import Index, Series, date_range import pandas._testing as tm from pandas.core.indexing import IndexingError @@ -128,3 +128,14 @@ def test_get_set_boolean_different_order(string_series): sel = string_series[ordered > 0] exp = string_series[string_series > 0] tm.assert_series_equal(sel, exp) + + +def test_getitem_boolean_dt64_copies(): + # GH#36210 + dti = date_range("2016-01-01", periods=4, tz="US/Pacific") + key = np.array([True, True, False, False]) + + ser = Series(dti._data) + + res = ser[key] + assert res._values._data.base is None From 2b8ab5997752b66c9b369c21d051bc448902730c Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 11 Oct 2020 13:32:46 -0700 Subject: [PATCH 4/4] update test --- pandas/tests/series/indexing/test_boolean.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pandas/tests/series/indexing/test_boolean.py b/pandas/tests/series/indexing/test_boolean.py index 28bebd764a866..3f88f4193e770 100644 --- a/pandas/tests/series/indexing/test_boolean.py +++ b/pandas/tests/series/indexing/test_boolean.py @@ -139,3 +139,8 @@ def test_getitem_boolean_dt64_copies(): res = ser[key] assert res._values._data.base is None + + # compare with numeric case for reference + ser2 = Series(range(4)) + res2 = ser2[key] + assert res2._values.base is None