From 9ddb071b4420d5834d2a51f68145340ae9bd83c8 Mon Sep 17 00:00:00 2001 From: jbrockmendel <jbrockmendel@gmail.com> Date: Fri, 12 Jul 2019 09:15:10 -0700 Subject: [PATCH 01/10] Avoid using private loc methods --- pandas/io/pytables.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 1db177d792401..e68e30427de4b 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -3985,7 +3985,7 @@ def process_filter(field, filt): filt = filt.union(Index(self.levels)) takers = op(axis_values, filt) - return obj.loc._getitem_axis(takers, axis=axis_number) + return obj.loc(axis=axis_number)[takers] # this might be the name of a file IN an axis elif field in axis_values: @@ -3998,7 +3998,7 @@ def process_filter(field, filt): if isinstance(obj, DataFrame): axis_number = 1 - axis_number takers = op(values, filt) - return obj.loc._getitem_axis(takers, axis=axis_number) + return obj.loc(axis=axis_number)[takers] raise ValueError( "cannot find the field [{field}] for " From ba70015aa81837dd7c2ee6dad90995185795fdfe Mon Sep 17 00:00:00 2001 From: jbrockmendel <jbrockmendel@gmail.com> Date: Fri, 12 Jul 2019 16:51:58 -0700 Subject: [PATCH 02/10] avoid private usage --- pandas/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 55a9eb6a0810a..677ab10d6b3fd 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3483,7 +3483,7 @@ def __setitem__(self, key, value): def _setitem_slice(self, key, value): self._check_setitem_copy() - self.loc._setitem_with_indexer(key, value) + self.loc[key] = value def _setitem_array(self, key, value): # also raises Exception if object array with NA values From 21fb39732bfec5ae0737bbc46be9850fa1d5395a Mon Sep 17 00:00:00 2001 From: jbrockmendel <jbrockmendel@gmail.com> Date: Sat, 13 Jul 2019 11:30:28 -0700 Subject: [PATCH 03/10] abstractmethod --- pandas/core/indexing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 01f338a021cec..0624a277dfe10 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -2201,7 +2201,7 @@ class _ScalarAccessIndexer(_NDFrameIndexer): """ access scalars quickly """ def _convert_key(self, key, is_setter: bool = False): - return list(key) + raise AbstractMethodError(self) def __getitem__(self, key): if not isinstance(key, tuple): From 0de71aab4214a442f1dcdd4d565ef0221adc6e43 Mon Sep 17 00:00:00 2001 From: jbrockmendel <jbrockmendel@gmail.com> Date: Sat, 13 Jul 2019 15:00:35 -0700 Subject: [PATCH 04/10] add types in indexing; make internal calls with less overhead --- pandas/core/frame.py | 4 ++-- pandas/core/indexing.py | 36 ++++++++++++++++-------------------- 2 files changed, 18 insertions(+), 22 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 833ea68a6f9c5..f537a48b44f87 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2974,7 +2974,7 @@ def __getitem__(self, key): else: if is_iterator(key): key = list(key) - indexer = self.loc._convert_to_indexer(key, axis=1, raise_missing=True) + indexer = self.loc._get_listlike_indexer(key, axis=1, raise_missing=True)[1] # take() does not accept boolean indexers if getattr(indexer, "dtype", None) == bool: @@ -3484,7 +3484,7 @@ def _setitem_array(self, key, value): for k1, k2 in zip(key, value.columns): self[k1] = value[k2] else: - indexer = self.loc._convert_to_indexer(key, axis=1) + indexer = self.loc._get_listlike_indexer(key, axis=1, raise_missing=False)[1] self._check_setitem_copy() self.loc._setitem_with_indexer((slice(None), indexer), value) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 7b74fe9454ef7..a30d87b1825e2 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -222,7 +222,7 @@ def _validate_key(self, key, axis: int): """ raise AbstractMethodError(self) - def _has_valid_tuple(self, key): + def _has_valid_tuple(self, key: tuple): """ check the key for valid keys across my indexer """ for i, k in enumerate(key): if i >= self.obj.ndim: @@ -235,7 +235,7 @@ def _has_valid_tuple(self, key): "[{types}] types".format(types=self._valid_types) ) - def _is_nested_tuple_indexer(self, tup): + def _is_nested_tuple_indexer(self, tup: tuple): if any(isinstance(ax, MultiIndex) for ax in self.obj.axes): return any(is_nested_tuple(tup, ax) for ax in self.obj.axes) return False @@ -259,7 +259,7 @@ def _convert_tuple(self, key, is_setter: bool = False): keyidx.append(idx) return tuple(keyidx) - def _convert_range(self, key, is_setter: bool = False): + def _convert_range(self, key: range, is_setter: bool = False): """ convert a range argument """ return list(key) @@ -269,7 +269,7 @@ def _convert_scalar_indexer(self, key, axis: int): # a scalar return ax._convert_scalar_indexer(key, kind=self.name) - def _convert_slice_indexer(self, key, axis: int): + def _convert_slice_indexer(self, key: slice, axis: int): # if we are accessing via lowered dim, use the last dim ax = self.obj._get_axis(min(axis, self.ndim - 1)) return ax._convert_slice_indexer(key, kind=self.name) @@ -637,27 +637,23 @@ def _setitem_with_indexer_missing(self, indexer, value): self.obj._maybe_update_cacher(clear=True) return self.obj - def _align_series(self, indexer, ser, multiindex_indexer=False): + def _align_series(self, indexer, ser: ABCSeries, multiindex_indexer=False): """ Parameters ---------- indexer : tuple, slice, scalar The indexer used to get the locations that will be set to `ser` - ser : pd.Series The values to assign to the locations specified by `indexer` - multiindex_indexer : boolean, optional Defaults to False. Should be set to True if `indexer` was from a `pd.MultiIndex`, to avoid unnecessary broadcasting. - Returns ------- `np.array` of `ser` broadcast to the appropriate shape for assignment to the locations selected by `indexer` - """ if isinstance(indexer, (slice, np.ndarray, list, Index)): indexer = tuple([indexer]) @@ -733,7 +729,7 @@ def ravel(i): raise ValueError("Incompatible indexer with Series") - def _align_frame(self, indexer, df): + def _align_frame(self, indexer, df: ABCDataFrame): is_frame = self.obj.ndim == 2 if isinstance(indexer, tuple): @@ -785,7 +781,7 @@ def _align_frame(self, indexer, df): raise ValueError("Incompatible indexer with DataFrame") - def _getitem_tuple(self, tup): + def _getitem_tuple(self, tup: tuple): try: return self._getitem_lowerdim(tup) except IndexingError: @@ -808,7 +804,7 @@ def _getitem_tuple(self, tup): return retval - def _multi_take_opportunity(self, tup): + def _multi_take_opportunity(self, tup: tuple): """ Check whether there is the possibility to use ``_multi_take``. Currently the limit is that all axes being indexed must be indexed with @@ -832,7 +828,7 @@ def _multi_take_opportunity(self, tup): return True - def _multi_take(self, tup): + def _multi_take(self, tup: tuple): """ Create the indexers for the passed tuple of keys, and execute the take operation. This allows the take operation to be executed all at once - @@ -858,7 +854,7 @@ def _multi_take(self, tup): def _convert_for_reindex(self, key, axis: int): return key - def _handle_lowerdim_multi_index_axis0(self, tup): + def _handle_lowerdim_multi_index_axis0(self, tup: tuple): # we have an axis0 multi-index, handle or raise axis = self.axis or 0 try: @@ -883,7 +879,7 @@ def _handle_lowerdim_multi_index_axis0(self, tup): return None - def _getitem_lowerdim(self, tup): + def _getitem_lowerdim(self, tup: tuple): # we can directly get the axis result since the axis is specified if self.axis is not None: @@ -947,7 +943,7 @@ def _getitem_lowerdim(self, tup): raise IndexingError("not applicable") - def _getitem_nested_tuple(self, tup): + def _getitem_nested_tuple(self, tup: tuple): # we have a nested tuple so have at least 1 multi-index level # we should be able to match up the dimensionality here @@ -1421,7 +1417,7 @@ def _getbool_axis(self, key, axis: int): # caller is responsible for ensuring non-None axis labels = self.obj._get_axis(axis) key = check_bool_indexer(labels, key) - inds, = key.nonzero() + inds = key.nonzero()[0] try: return self.obj.take(inds, axis=axis) except Exception as detail: @@ -1739,7 +1735,7 @@ def _getitem_scalar(self, key): values = self.obj._get_value(*key) return values - def _get_partial_string_timestamp_match_key(self, key, labels): + def _get_partial_string_timestamp_match_key(self, key, labels: Index): """Translate any partial string timestamp matches in key, returning the new key (GH 10331)""" if isinstance(labels, MultiIndex): @@ -2042,7 +2038,7 @@ def _getitem_scalar(self, key): values = self.obj._get_value(*key, takeable=True) return values - def _validate_integer(self, key, axis): + def _validate_integer(self, key: int, axis: int): """ Check that 'key' is a valid position in the desired axis. @@ -2067,7 +2063,7 @@ def _validate_integer(self, key, axis): if key >= len_axis or key < -len_axis: raise IndexError("single positional indexer is out-of-bounds") - def _getitem_tuple(self, tup): + def _getitem_tuple(self, tup: tuple): self._has_valid_tuple(tup) try: From aa3b6c3395e3dcb52aa4a8b3e84d52b064b70de5 Mon Sep 17 00:00:00 2001 From: jbrockmendel <jbrockmendel@gmail.com> Date: Sat, 13 Jul 2019 15:21:46 -0700 Subject: [PATCH 05/10] blackify --- pandas/core/frame.py | 4 +++- pandas/core/indexing.py | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index f537a48b44f87..663b45c4b1643 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3484,7 +3484,9 @@ def _setitem_array(self, key, value): for k1, k2 in zip(key, value.columns): self[k1] = value[k2] else: - indexer = self.loc._get_listlike_indexer(key, axis=1, raise_missing=False)[1] + indexer = self.loc._get_listlike_indexer( + key, axis=1, raise_missing=False + )[1] self._check_setitem_copy() self.loc._setitem_with_indexer((slice(None), indexer), value) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index a30d87b1825e2..b54295954c41d 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -637,7 +637,7 @@ def _setitem_with_indexer_missing(self, indexer, value): self.obj._maybe_update_cacher(clear=True) return self.obj - def _align_series(self, indexer, ser: ABCSeries, multiindex_indexer=False): + def _align_series(self, indexer, ser: ABCSeries, multiindex_indexer: bool = False): """ Parameters ---------- From deb619379faa8b4bef44fa9d8ffb2943b43f76ec Mon Sep 17 00:00:00 2001 From: jbrockmendel <jbrockmendel@gmail.com> Date: Sat, 13 Jul 2019 19:46:02 -0700 Subject: [PATCH 06/10] remove unnecessary --- pandas/core/indexing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index b54295954c41d..75c2af5816597 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -483,7 +483,7 @@ def setter(item, v): if is_list_like_indexer(value) and getattr(value, "ndim", 1) > 0: # we have an equal len Frame - if isinstance(value, ABCDataFrame) and value.ndim > 1: + if isinstance(value, ABCDataFrame): sub_indexer = list(indexer) multiindex_indexer = isinstance(labels, MultiIndex) From 944fb3663becb2f7b5d912220411a9aec282a3be Mon Sep 17 00:00:00 2001 From: jbrockmendel <jbrockmendel@gmail.com> Date: Sun, 14 Jul 2019 13:11:22 -0700 Subject: [PATCH 07/10] silence mypy complaint --- pandas/core/indexing.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 75c2af5816597..af882a814ed24 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1,4 +1,5 @@ import textwrap +from typing import Any import warnings import numpy as np @@ -1742,7 +1743,7 @@ def _get_partial_string_timestamp_match_key(self, key, labels: Index): if isinstance(key, str) and labels.levels[0].is_all_dates: # Convert key '2016-01-01' to # ('2016-01-01'[, slice(None, None, None)]+) - key = tuple([key] + [slice(None)] * (len(labels.levels) - 1)) + key = tuple([key] + [slice(None)] * (len(labels.levels) - 1)) # type: ignore if isinstance(key, tuple): # Convert (..., '2016-01-01', ...) in tuple to @@ -1750,7 +1751,7 @@ def _get_partial_string_timestamp_match_key(self, key, labels: Index): new_key = [] for i, component in enumerate(key): if isinstance(component, str) and labels.levels[i].is_all_dates: - new_key.append(slice(component, component, None)) + new_key.append(slice(component, component, None)) # type: ignore else: new_key.append(component) key = tuple(new_key) From 0f44a3c858a2250ab41647c1b739c94a5fa443a5 Mon Sep 17 00:00:00 2001 From: jbrockmendel <jbrockmendel@gmail.com> Date: Sun, 14 Jul 2019 13:15:01 -0700 Subject: [PATCH 08/10] blackify --- pandas/core/indexing.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index af882a814ed24..234c0f58ebac5 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1743,7 +1743,9 @@ def _get_partial_string_timestamp_match_key(self, key, labels: Index): if isinstance(key, str) and labels.levels[0].is_all_dates: # Convert key '2016-01-01' to # ('2016-01-01'[, slice(None, None, None)]+) - key = tuple([key] + [slice(None)] * (len(labels.levels) - 1)) # type: ignore + key = tuple( + [key] + [slice(None)] * (len(labels.levels) - 1) + ) # type: ignore if isinstance(key, tuple): # Convert (..., '2016-01-01', ...) in tuple to @@ -1751,7 +1753,9 @@ def _get_partial_string_timestamp_match_key(self, key, labels: Index): new_key = [] for i, component in enumerate(key): if isinstance(component, str) and labels.levels[i].is_all_dates: - new_key.append(slice(component, component, None)) # type: ignore + new_key.append( + slice(component, component, None) + ) # type: ignore else: new_key.append(component) key = tuple(new_key) From 525a02de8a6f6c058e36bee14c742d6392ec232b Mon Sep 17 00:00:00 2001 From: jbrockmendel <jbrockmendel@gmail.com> Date: Sun, 14 Jul 2019 20:56:07 -0700 Subject: [PATCH 09/10] lint/mypy --- pandas/core/indexing.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 234c0f58ebac5..bb875760df5d3 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1,5 +1,4 @@ import textwrap -from typing import Any import warnings import numpy as np @@ -1736,16 +1735,14 @@ def _getitem_scalar(self, key): values = self.obj._get_value(*key) return values - def _get_partial_string_timestamp_match_key(self, key, labels: Index): + def _get_partial_string_timestamp_match_key(self, key, labels): """Translate any partial string timestamp matches in key, returning the new key (GH 10331)""" if isinstance(labels, MultiIndex): if isinstance(key, str) and labels.levels[0].is_all_dates: # Convert key '2016-01-01' to # ('2016-01-01'[, slice(None, None, None)]+) - key = tuple( - [key] + [slice(None)] * (len(labels.levels) - 1) - ) # type: ignore + key = tuple([key] + [slice(None)] * (len(labels.levels) - 1)) if isinstance(key, tuple): # Convert (..., '2016-01-01', ...) in tuple to @@ -1753,9 +1750,7 @@ def _get_partial_string_timestamp_match_key(self, key, labels: Index): new_key = [] for i, component in enumerate(key): if isinstance(component, str) and labels.levels[i].is_all_dates: - new_key.append( - slice(component, component, None) - ) # type: ignore + new_key.append(slice(component, component, None)) else: new_key.append(component) key = tuple(new_key) From f9c7fc98098c56cf7badfd8914936e489d9fe663 Mon Sep 17 00:00:00 2001 From: jbrockmendel <jbrockmendel@gmail.com> Date: Mon, 15 Jul 2019 08:43:48 -0700 Subject: [PATCH 10/10] tuple->Tuple --- pandas/core/indexing.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index bb875760df5d3..53434d620877e 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1,4 +1,5 @@ import textwrap +from typing import Tuple import warnings import numpy as np @@ -222,7 +223,7 @@ def _validate_key(self, key, axis: int): """ raise AbstractMethodError(self) - def _has_valid_tuple(self, key: tuple): + def _has_valid_tuple(self, key: Tuple): """ check the key for valid keys across my indexer """ for i, k in enumerate(key): if i >= self.obj.ndim: @@ -235,7 +236,7 @@ def _has_valid_tuple(self, key: tuple): "[{types}] types".format(types=self._valid_types) ) - def _is_nested_tuple_indexer(self, tup: tuple): + def _is_nested_tuple_indexer(self, tup: Tuple): if any(isinstance(ax, MultiIndex) for ax in self.obj.axes): return any(is_nested_tuple(tup, ax) for ax in self.obj.axes) return False @@ -781,7 +782,7 @@ def _align_frame(self, indexer, df: ABCDataFrame): raise ValueError("Incompatible indexer with DataFrame") - def _getitem_tuple(self, tup: tuple): + def _getitem_tuple(self, tup: Tuple): try: return self._getitem_lowerdim(tup) except IndexingError: @@ -804,7 +805,7 @@ def _getitem_tuple(self, tup: tuple): return retval - def _multi_take_opportunity(self, tup: tuple): + def _multi_take_opportunity(self, tup: Tuple): """ Check whether there is the possibility to use ``_multi_take``. Currently the limit is that all axes being indexed must be indexed with @@ -828,7 +829,7 @@ def _multi_take_opportunity(self, tup: tuple): return True - def _multi_take(self, tup: tuple): + def _multi_take(self, tup: Tuple): """ Create the indexers for the passed tuple of keys, and execute the take operation. This allows the take operation to be executed all at once - @@ -854,7 +855,7 @@ def _multi_take(self, tup: tuple): def _convert_for_reindex(self, key, axis: int): return key - def _handle_lowerdim_multi_index_axis0(self, tup: tuple): + def _handle_lowerdim_multi_index_axis0(self, tup: Tuple): # we have an axis0 multi-index, handle or raise axis = self.axis or 0 try: @@ -879,7 +880,7 @@ def _handle_lowerdim_multi_index_axis0(self, tup: tuple): return None - def _getitem_lowerdim(self, tup: tuple): + def _getitem_lowerdim(self, tup: Tuple): # we can directly get the axis result since the axis is specified if self.axis is not None: @@ -943,7 +944,7 @@ def _getitem_lowerdim(self, tup: tuple): raise IndexingError("not applicable") - def _getitem_nested_tuple(self, tup: tuple): + def _getitem_nested_tuple(self, tup: Tuple): # we have a nested tuple so have at least 1 multi-index level # we should be able to match up the dimensionality here @@ -2063,7 +2064,7 @@ def _validate_integer(self, key: int, axis: int): if key >= len_axis or key < -len_axis: raise IndexError("single positional indexer is out-of-bounds") - def _getitem_tuple(self, tup: tuple): + def _getitem_tuple(self, tup: Tuple): self._has_valid_tuple(tup) try: