REF/TYP: indexes #40330


Merged: 12 commits, Mar 9, 2021
14 changes: 5 additions & 9 deletions pandas/core/indexes/base.py
@@ -173,7 +173,6 @@
RangeIndex,
Series,
)
from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin


__all__ = ["Index"]
@@ -305,7 +304,7 @@ def _outer_indexer(

_typ = "index"
_data: Union[ExtensionArray, np.ndarray]
_id: Optional[_Identity] = None
_id: Optional[object] = None
_name: Hashable = None
# MultiIndex.levels previously allowed setting the index name. We
# don't allow this anymore, and raise if it happens rather than
@@ -711,7 +710,7 @@ def _reset_identity(self) -> None:
"""
Initializes or resets ``_id`` attribute with new object.
"""
self._id = _Identity(object())
self._id = object()

@final
def _cleanup(self) -> None:
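The `_Identity` NewType wrapper is gone; `_id` is now a plain sentinel created with `object()`. A minimal sketch of the identity pattern this supports (toy class and names, not pandas internals), where two indexes count as the same view only when they share the same sentinel:

```python
class ToyIndex:
    _id = None  # identity sentinel; shared only by views of the same index

    def _reset_identity(self) -> None:
        # every call mints a brand-new, unique sentinel
        self._id = object()

    def is_(self, other) -> bool:
        # identity of the sentinels, not equality of contents
        return self._id is getattr(other, "_id", object())


a = ToyIndex()
a._reset_identity()

view = ToyIndex()
view._id = a._id         # e.g. a shallow copy that keeps the same identity

fresh = ToyIndex()
fresh._reset_identity()  # independent identity

print(a.is_(view))   # True
print(a.is_(fresh))  # False
```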
@@ -1717,7 +1716,7 @@ def sortlevel(self, level=None, ascending=True, sort_remaining=None):

return self.sort_values(return_indexer=True, ascending=ascending)

def _get_level_values(self, level):
def _get_level_values(self, level) -> Index:
"""
Return an Index of values for requested level.

@@ -2977,11 +2976,8 @@ def _union(self, other: Index, sort):
return result

@final
def _wrap_setop_result(self, other, result):
if needs_i8_conversion(self.dtype) and isinstance(result, np.ndarray):
self = cast("DatetimeIndexOpsMixin", self)
result = type(self._data)._simple_new(result, dtype=self.dtype)
elif is_categorical_dtype(self.dtype) and isinstance(result, np.ndarray):
def _wrap_setop_result(self, other: Index, result) -> Index:
if is_categorical_dtype(self.dtype) and isinstance(result, np.ndarray):
result = Categorical(result, dtype=self.dtype)

name = get_op_result_name(self, other)
13 changes: 1 addition & 12 deletions pandas/core/indexes/category.py
@@ -178,6 +178,7 @@ class CategoricalIndex(NDArrayBackedExtensionIndex, accessor.PandasDelegate):
"""

_typ = "categoricalindex"
_data_cls = Categorical

@property
def _can_hold_strings(self):
@@ -225,18 +226,6 @@ def __new__(

return cls._simple_new(data, name=name)

@classmethod
def _simple_new(cls, values: Categorical, name: Optional[Hashable] = None):
assert isinstance(values, Categorical), type(values)
result = object.__new__(cls)

result._data = values
result._name = name
result._cache = {}

result._reset_identity()
return result

# --------------------------------------------------------------------

@doc(Index._shallow_copy)
29 changes: 2 additions & 27 deletions pandas/core/indexes/datetimelike.py
@@ -5,11 +5,9 @@
from typing import (
TYPE_CHECKING,
Any,
Hashable,
List,
Optional,
Tuple,
Type,
TypeVar,
Union,
cast,
@@ -44,7 +42,6 @@
is_integer,
is_list_like,
is_period_dtype,
is_scalar,
)
from pandas.core.dtypes.concat import concat_compat

@@ -119,7 +116,6 @@ class DatetimeIndexOpsMixin(NDArrayBackedExtensionIndex):

_can_hold_strings = False
_data: Union[DatetimeArray, TimedeltaArray, PeriodArray]
_data_cls: Union[Type[DatetimeArray], Type[TimedeltaArray], Type[PeriodArray]]
freq: Optional[BaseOffset]
freqstr: Optional[str]
_resolution_obj: Resolution
@@ -132,25 +128,6 @@ class DatetimeIndexOpsMixin(NDArrayBackedExtensionIndex):
)
_hasnans = hasnans # for index / array -agnostic code

@classmethod
def _simple_new(
cls,
values: Union[DatetimeArray, TimedeltaArray, PeriodArray],
name: Optional[Hashable] = None,
):
assert isinstance(values, cls._data_cls), type(values)

result = object.__new__(cls)
result._data = values
result._name = name
result._cache = {}

# For groupby perf. See note in indexes/base about _index_data
result._index_data = values._ndarray

result._reset_identity()
return result

@property
def _is_all_dates(self) -> bool:
return True
@@ -219,12 +196,10 @@ def equals(self, other: Any) -> bool:
def __contains__(self, key: Any) -> bool:
hash(key)
try:
res = self.get_loc(key)
self.get_loc(key)
except (KeyError, TypeError, ValueError):
return False
return bool(
is_scalar(res) or isinstance(res, slice) or (is_list_like(res) and len(res))
)
return True

Reviewer comment (Contributor): this is some crazy condition, good to remove it

@Appender(_index_shared_docs["take"] % _index_doc_kwargs)
def take(self, indices, axis=0, allow_fill=True, fill_value=None, **kwargs):
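The `__contains__` change above relies on `get_loc` raising `KeyError` (or `TypeError`/`ValueError`) for missing keys, so a lookup that returns at all is already proof of membership; inspecting the shape of the result was redundant. A self-contained sketch of the same pattern (toy class, not the pandas implementation):

```python
class ToyIndex:
    def __init__(self, values):
        self._values = list(values)

    def get_loc(self, key):
        # raises KeyError for absent keys, mirroring Index.get_loc
        try:
            return self._values.index(key)
        except ValueError as err:
            raise KeyError(key) from err

    def __contains__(self, key) -> bool:
        hash(key)  # reject unhashable keys up front, as in the diff
        try:
            self.get_loc(key)
        except (KeyError, TypeError, ValueError):
            return False
        return True


idx = ToyIndex(["2021-01-01", "2021-01-02"])
print("2021-01-01" in idx)  # True
print("2030-01-01" in idx)  # False
```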
36 changes: 35 additions & 1 deletion pandas/core/indexes/extension.py
@@ -2,7 +2,9 @@
Shared methods for Index subclasses backed by ExtensionArray.
"""
from typing import (
Hashable,
List,
Type,
TypeVar,
Union,
)
@@ -30,7 +32,13 @@
ABCSeries,
)

from pandas.core.arrays import IntervalArray
from pandas.core.arrays import (
Categorical,
DatetimeArray,
IntervalArray,
PeriodArray,
TimedeltaArray,
)
from pandas.core.arrays._mixins import NDArrayBackedExtensionArray
from pandas.core.indexers import deprecate_ndim_indexing
from pandas.core.indexes.base import Index
@@ -352,6 +360,32 @@ class NDArrayBackedExtensionIndex(ExtensionIndex):

_data: NDArrayBackedExtensionArray

_data_cls: Union[
Type[Categorical],
Type[DatetimeArray],
Type[TimedeltaArray],
Type[PeriodArray],
]

@classmethod
def _simple_new(
cls,
values: NDArrayBackedExtensionArray,
name: Hashable = None,
):
assert isinstance(values, cls._data_cls), type(values)

result = object.__new__(cls)
result._data = values
result._name = name
result._cache = {}

# For groupby perf. See note in indexes/base about _index_data
result._index_data = values._ndarray

result._reset_identity()
return result

def _get_engine_target(self) -> np.ndarray:
return self._data._ndarray
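The consolidated `_simple_new` above replaces the near-identical copies that previously lived on `CategoricalIndex` and `DatetimeIndexOpsMixin`; each concrete index now only has to declare its backing array type via `_data_cls`. A toy sketch of that pattern (hypothetical names, not pandas internals):

```python
import numpy as np


class ToyNDArrayBackedIndex:
    _data_cls: type  # each subclass points this at its backing array class

    @classmethod
    def _simple_new(cls, values, name=None):
        # wrap a pre-validated array without re-running constructor checks
        assert isinstance(values, cls._data_cls), type(values)
        result = object.__new__(cls)
        result._data = values
        result._name = name
        result._cache = {}
        result._index_data = values  # stand-in for values._ndarray
        result._id = object()        # stand-in for _reset_identity()
        return result


class ToyDatetimeIndex(ToyNDArrayBackedIndex):
    _data_cls = np.ndarray  # stand-in for DatetimeArray


values = np.array(["2021-01-01", "2021-01-02"], dtype="datetime64[ns]")
idx = ToyDatetimeIndex._simple_new(values, name="ts")
print(idx._name, idx._data)
```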

38 changes: 21 additions & 17 deletions pandas/core/indexes/multi.py
@@ -6,6 +6,7 @@
TYPE_CHECKING,
Any,
Callable,
Collection,
Hashable,
Iterable,
List,
@@ -98,6 +99,7 @@
if TYPE_CHECKING:
from pandas import (
CategoricalIndex,
DataFrame,
Series,
)

@@ -323,7 +325,7 @@ def __new__(
if len(levels) == 0:
raise ValueError("Must pass non-zero number of levels/codes")

result = object.__new__(MultiIndex)
result = object.__new__(cls)
result._cache = {}

# we've already validated levels and codes, so shortcut here
@@ -503,7 +505,7 @@ def from_arrays(cls, arrays, sortorder=None, names=lib.no_default) -> MultiIndex
@names_compat
def from_tuples(
cls,
tuples,
tuples: Iterable[Tuple[Hashable, ...]],
sortorder: Optional[int] = None,
names: Optional[Sequence[Hashable]] = None,
) -> MultiIndex:
@@ -546,6 +548,7 @@ def from_tuples(
raise TypeError("Input must be a list / sequence of tuple-likes.")
elif is_iterator(tuples):
tuples = list(tuples)
tuples = cast(Collection[Tuple[Hashable, ...]], tuples)

arrays: List[Sequence[Hashable]]
if len(tuples) == 0:
@@ -560,7 +563,8 @@
elif isinstance(tuples, list):
arrays = list(lib.to_object_array_tuples(tuples).T)
else:
arrays = zip(*tuples)
arrs = zip(*tuples)
arrays = cast(List[Sequence[Hashable]], arrs)

return cls.from_arrays(arrays, sortorder=sortorder, names=names)
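The tightened `from_tuples` signature still accepts any iterable of tuples, including a generator, which the `is_iterator` branch materializes with `list()` before dispatching to `from_arrays`. A quick public-API example:

```python
import pandas as pd

mi = pd.MultiIndex.from_tuples(
    ((letter, number) for letter in "ab" for number in (1, 2)),
    names=["letter", "number"],
)
print(mi)  # four entries: ('a', 1), ('a', 2), ('b', 1), ('b', 2)
```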

@@ -626,7 +630,7 @@ def from_product(
return cls(levels, codes, sortorder=sortorder, names=names)

@classmethod
def from_frame(cls, df, sortorder=None, names=None) -> MultiIndex:
def from_frame(cls, df: DataFrame, sortorder=None, names=None) -> MultiIndex:
"""
Make a MultiIndex from a DataFrame.

@@ -762,7 +766,7 @@ def __len__(self) -> int:
# Levels Methods

@cache_readonly
def levels(self):
def levels(self) -> FrozenList:
# Use cache_readonly to ensure that self.get_locs doesn't repeatedly
# create new IndexEngine
# https://github.com/pandas-dev/pandas/issues/31648
@@ -1293,7 +1297,7 @@ def _formatter_func(self, tup):
formatter_funcs = [level._formatter_func for level in self.levels]
return tuple(func(val) for func, val in zip(formatter_funcs, tup))

def _format_data(self, name=None):
def _format_data(self, name=None) -> str:
"""
Return the formatted data as a unicode string
"""
@@ -1419,10 +1423,10 @@ def format(
# --------------------------------------------------------------------
# Names Methods

def _get_names(self):
def _get_names(self) -> FrozenList:
return FrozenList(self._names)

def _set_names(self, names, level=None, validate=True):
def _set_names(self, names, level=None, validate: bool = True):
"""
Set new names on index. Each name has to be a hashable type.

@@ -1433,7 +1437,7 @@ def _set_names(self, names, level=None, validate=True):
level : int, level name, or sequence of int/level names (default None)
If the index is a MultiIndex (hierarchical), level(s) to set (None
for all levels). Otherwise level must be None
validate : boolean, default True
validate : bool, default True
validate that the names match level lengths

Raises
@@ -1712,7 +1716,7 @@ def unique(self, level=None):
level = self._get_level_number(level)
return self._get_level_values(level=level, unique=True)

def to_frame(self, index=True, name=None):
def to_frame(self, index=True, name=None) -> DataFrame:
"""
Create a DataFrame with the levels of the MultiIndex as columns.

@@ -2109,8 +2113,8 @@ def take(

na_value = -1

taken = [lab.take(indices) for lab in self.codes]
if allow_fill:
taken = [lab.take(indices) for lab in self.codes]
mask = indices == -1
if mask.any():
masked = []
@@ -2119,8 +2123,6 @@
label_values[mask] = na_value
masked.append(np.asarray(label_values))
taken = masked
else:
taken = [lab.take(indices) for lab in self.codes]

return MultiIndex(
levels=self.levels, codes=taken, names=self.names, verify_integrity=False
@@ -2644,7 +2646,7 @@ def _get_partial_string_timestamp_match_key(self, key):

return key

def _get_indexer(self, target: Index, method=None, limit=None, tolerance=None):
def _get_indexer(
self, target: Index, method=None, limit=None, tolerance=None
) -> np.ndarray:

# empty indexer
if not len(target):
@@ -3521,7 +3525,7 @@ def equals(self, other: object) -> bool:

return True

def equal_levels(self, other) -> bool:
def equal_levels(self, other: MultiIndex) -> bool:
"""
Return True if the levels of both MultiIndex objects are the same

@@ -3537,7 +3541,7 @@ def equal_levels(self, other) -> bool:
# --------------------------------------------------------------------
# Set Methods

def _union(self, other, sort):
def _union(self, other, sort) -> MultiIndex:
other, result_names = self._convert_can_do_setop(other)

# We could get here with CategoricalIndex other
@@ -3579,7 +3583,7 @@ def _maybe_match_names(self, other):
names.append(None)
return names

def _intersection(self, other, sort=False):
def _intersection(self, other, sort=False) -> MultiIndex:
other, result_names = self._convert_can_do_setop(other)

lvals = self._values
1 change: 1 addition & 0 deletions pandas/core/indexes/numeric.py
@@ -232,6 +232,7 @@ def __contains__(self, key) -> bool:
hash(key)
try:
if is_float(key) and int(key) != key:
# otherwise the `key in self._engine` check casts e.g. 1.1 -> 1
return False
return key in self._engine
except (OverflowError, TypeError, ValueError):
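The new comment documents why non-integral floats are rejected before the engine lookup: the hash-based check would otherwise truncate the key. A quick illustration against an int64 index:

```python
import pandas as pd

idx = pd.Index([1, 2, 3], dtype="int64")

print(1.0 in idx)  # True: an integer-valued float matches its int counterpart
print(1.1 in idx)  # False: the early guard keeps 1.1 from being cast to 1
```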
2 changes: 1 addition & 1 deletion pandas/core/indexes/range.py
@@ -67,7 +67,7 @@ class RangeIndex(Int64Index):

Parameters
----------
start : int (default: 0), or other RangeIndex instance
start : int (default: 0), range, or other RangeIndex instance
If int and "stop" is not given, interpreted as "stop" instead.
stop : int (default: 0)
step : int (default: 1)
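The docstring fix reflects that the constructor already accepts a plain `range` object as `start`. All three spellings below build the same `RangeIndex(start=0, stop=5, step=1)`:

```python
import pandas as pd

print(pd.RangeIndex(5))
print(pd.RangeIndex(range(5)))
print(pd.RangeIndex(pd.RangeIndex(5)))
```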
4 changes: 0 additions & 4 deletions pandas/tests/groupby/test_function.py
@@ -122,10 +122,6 @@ def test_intercept_builtin_sum():
tm.assert_series_equal(result2, expected)


# @pytest.mark.parametrize("f", [max, min, sum])
# def test_builtins_apply(f):


@pytest.mark.parametrize("f", [max, min, sum])
@pytest.mark.parametrize("keys", ["jim", ["jim", "joe"]]) # Single key # Multi-key
def test_builtins_apply(keys, f):