diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 23cb4d5df3f7b..e3f342a024f6c 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -173,7 +173,6 @@ RangeIndex, Series, ) - from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin __all__ = ["Index"] @@ -305,7 +304,7 @@ def _outer_indexer( _typ = "index" _data: Union[ExtensionArray, np.ndarray] - _id: Optional[_Identity] = None + _id: Optional[object] = None _name: Hashable = None # MultiIndex.levels previously allowed setting the index name. We # don't allow this anymore, and raise if it happens rather than @@ -711,7 +710,7 @@ def _reset_identity(self) -> None: """ Initializes or resets ``_id`` attribute with new object. """ - self._id = _Identity(object()) + self._id = object() @final def _cleanup(self) -> None: @@ -1717,7 +1716,7 @@ def sortlevel(self, level=None, ascending=True, sort_remaining=None): return self.sort_values(return_indexer=True, ascending=ascending) - def _get_level_values(self, level): + def _get_level_values(self, level) -> Index: """ Return an Index of values for requested level. @@ -2977,11 +2976,8 @@ def _union(self, other: Index, sort): return result @final - def _wrap_setop_result(self, other, result): - if needs_i8_conversion(self.dtype) and isinstance(result, np.ndarray): - self = cast("DatetimeIndexOpsMixin", self) - result = type(self._data)._simple_new(result, dtype=self.dtype) - elif is_categorical_dtype(self.dtype) and isinstance(result, np.ndarray): + def _wrap_setop_result(self, other: Index, result) -> Index: + if is_categorical_dtype(self.dtype) and isinstance(result, np.ndarray): result = Categorical(result, dtype=self.dtype) name = get_op_result_name(self, other) diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 7a204dcce8a88..869836a3da70c 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -178,6 +178,7 @@ class CategoricalIndex(NDArrayBackedExtensionIndex, accessor.PandasDelegate): """ _typ = "categoricalindex" + _data_cls = Categorical @property def _can_hold_strings(self): @@ -225,18 +226,6 @@ def __new__( return cls._simple_new(data, name=name) - @classmethod - def _simple_new(cls, values: Categorical, name: Optional[Hashable] = None): - assert isinstance(values, Categorical), type(values) - result = object.__new__(cls) - - result._data = values - result._name = name - result._cache = {} - - result._reset_identity() - return result - # -------------------------------------------------------------------- @doc(Index._shallow_copy) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index f7e37b10ef74c..1dd5b40f7102f 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -5,11 +5,9 @@ from typing import ( TYPE_CHECKING, Any, - Hashable, List, Optional, Tuple, - Type, TypeVar, Union, cast, @@ -44,7 +42,6 @@ is_integer, is_list_like, is_period_dtype, - is_scalar, ) from pandas.core.dtypes.concat import concat_compat @@ -119,7 +116,6 @@ class DatetimeIndexOpsMixin(NDArrayBackedExtensionIndex): _can_hold_strings = False _data: Union[DatetimeArray, TimedeltaArray, PeriodArray] - _data_cls: Union[Type[DatetimeArray], Type[TimedeltaArray], Type[PeriodArray]] freq: Optional[BaseOffset] freqstr: Optional[str] _resolution_obj: Resolution @@ -132,25 +128,6 @@ class DatetimeIndexOpsMixin(NDArrayBackedExtensionIndex): ) _hasnans = hasnans # for index / array -agnostic code - @classmethod - def _simple_new( - cls, - values: Union[DatetimeArray, TimedeltaArray, PeriodArray], - name: Optional[Hashable] = None, - ): - assert isinstance(values, cls._data_cls), type(values) - - result = object.__new__(cls) - result._data = values - result._name = name - result._cache = {} - - # For groupby perf. See note in indexes/base about _index_data - result._index_data = values._ndarray - - result._reset_identity() - return result - @property def _is_all_dates(self) -> bool: return True @@ -219,12 +196,10 @@ def equals(self, other: Any) -> bool: def __contains__(self, key: Any) -> bool: hash(key) try: - res = self.get_loc(key) + self.get_loc(key) except (KeyError, TypeError, ValueError): return False - return bool( - is_scalar(res) or isinstance(res, slice) or (is_list_like(res) and len(res)) - ) + return True @Appender(_index_shared_docs["take"] % _index_doc_kwargs) def take(self, indices, axis=0, allow_fill=True, fill_value=None, **kwargs): diff --git a/pandas/core/indexes/extension.py b/pandas/core/indexes/extension.py index ac70200c0c404..efdfd2b089345 100644 --- a/pandas/core/indexes/extension.py +++ b/pandas/core/indexes/extension.py @@ -2,7 +2,9 @@ Shared methods for Index subclasses backed by ExtensionArray. """ from typing import ( + Hashable, List, + Type, TypeVar, Union, ) @@ -30,7 +32,13 @@ ABCSeries, ) -from pandas.core.arrays import IntervalArray +from pandas.core.arrays import ( + Categorical, + DatetimeArray, + IntervalArray, + PeriodArray, + TimedeltaArray, +) from pandas.core.arrays._mixins import NDArrayBackedExtensionArray from pandas.core.indexers import deprecate_ndim_indexing from pandas.core.indexes.base import Index @@ -352,6 +360,32 @@ class NDArrayBackedExtensionIndex(ExtensionIndex): _data: NDArrayBackedExtensionArray + _data_cls: Union[ + Type[Categorical], + Type[DatetimeArray], + Type[TimedeltaArray], + Type[PeriodArray], + ] + + @classmethod + def _simple_new( + cls, + values: NDArrayBackedExtensionArray, + name: Hashable = None, + ): + assert isinstance(values, cls._data_cls), type(values) + + result = object.__new__(cls) + result._data = values + result._name = name + result._cache = {} + + # For groupby perf. See note in indexes/base about _index_data + result._index_data = values._ndarray + + result._reset_identity() + return result + def _get_engine_target(self) -> np.ndarray: return self._data._ndarray diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index fd0e0ef5fa799..1edc716a24872 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -6,6 +6,7 @@ TYPE_CHECKING, Any, Callable, + Collection, Hashable, Iterable, List, @@ -98,6 +99,7 @@ if TYPE_CHECKING: from pandas import ( CategoricalIndex, + DataFrame, Series, ) @@ -323,7 +325,7 @@ def __new__( if len(levels) == 0: raise ValueError("Must pass non-zero number of levels/codes") - result = object.__new__(MultiIndex) + result = object.__new__(cls) result._cache = {} # we've already validated levels and codes, so shortcut here @@ -503,7 +505,7 @@ def from_arrays(cls, arrays, sortorder=None, names=lib.no_default) -> MultiIndex @names_compat def from_tuples( cls, - tuples, + tuples: Iterable[Tuple[Hashable, ...]], sortorder: Optional[int] = None, names: Optional[Sequence[Hashable]] = None, ) -> MultiIndex: @@ -546,6 +548,7 @@ def from_tuples( raise TypeError("Input must be a list / sequence of tuple-likes.") elif is_iterator(tuples): tuples = list(tuples) + tuples = cast(Collection[Tuple[Hashable, ...]], tuples) arrays: List[Sequence[Hashable]] if len(tuples) == 0: @@ -560,7 +563,8 @@ def from_tuples( elif isinstance(tuples, list): arrays = list(lib.to_object_array_tuples(tuples).T) else: - arrays = zip(*tuples) + arrs = zip(*tuples) + arrays = cast(List[Sequence[Hashable]], arrs) return cls.from_arrays(arrays, sortorder=sortorder, names=names) @@ -626,7 +630,7 @@ def from_product( return cls(levels, codes, sortorder=sortorder, names=names) @classmethod - def from_frame(cls, df, sortorder=None, names=None) -> MultiIndex: + def from_frame(cls, df: DataFrame, sortorder=None, names=None) -> MultiIndex: """ Make a MultiIndex from a DataFrame. @@ -762,7 +766,7 @@ def __len__(self) -> int: # Levels Methods @cache_readonly - def levels(self): + def levels(self) -> FrozenList: # Use cache_readonly to ensure that self.get_locs doesn't repeatedly # create new IndexEngine # https://github.com/pandas-dev/pandas/issues/31648 @@ -1293,7 +1297,7 @@ def _formatter_func(self, tup): formatter_funcs = [level._formatter_func for level in self.levels] return tuple(func(val) for func, val in zip(formatter_funcs, tup)) - def _format_data(self, name=None): + def _format_data(self, name=None) -> str: """ Return the formatted data as a unicode string """ @@ -1419,10 +1423,10 @@ def format( # -------------------------------------------------------------------- # Names Methods - def _get_names(self): + def _get_names(self) -> FrozenList: return FrozenList(self._names) - def _set_names(self, names, level=None, validate=True): + def _set_names(self, names, level=None, validate: bool = True): """ Set new names on index. Each name has to be a hashable type. @@ -1433,7 +1437,7 @@ def _set_names(self, names, level=None, validate=True): level : int, level name, or sequence of int/level names (default None) If the index is a MultiIndex (hierarchical), level(s) to set (None for all levels). Otherwise level must be None - validate : boolean, default True + validate : bool, default True validate that the names match level lengths Raises @@ -1712,7 +1716,7 @@ def unique(self, level=None): level = self._get_level_number(level) return self._get_level_values(level=level, unique=True) - def to_frame(self, index=True, name=None): + def to_frame(self, index=True, name=None) -> DataFrame: """ Create a DataFrame with the levels of the MultiIndex as columns. @@ -2109,8 +2113,8 @@ def take( na_value = -1 + taken = [lab.take(indices) for lab in self.codes] if allow_fill: - taken = [lab.take(indices) for lab in self.codes] mask = indices == -1 if mask.any(): masked = [] @@ -2119,8 +2123,6 @@ def take( label_values[mask] = na_value masked.append(np.asarray(label_values)) taken = masked - else: - taken = [lab.take(indices) for lab in self.codes] return MultiIndex( levels=self.levels, codes=taken, names=self.names, verify_integrity=False @@ -2644,7 +2646,9 @@ def _get_partial_string_timestamp_match_key(self, key): return key - def _get_indexer(self, target: Index, method=None, limit=None, tolerance=None): + def _get_indexer( + self, target: Index, method=None, limit=None, tolerance=None + ) -> np.ndarray: # empty indexer if not len(target): @@ -3521,7 +3525,7 @@ def equals(self, other: object) -> bool: return True - def equal_levels(self, other) -> bool: + def equal_levels(self, other: MultiIndex) -> bool: """ Return True if the levels of both MultiIndex objects are the same @@ -3537,7 +3541,7 @@ def equal_levels(self, other) -> bool: # -------------------------------------------------------------------- # Set Methods - def _union(self, other, sort): + def _union(self, other, sort) -> MultiIndex: other, result_names = self._convert_can_do_setop(other) # We could get here with CategoricalIndex other @@ -3579,7 +3583,7 @@ def _maybe_match_names(self, other): names.append(None) return names - def _intersection(self, other, sort=False): + def _intersection(self, other, sort=False) -> MultiIndex: other, result_names = self._convert_can_do_setop(other) lvals = self._values diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index 96c8c1ab9b69c..a581516f23feb 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -232,6 +232,7 @@ def __contains__(self, key) -> bool: hash(key) try: if is_float(key) and int(key) != key: + # otherwise the `key in self._engine` check casts e.g. 1.1 -> 1 return False return key in self._engine except (OverflowError, TypeError, ValueError): diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 0c2d4a1872c98..33525f19912d5 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -67,7 +67,7 @@ class RangeIndex(Int64Index): Parameters ---------- - start : int (default: 0), or other RangeIndex instance + start : int (default: 0), range, or other RangeIndex instance If int and "stop" is not given, interpreted as "stop" instead. stop : int (default: 0) step : int (default: 1) diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index cab5417e81445..515774eae009b 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -122,10 +122,6 @@ def test_intercept_builtin_sum(): tm.assert_series_equal(result2, expected) -# @pytest.mark.parametrize("f", [max, min, sum]) -# def test_builtins_apply(f): - - @pytest.mark.parametrize("f", [max, min, sum]) @pytest.mark.parametrize("keys", ["jim", ["jim", "joe"]]) # Single key # Multi-key def test_builtins_apply(keys, f):