From 1fa673ecf3c89baec22a6899a9b14e291a9f2ab6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Sun, 3 Jul 2022 11:35:00 -0400 Subject: [PATCH 1/3] TYP: more return annotations in core/ --- pandas/_testing/__init__.py | 2 +- pandas/core/array_algos/replace.py | 2 +- pandas/core/arrays/_mixins.py | 6 +- pandas/core/arrays/arrow/_arrow_utils.py | 16 ++- pandas/core/arrays/arrow/array.py | 2 +- pandas/core/arrays/arrow/dtype.py | 2 +- pandas/core/arrays/base.py | 10 +- pandas/core/arrays/categorical.py | 167 +++++++++++++++++++---- pandas/core/arrays/datetimelike.py | 10 +- pandas/core/arrays/interval.py | 13 +- pandas/core/arrays/masked.py | 6 +- pandas/core/arrays/period.py | 4 +- pandas/core/arrays/sparse/accessor.py | 16 ++- pandas/core/arrays/sparse/array.py | 4 +- pandas/core/arrays/sparse/dtype.py | 4 +- pandas/core/arrays/string_.py | 4 +- pandas/core/arrays/string_arrow.py | 2 +- pandas/core/computation/common.py | 2 +- pandas/core/computation/expr.py | 6 +- pandas/core/computation/expressions.py | 4 +- pandas/core/computation/ops.py | 8 +- pandas/core/dtypes/base.py | 2 +- pandas/core/dtypes/cast.py | 4 +- pandas/core/dtypes/common.py | 4 +- pandas/core/dtypes/concat.py | 3 +- pandas/core/dtypes/dtypes.py | 13 +- pandas/core/exchange/column.py | 4 +- pandas/core/exchange/dataframe.py | 20 ++- pandas/core/exchange/from_dataframe.py | 6 +- pandas/core/groupby/base.py | 7 +- pandas/core/groupby/generic.py | 10 +- pandas/core/groupby/groupby.py | 30 ++-- pandas/core/groupby/grouper.py | 10 +- pandas/core/groupby/ops.py | 2 +- pandas/core/indexers/utils.py | 2 +- pandas/core/indexes/accessors.py | 7 +- pandas/core/indexes/base.py | 20 +-- pandas/core/indexes/category.py | 11 +- pandas/core/indexes/datetimelike.py | 2 +- pandas/core/indexes/multi.py | 12 +- pandas/core/indexes/numeric.py | 4 +- pandas/core/indexes/range.py | 3 +- pandas/core/internals/array_manager.py | 20 +-- pandas/core/internals/blocks.py | 25 ++-- pandas/core/internals/concat.py | 2 +- pandas/core/internals/construction.py | 2 +- pandas/core/ops/invalid.py | 2 +- pandas/core/ops/mask_ops.py | 2 +- pandas/core/ops/methods.py | 2 +- pandas/core/reshape/reshape.py | 6 +- pandas/core/reshape/util.py | 4 +- pandas/core/tools/datetimes.py | 2 +- pandas/core/window/ewm.py | 6 +- pandas/core/window/online.py | 2 +- pandas/io/formats/csvs.py | 6 +- pandas/io/formats/format.py | 4 +- pandas/io/formats/html.py | 2 + pandas/io/parsers/c_parser_wrapper.py | 2 +- pandas/io/stata.py | 4 +- 59 files changed, 378 insertions(+), 181 deletions(-) diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py index 0fcea111716ec..5e90eae27f981 100644 --- a/pandas/_testing/__init__.py +++ b/pandas/_testing/__init__.py @@ -529,7 +529,7 @@ def getMixedTypeDict(): return index, data -def makeMixedDataFrame(): +def makeMixedDataFrame() -> DataFrame: return DataFrame(getMixedTypeDict()[1]) diff --git a/pandas/core/array_algos/replace.py b/pandas/core/array_algos/replace.py index 19a44dbfe6f6d..466eeb768f5f9 100644 --- a/pandas/core/array_algos/replace.py +++ b/pandas/core/array_algos/replace.py @@ -119,7 +119,7 @@ def _check_comparison_types( def replace_regex( values: ArrayLike, rx: re.Pattern, value, mask: npt.NDArray[np.bool_] | None -): +) -> None: """ Parameters ---------- diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py index b15e0624963ea..f17d343024915 100644 --- a/pandas/core/arrays/_mixins.py +++ b/pandas/core/arrays/_mixins.py @@ -70,6 +70,8 @@ NumpyValueArrayLike, ) + from pandas import Series + def ravel_compat(meth: F) -> F: """ @@ -259,7 +261,7 @@ def _validate_shift_value(self, fill_value): # we can remove this and use validate_fill_value directly return self._validate_scalar(fill_value) - def __setitem__(self, key, value): + def __setitem__(self, key, value) -> None: key = check_array_indexer(self, key) value = self._validate_setitem_value(value) self._ndarray[key] = value @@ -433,7 +435,7 @@ def insert( # These are not part of the EA API, but we implement them because # pandas assumes they're there. - def value_counts(self, dropna: bool = True): + def value_counts(self, dropna: bool = True) -> Series: """ Return a Series containing counts of unique values. diff --git a/pandas/core/arrays/arrow/_arrow_utils.py b/pandas/core/arrays/arrow/_arrow_utils.py index e4bb7dc94cb8d..5893ca77193c4 100644 --- a/pandas/core/arrays/arrow/_arrow_utils.py +++ b/pandas/core/arrays/arrow/_arrow_utils.py @@ -13,7 +13,7 @@ from pandas.core.arrays.interval import VALID_CLOSED -def fallback_performancewarning(version: str | None = None): +def fallback_performancewarning(version: str | None = None) -> None: """ Raise a PerformanceWarning for falling back to ExtensionArray's non-pyarrow method @@ -24,7 +24,9 @@ def fallback_performancewarning(version: str | None = None): warnings.warn(msg, PerformanceWarning, stacklevel=find_stack_level()) -def pyarrow_array_to_numpy_and_mask(arr, dtype: np.dtype): +def pyarrow_array_to_numpy_and_mask( + arr, dtype: np.dtype +) -> tuple[np.ndarray, np.ndarray]: """ Convert a primitive pyarrow.Array to a numpy array and boolean mask based on the buffers of the Array. @@ -74,12 +76,12 @@ def __init__(self, freq) -> None: def freq(self): return self._freq - def __arrow_ext_serialize__(self): + def __arrow_ext_serialize__(self) -> bytes: metadata = {"freq": self.freq} return json.dumps(metadata).encode() @classmethod - def __arrow_ext_deserialize__(cls, storage_type, serialized): + def __arrow_ext_deserialize__(cls, storage_type, serialized) -> ArrowPeriodType: metadata = json.loads(serialized.decode()) return ArrowPeriodType(metadata["freq"]) @@ -122,7 +124,7 @@ def subtype(self): return self._subtype @property - def inclusive(self): + def inclusive(self) -> str: return self._closed @property @@ -134,12 +136,12 @@ def closed(self): ) return self._closed - def __arrow_ext_serialize__(self): + def __arrow_ext_serialize__(self) -> bytes: metadata = {"subtype": str(self.subtype), "inclusive": self.inclusive} return json.dumps(metadata).encode() @classmethod - def __arrow_ext_deserialize__(cls, storage_type, serialized): + def __arrow_ext_deserialize__(cls, storage_type, serialized) -> ArrowIntervalType: metadata = json.loads(serialized.decode()) subtype = pyarrow.type_for_alias(metadata["subtype"]) inclusive = metadata["inclusive"] diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index dfb58f0edd127..92aedbb836b38 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -368,7 +368,7 @@ def take( indices: TakeIndexer, allow_fill: bool = False, fill_value: Any = None, - ): + ) -> ArrowExtensionArray: """ Take elements from an array. diff --git a/pandas/core/arrays/arrow/dtype.py b/pandas/core/arrays/arrow/dtype.py index 346c4e8d19379..4a32663a68ed2 100644 --- a/pandas/core/arrays/arrow/dtype.py +++ b/pandas/core/arrays/arrow/dtype.py @@ -77,7 +77,7 @@ def construct_array_type(cls): return ArrowExtensionArray @classmethod - def construct_from_string(cls, string: str): + def construct_from_string(cls, string: str) -> ArrowDtype: """ Construct this type from a string. diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 4274e6e5a911c..882cc76cf2d77 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -460,7 +460,7 @@ def __ne__(self, other: Any) -> ArrayLike: # type: ignore[override] """ return ~(self == other) - def __init_subclass__(cls, **kwargs): + def __init_subclass__(cls, **kwargs) -> None: factorize = getattr(cls, "factorize") if ( "use_na_sentinel" not in inspect.signature(factorize).parameters @@ -770,11 +770,11 @@ def argmax(self, skipna: bool = True) -> int: return nargminmax(self, "argmax") def fillna( - self, + self: ExtensionArrayT, value: object | ArrayLike | None = None, method: FillnaOptions | None = None, limit: int | None = None, - ): + ) -> ExtensionArrayT: """ Fill NA/NaN values using the specified method. @@ -1139,7 +1139,9 @@ def factorize( @Substitution(klass="ExtensionArray") @Appender(_extension_array_shared_docs["repeat"]) - def repeat(self, repeats: int | Sequence[int], axis: int | None = None): + def repeat( + self: ExtensionArrayT, repeats: int | Sequence[int], axis: int | None = None + ) -> ExtensionArrayT: nv.validate_repeat((), {"axis": axis}) ind = np.arange(len(self)).repeat(repeats) return self.take(ind) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 70699c45e0c36..2c3b7c2f2589d 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -7,6 +7,7 @@ from typing import ( TYPE_CHECKING, Hashable, + Literal, Sequence, TypeVar, Union, @@ -29,7 +30,10 @@ lib, ) from pandas._libs.arrays import NDArrayBacked -from pandas._libs.lib import no_default +from pandas._libs.lib import ( + NoDefault, + no_default, +) from pandas._typing import ( ArrayLike, AstypeArg, @@ -114,7 +118,11 @@ from pandas.io.formats import console if TYPE_CHECKING: - from pandas import Index + from pandas import ( + DataFrame, + Index, + Series, + ) CategoricalT = TypeVar("CategoricalT", bound="Categorical") @@ -193,7 +201,7 @@ def func(self, other): return func -def contains(cat, key, container): +def contains(cat, key, container) -> bool: """ Helper for membership check for ``key`` in ``cat``. @@ -462,9 +470,7 @@ def __init__( dtype = CategoricalDtype(ordered=False).update_dtype(dtype) arr = coerce_indexer_dtype(codes, dtype.categories) - # error: Argument 1 to "__init__" of "NDArrayBacked" has incompatible - # type "Union[ExtensionArray, ndarray]"; expected "ndarray" - super().__init__(arr, dtype) # type: ignore[arg-type] + super().__init__(arr, dtype) @property def dtype(self) -> CategoricalDtype: @@ -639,7 +645,7 @@ def _from_inferred_categories( @classmethod def from_codes( cls, codes, categories=None, ordered=None, dtype: Dtype | None = None - ): + ) -> Categorical: """ Make a Categorical type from codes and categories or dtype. @@ -707,7 +713,7 @@ def from_codes( # Categories/Codes/Ordered @property - def categories(self): + def categories(self) -> Index: """ The categories of this categorical. @@ -738,7 +744,7 @@ def categories(self): return self.dtype.categories @categories.setter - def categories(self, categories): + def categories(self, categories) -> None: new_dtype = CategoricalDtype(categories, ordered=self.ordered) if self.dtype.categories is not None and len(self.dtype.categories) != len( new_dtype.categories @@ -829,7 +835,20 @@ def _set_dtype(self, dtype: CategoricalDtype) -> Categorical: codes = recode_for_categories(self.codes, self.categories, dtype.categories) return type(self)(codes, dtype=dtype, fastpath=True) - def set_ordered(self, value, inplace=False): + @overload + def set_ordered(self, value, *, inplace: Literal[False] = ...) -> Categorical: + ... + + @overload + def set_ordered(self, value, *, inplace: Literal[True]) -> None: + ... + + @overload + def set_ordered(self, value, *, inplace: bool) -> Categorical | None: + ... + + @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "value"]) + def set_ordered(self, value, inplace: bool = False) -> Categorical | None: """ Set the ordered attribute to the boolean value. @@ -847,8 +866,18 @@ def set_ordered(self, value, inplace=False): NDArrayBacked.__init__(cat, cat._ndarray, new_dtype) if not inplace: return cat + return None + + @overload + def as_ordered(self, *, inplace: Literal[False] = ...) -> Categorical: + ... + + @overload + def as_ordered(self, *, inplace: Literal[True]) -> None: + ... - def as_ordered(self, inplace=False): + @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"]) + def as_ordered(self, inplace: bool = False) -> Categorical | None: """ Set the Categorical to be ordered. @@ -866,7 +895,16 @@ def as_ordered(self, inplace=False): inplace = validate_bool_kwarg(inplace, "inplace") return self.set_ordered(True, inplace=inplace) - def as_unordered(self, inplace=False): + @overload + def as_unordered(self, *, inplace: Literal[False] = ...) -> Categorical: + ... + + @overload + def as_unordered(self, *, inplace: Literal[True]) -> None: + ... + + @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"]) + def as_unordered(self, inplace: bool = False) -> Categorical | None: """ Set the Categorical to be unordered. @@ -973,7 +1011,22 @@ def set_categories( if not inplace: return cat - def rename_categories(self, new_categories, inplace=no_default): + @overload + def rename_categories( + self, new_categories, *, inplace: Literal[False] | NoDefault = ... + ) -> Categorical: + ... + + @overload + def rename_categories(self, new_categories, *, inplace: Literal[True]) -> None: + ... + + @deprecate_nonkeyword_arguments( + version=None, allowed_args=["self", "new_categories"] + ) + def rename_categories( + self, new_categories, inplace: bool | NoDefault = no_default + ) -> Categorical | None: """ Rename categories. @@ -1062,6 +1115,7 @@ def rename_categories(self, new_categories, inplace=no_default): cat.categories = new_categories if not inplace: return cat + return None def reorder_categories(self, new_categories, ordered=None, inplace=no_default): """ @@ -1124,7 +1178,22 @@ def reorder_categories(self, new_categories, ordered=None, inplace=no_default): simplefilter("ignore") return self.set_categories(new_categories, ordered=ordered, inplace=inplace) - def add_categories(self, new_categories, inplace=no_default): + @overload + def add_categories( + self, new_categories, *, inplace: Literal[False] | NoDefault = ... + ) -> Categorical: + ... + + @overload + def add_categories(self, new_categories, *, inplace: Literal[True]) -> None: + ... + + @deprecate_nonkeyword_arguments( + version=None, allowed_args=["self", "new_categories"] + ) + def add_categories( + self, new_categories, inplace: bool | NoDefault = no_default + ) -> Categorical | None: """ Add new categories. @@ -1199,6 +1268,7 @@ def add_categories(self, new_categories, inplace=no_default): NDArrayBacked.__init__(cat, codes, new_dtype) if not inplace: return cat + return None def remove_categories(self, removals, inplace=no_default): """ @@ -1280,7 +1350,20 @@ def remove_categories(self, removals, inplace=no_default): new_categories, ordered=self.ordered, rename=False, inplace=inplace ) - def remove_unused_categories(self, inplace=no_default): + @overload + def remove_unused_categories( + self, *, inplace: Literal[False] | NoDefault = ... + ) -> Categorical: + ... + + @overload + def remove_unused_categories(self, *, inplace: Literal[True]) -> None: + ... + + @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"]) + def remove_unused_categories( + self, inplace: bool | NoDefault = no_default + ) -> Categorical | None: """ Remove categories which are not used. @@ -1348,6 +1431,7 @@ def remove_unused_categories(self, inplace=no_default): NDArrayBacked.__init__(cat, new_codes, new_dtype) if not inplace: return cat + return None # ------------------------------------------------------------------ @@ -1531,7 +1615,7 @@ def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs): f"the numpy op {ufunc.__name__}" ) - def __setstate__(self, state): + def __setstate__(self, state) -> None: """Necessary for making this object picklable""" if not isinstance(state, dict): return super().__setstate__(state) @@ -1617,7 +1701,7 @@ def notna(self) -> np.ndarray: notnull = notna - def value_counts(self, dropna: bool = True): + def value_counts(self, dropna: bool = True) -> Series: """ Return a Series containing counts of each category. @@ -1700,7 +1784,7 @@ def _internal_get_values(self): return self.categories.astype("object").take(self._codes, fill_value=np.nan) return np.array(self) - def check_for_ordered(self, op): + def check_for_ordered(self, op) -> None: """assert that we are ordered""" if not self.ordered: raise TypeError( @@ -1763,9 +1847,26 @@ def argsort(self, ascending=True, kind="quicksort", **kwargs): """ return super().argsort(ascending=ascending, kind=kind, **kwargs) + @overload + def sort_values( + self, + *, + inplace: Literal[False] = ..., + ascending: bool = ..., + na_position: str = ..., + ) -> Categorical: + ... + + @overload + def sort_values( + self, *, inplace: Literal[True], ascending: bool = ..., na_position: str = ... + ) -> None: + ... + + @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"]) def sort_values( self, inplace: bool = False, ascending: bool = True, na_position: str = "last" - ): + ) -> Categorical | None: """ Sort the Categorical by category value returning a new Categorical by default. @@ -1845,11 +1946,11 @@ def sort_values( sorted_idx = nargsort(self, ascending=ascending, na_position=na_position) - if inplace: - self._codes[:] = self._codes[sorted_idx] - else: + if not inplace: codes = self._codes[sorted_idx] return self._from_backing_data(codes) + self._codes[:] = self._codes[sorted_idx] + return None def _rank( self, @@ -1954,7 +2055,9 @@ def _unbox_scalar(self, key) -> int: # ------------------------------------------------------------------ - def take_nd(self, indexer, allow_fill: bool = False, fill_value=None): + def take_nd( + self, indexer, allow_fill: bool = False, fill_value=None + ) -> Categorical: # GH#27745 deprecate alias that other EAs dont have warn( "Categorical.take_nd is deprecated, use Categorical.take instead", @@ -2402,7 +2505,7 @@ def is_dtype_equal(self, other) -> bool: except (AttributeError, TypeError): return False - def describe(self): + def describe(self) -> DataFrame: """ Describes this Categorical @@ -2476,7 +2579,18 @@ def isin(self, values) -> npt.NDArray[np.bool_]: code_values = code_values[null_mask | (code_values >= 0)] return algorithms.isin(self.codes, code_values) - def replace(self, to_replace, value, inplace: bool = False): + @overload + def replace( + self, to_replace, value, *, inplace: Literal[False] = ... + ) -> Categorical: + ... + + @overload + def replace(self, to_replace, value, *, inplace: Literal[True]) -> None: + ... + + @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "value"]) + def replace(self, to_replace, value, inplace: bool = False) -> Categorical | None: """ Replaces all instances of one value with another @@ -2724,7 +2838,7 @@ def _delegate_property_set(self, name, new_values): return setattr(self._parent, name, new_values) @property - def codes(self): + def codes(self) -> Series: """ Return Series of codes as well as the index. """ @@ -2823,6 +2937,7 @@ def factorize_from_iterable(values) -> tuple[np.ndarray, Index]: if not is_list_like(values): raise TypeError("Input must be list-like") + categories: Index if is_categorical_dtype(values): values = extract_array(values) # The Categorical we want to build has the same categories diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 48de7771cd8d7..a2251c49a2cc5 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -931,7 +931,7 @@ def freq(self): return self._freq @freq.setter - def freq(self, value): + def freq(self, value) -> None: if value is not None: value = to_offset(value) self._validate_frequency(self, value) @@ -1548,7 +1548,7 @@ def __rsub__(self, other): # We get here with e.g. datetime objects return -(self - other) - def __iadd__(self, other): + def __iadd__(self: DatetimeLikeArrayT, other) -> DatetimeLikeArrayT: result = self + other self[:] = result[:] @@ -1557,7 +1557,7 @@ def __iadd__(self, other): self._freq = result.freq return self - def __isub__(self, other): + def __isub__(self: DatetimeLikeArrayT, other) -> DatetimeLikeArrayT: result = self - other self[:] = result[:] @@ -2041,11 +2041,11 @@ def ceil(self, freq, ambiguous="raise", nonexistent="raise"): # -------------------------------------------------------------- # Reductions - def any(self, *, axis: int | None = None, skipna: bool = True): + def any(self, *, axis: int | None = None, skipna: bool = True) -> bool: # GH#34479 discussion of desired behavior long-term return nanops.nanany(self._ndarray, axis=axis, skipna=skipna, mask=self.isna()) - def all(self, *, axis: int | None = None, skipna: bool = True): + def all(self, *, axis: int | None = None, skipna: bool = True) -> bool: # GH#34479 discussion of desired behavior long-term return nanops.nanall(self._ndarray, axis=axis, skipna=skipna, mask=self.isna()) diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 56aae3039f7d6..d4db5cfd78367 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -99,7 +99,10 @@ ) if TYPE_CHECKING: - from pandas import Index + from pandas import ( + Index, + Series, + ) IntervalArrayT = TypeVar("IntervalArrayT", bound="IntervalArray") @@ -708,7 +711,7 @@ def __getitem__( raise ValueError("multi-dimensional indexing not allowed") return self._shallow_copy(left, right) - def __setitem__(self, key, value): + def __setitem__(self, key, value) -> None: value_left, value_right = self._validate_setitem_value(value) key = check_array_indexer(self, key) @@ -837,7 +840,7 @@ def argsort( ascending=ascending, kind=kind, na_position=na_position, **kwargs ) - def min(self, *, axis: int | None = None, skipna: bool = True): + def min(self, *, axis: int | None = None, skipna: bool = True) -> IntervalOrNA: nv.validate_minmax_axis(axis, self.ndim) if not len(self): @@ -854,7 +857,7 @@ def min(self, *, axis: int | None = None, skipna: bool = True): indexer = obj.argsort()[0] return obj[indexer] - def max(self, *, axis: int | None = None, skipna: bool = True): + def max(self, *, axis: int | None = None, skipna: bool = True) -> IntervalOrNA: nv.validate_minmax_axis(axis, self.ndim) if not len(self): @@ -1172,7 +1175,7 @@ def _validate_setitem_value(self, value): return value_left, value_right - def value_counts(self, dropna: bool = True): + def value_counts(self, dropna: bool = True) -> Series: """ Returns a Series containing counts of each interval. diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index 2fce5fc747312..128c7e44f5075 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -322,13 +322,13 @@ def round(self, decimals: int = 0, *args, **kwargs): def __invert__(self: BaseMaskedArrayT) -> BaseMaskedArrayT: return type(self)(~self._data, self._mask.copy()) - def __neg__(self): + def __neg__(self: BaseMaskedArrayT) -> BaseMaskedArrayT: return type(self)(-self._data, self._mask.copy()) - def __pos__(self): + def __pos__(self: BaseMaskedArrayT) -> BaseMaskedArrayT: return self.copy() - def __abs__(self): + def __abs__(self: BaseMaskedArrayT) -> BaseMaskedArrayT: return type(self)(abs(self._data), self._mask.copy()) # ------------------------------------------------------------------ diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 4d97345912250..fa7c4e0d0aa70 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -1009,7 +1009,9 @@ def validate_dtype_freq(dtype, freq): return freq -def dt64arr_to_periodarr(data, freq, tz=None): +def dt64arr_to_periodarr( + data, freq, tz=None +) -> tuple[npt.NDArray[np.int64], BaseOffset]: """ Convert an datetime-like array to values Period ordinals. diff --git a/pandas/core/arrays/sparse/accessor.py b/pandas/core/arrays/sparse/accessor.py index 41af7d4ccd506..d9d19d29e3d5c 100644 --- a/pandas/core/arrays/sparse/accessor.py +++ b/pandas/core/arrays/sparse/accessor.py @@ -1,5 +1,7 @@ """Sparse accessor""" +from typing import TYPE_CHECKING + import numpy as np from pandas.compat._optional import import_optional_dependency @@ -13,6 +15,12 @@ from pandas.core.arrays.sparse.array import SparseArray from pandas.core.arrays.sparse.dtype import SparseDtype +if TYPE_CHECKING: + from pandas import ( + DataFrame, + Series, + ) + class BaseAccessor: _validation_msg = "Can only use the '.sparse' accessor with Sparse data." @@ -49,7 +57,7 @@ def _delegate_method(self, name, *args, **kwargs): raise ValueError @classmethod - def from_coo(cls, A, dense_index=False): + def from_coo(cls, A, dense_index=False) -> Series: """ Create a Series with sparse values from a scipy.sparse.coo_matrix. @@ -180,7 +188,7 @@ def to_coo(self, row_levels=(0,), column_levels=(1,), sort_labels=False): ) return A, rows, columns - def to_dense(self): + def to_dense(self) -> Series: """ Convert a Series from sparse values to dense. @@ -228,7 +236,7 @@ def _validate(self, data): raise AttributeError(self._validation_msg) @classmethod - def from_spmatrix(cls, data, index=None, columns=None): + def from_spmatrix(cls, data, index=None, columns=None) -> DataFrame: """ Create a new DataFrame from a scipy sparse matrix. @@ -284,7 +292,7 @@ def from_spmatrix(cls, data, index=None, columns=None): arrays, columns=columns, index=index, verify_integrity=False ) - def to_dense(self): + def to_dense(self) -> DataFrame: """ Convert a DataFrame with sparse values to dense. diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index e7c745e902a49..5653d87a4570b 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -1395,7 +1395,7 @@ def _where(self, mask, value): # ------------------------------------------------------------------------ # IO # ------------------------------------------------------------------------ - def __setstate__(self, state): + def __setstate__(self, state) -> None: """Necessary for making this object picklable""" if isinstance(state, tuple): # Compat for pandas < 0.24.0 @@ -1410,7 +1410,7 @@ def __setstate__(self, state): else: self.__dict__.update(state) - def nonzero(self): + def nonzero(self) -> tuple[npt.NDArray[np.int32]]: if self.fill_value == 0: return (self.sp_index.indices,) else: diff --git a/pandas/core/arrays/sparse/dtype.py b/pandas/core/arrays/sparse/dtype.py index b6bb5faeebdee..859995cb3c230 100644 --- a/pandas/core/arrays/sparse/dtype.py +++ b/pandas/core/arrays/sparse/dtype.py @@ -179,7 +179,7 @@ def _is_boolean(self) -> bool: return is_bool_dtype(self.subtype) @property - def kind(self): + def kind(self) -> str: """ The sparse kind. Either 'integer', or 'block'. """ @@ -194,7 +194,7 @@ def subtype(self): return self._dtype @property - def name(self): + def name(self) -> str: return f"Sparse[{self.subtype.name}, {repr(self.fill_value)}]" def __repr__(self) -> str: diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index 45683d83a1303..083acf16ec758 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -51,6 +51,8 @@ if TYPE_CHECKING: import pyarrow + from pandas import Series + @register_extension_dtype class StringDtype(StorageExtensionDtype): @@ -461,7 +463,7 @@ def max(self, axis=None, skipna: bool = True, **kwargs) -> Scalar: ) return self._wrap_reduction_result(axis, result) - def value_counts(self, dropna: bool = True): + def value_counts(self, dropna: bool = True) -> Series: from pandas import value_counts result = value_counts(self._ndarray, dropna=dropna).astype("Int64") diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py index c4d1a35315d7d..3e3df5a3200c1 100644 --- a/pandas/core/arrays/string_arrow.py +++ b/pandas/core/arrays/string_arrow.py @@ -178,7 +178,7 @@ def to_numpy( result[mask] = na_value return result - def insert(self, loc: int, item): + def insert(self, loc: int, item) -> ArrowStringArray: if not isinstance(item, str) and item is not libmissing.NA: raise TypeError("Scalar must be NA or str") return super().insert(loc, item) diff --git a/pandas/core/computation/common.py b/pandas/core/computation/common.py index 8a9583c465f50..ebf4d4ea9154e 100644 --- a/pandas/core/computation/common.py +++ b/pandas/core/computation/common.py @@ -5,7 +5,7 @@ from pandas._config import get_option -def ensure_decoded(s): +def ensure_decoded(s) -> str: """ If we have bytes, decode them to unicode. """ diff --git a/pandas/core/computation/expr.py b/pandas/core/computation/expr.py index 4b037ab564a87..90824ce8d856f 100644 --- a/pandas/core/computation/expr.py +++ b/pandas/core/computation/expr.py @@ -548,13 +548,13 @@ def visit_UnaryOp(self, node, **kwargs): def visit_Name(self, node, **kwargs): return self.term_type(node.id, self.env, **kwargs) - def visit_NameConstant(self, node, **kwargs): + def visit_NameConstant(self, node, **kwargs) -> Term: return self.const_type(node.value, self.env) - def visit_Num(self, node, **kwargs): + def visit_Num(self, node, **kwargs) -> Term: return self.const_type(node.n, self.env) - def visit_Constant(self, node, **kwargs): + def visit_Constant(self, node, **kwargs) -> Term: return self.const_type(node.n, self.env) def visit_Str(self, node, **kwargs): diff --git a/pandas/core/computation/expressions.py b/pandas/core/computation/expressions.py index 9e180f11c4211..e82bec47c6ac5 100644 --- a/pandas/core/computation/expressions.py +++ b/pandas/core/computation/expressions.py @@ -38,7 +38,7 @@ _MIN_ELEMENTS = 1_000_000 -def set_use_numexpr(v=True): +def set_use_numexpr(v=True) -> None: # set/unset to use numexpr global USE_NUMEXPR if NUMEXPR_INSTALLED: @@ -51,7 +51,7 @@ def set_use_numexpr(v=True): _where = _where_numexpr if USE_NUMEXPR else _where_standard -def set_numexpr_threads(n=None): +def set_numexpr_threads(n=None) -> None: # if we are using numexpr, set the threads to n # otherwise reset if NUMEXPR_INSTALLED and USE_NUMEXPR: diff --git a/pandas/core/computation/ops.py b/pandas/core/computation/ops.py index 3a556b57ea5a5..db5f28e2ae6c1 100644 --- a/pandas/core/computation/ops.py +++ b/pandas/core/computation/ops.py @@ -94,7 +94,7 @@ def __repr__(self) -> str: def __call__(self, *args, **kwargs): return self.value - def evaluate(self, *args, **kwargs): + def evaluate(self, *args, **kwargs) -> Term: return self def _resolve_name(self): @@ -107,7 +107,7 @@ def _resolve_name(self): ) return res - def update(self, value): + def update(self, value) -> None: """ search order for local (i.e., @variable) variables: @@ -447,7 +447,7 @@ def evaluate(self, env, engine: str, parser, term_type, eval_in_python): name = env.add_tmp(res) return term_type(name, env=env) - def convert_values(self): + def convert_values(self) -> None: """ Convert datetimes to a comparable value in an expression. """ @@ -564,7 +564,7 @@ def __init__(self, op: str, operand) -> None: f"valid operators are {UNARY_OPS_SYMS}" ) from err - def __call__(self, env): + def __call__(self, env) -> MathCall: operand = self.operand(env) # error: Cannot call function of unknown type return self.func(operand) # type: ignore[operator] diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py index f96a9ab4cfb43..5ec2aaab98ba1 100644 --- a/pandas/core/dtypes/base.py +++ b/pandas/core/dtypes/base.py @@ -400,7 +400,7 @@ class StorageExtensionDtype(ExtensionDtype): def __init__(self, storage=None) -> None: self.storage = storage - def __repr__(self): + def __repr__(self) -> str: return f"{self.name}[{self.storage}]" def __str__(self): diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index d356a858a82fb..769656d1c4755 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -978,7 +978,7 @@ def maybe_upcast( return upcast_values, fill_value # type: ignore[return-value] -def invalidate_string_dtypes(dtype_set: set[DtypeObj]): +def invalidate_string_dtypes(dtype_set: set[DtypeObj]) -> None: """ Change string like dtypes to object for ``DataFrame.select_dtypes()``. @@ -995,7 +995,7 @@ def invalidate_string_dtypes(dtype_set: set[DtypeObj]): raise TypeError("string dtypes are not allowed, use 'object' instead") -def coerce_indexer_dtype(indexer, categories): +def coerce_indexer_dtype(indexer, categories) -> np.ndarray: """coerce the indexer input array to the smallest dtype possible""" length = len(categories) if length < _int8_max: diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 519dfd9269df5..378f33e2b65ac 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -1041,7 +1041,7 @@ def is_datetime_or_timedelta_dtype(arr_or_dtype) -> bool: # This exists to silence numpy deprecation warnings, see GH#29553 -def is_numeric_v_string_like(a: ArrayLike, b): +def is_numeric_v_string_like(a: ArrayLike, b) -> bool: """ Check if we are comparing a string-like object to a numeric ndarray. NumPy doesn't like to compare such objects, especially numeric arrays @@ -1090,7 +1090,7 @@ def is_numeric_v_string_like(a: ArrayLike, b): # This exists to silence numpy deprecation warnings, see GH#29553 -def is_datetimelike_v_numeric(a, b): +def is_datetimelike_v_numeric(a, b) -> bool: """ Check if we are comparing a datetime-like object to a numeric object. By "numeric," we mean an object that is either of an int or float dtype. diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index c61e9aaa59362..25e008f42688d 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -32,6 +32,7 @@ ) if TYPE_CHECKING: + from pandas.core.arrays import Categorical from pandas.core.arrays.sparse import SparseArray @@ -156,7 +157,7 @@ def is_nonempty(x) -> bool: def union_categoricals( to_union, sort_categories: bool = False, ignore_order: bool = False -): +) -> Categorical: """ Combine list-like of Categorical-like, unioning categories. diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 20fecbb0095c5..16e7559e4d153 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -27,6 +27,7 @@ from pandas._libs.tslibs import ( BaseOffset, NaT, + NaTType, Period, Timestamp, dtypes, @@ -945,7 +946,7 @@ def name(self) -> str_type: return f"period[{self.freq.freqstr}]" @property - def na_value(self): + def na_value(self) -> NaTType: return NaT def __hash__(self) -> int: @@ -972,7 +973,7 @@ def __eq__(self, other: Any) -> bool: def __ne__(self, other: Any) -> bool: return not self.__eq__(other) - def __setstate__(self, state): + def __setstate__(self, state) -> None: # for pickle compat. __getstate__ is defined in the # PandasExtensionDtype superclass and uses the public properties to # pickle -> need to set the settable private ones here (see GH26067) @@ -1034,7 +1035,9 @@ def __from_arrow__( for arr in chunks: data, mask = pyarrow_array_to_numpy_and_mask(arr, dtype=np.dtype(np.int64)) parr = PeriodArray(data.copy(), freq=self.freq, copy=False) - parr[~mask] = NaT + # error: Invalid index type "ndarray[Any, dtype[bool_]]" for "PeriodArray"; + # expected type "Union[int, Sequence[int], Sequence[bool], slice]" + parr[~mask] = NaT # type: ignore[index] results.append(parr) if not results: @@ -1230,7 +1233,7 @@ def construct_from_string(cls, string: str_type) -> IntervalDtype: raise TypeError(msg) @property - def type(self): + def type(self) -> type[Interval]: return Interval def __str__(self) -> str_type: @@ -1260,7 +1263,7 @@ def __eq__(self, other: Any) -> bool: return is_dtype_equal(self.subtype, other.subtype) - def __setstate__(self, state): + def __setstate__(self, state) -> None: # for pickle compat. __get_state__ is defined in the # PandasExtensionDtype superclass and uses the public properties to # pickle -> need to set the settable private ones here (see GH26067) diff --git a/pandas/core/exchange/column.py b/pandas/core/exchange/column.py index ae24c5d295cc9..fbf5fa0a36b01 100644 --- a/pandas/core/exchange/column.py +++ b/pandas/core/exchange/column.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from typing import ( Any, Tuple, @@ -214,7 +216,7 @@ def get_chunks(self, n_chunks=None): else: yield self - def get_buffers(self): + def get_buffers(self) -> ColumnBuffers: """ Return a dictionary containing the underlying buffers. The returned dictionary has the following contents: diff --git a/pandas/core/exchange/dataframe.py b/pandas/core/exchange/dataframe.py index c8a89184b34c6..e5bb3811afed0 100644 --- a/pandas/core/exchange/dataframe.py +++ b/pandas/core/exchange/dataframe.py @@ -1,9 +1,15 @@ +from __future__ import annotations + from collections import abc +from typing import TYPE_CHECKING import pandas as pd from pandas.core.exchange.column import PandasColumn from pandas.core.exchange.dataframe_protocol import DataFrame as DataFrameXchg +if TYPE_CHECKING: + from pandas import Index + class PandasDataFrameXchg(DataFrameXchg): """ @@ -29,11 +35,13 @@ def __init__( self._nan_as_null = nan_as_null self._allow_copy = allow_copy - def __dataframe__(self, nan_as_null: bool = False, allow_copy: bool = True): + def __dataframe__( + self, nan_as_null: bool = False, allow_copy: bool = True + ) -> PandasDataFrameXchg: return PandasDataFrameXchg(self._df, nan_as_null, allow_copy) @property - def metadata(self): + def metadata(self) -> dict[str, Index]: # `index` isn't a regular column, and the protocol doesn't support row # labels - so we export it as Pandas-specific metadata here. return {"pandas.index": self._df.index} @@ -47,7 +55,7 @@ def num_rows(self) -> int: def num_chunks(self) -> int: return 1 - def column_names(self): + def column_names(self) -> Index: return self._df.columns def get_column(self, i: int) -> PandasColumn: @@ -56,13 +64,13 @@ def get_column(self, i: int) -> PandasColumn: def get_column_by_name(self, name: str) -> PandasColumn: return PandasColumn(self._df[name], allow_copy=self._allow_copy) - def get_columns(self): + def get_columns(self) -> list[PandasColumn]: return [ PandasColumn(self._df[name], allow_copy=self._allow_copy) for name in self._df.columns ] - def select_columns(self, indices): + def select_columns(self, indices) -> PandasDataFrameXchg: if not isinstance(indices, abc.Sequence): raise ValueError("`indices` is not a sequence") if not isinstance(indices, list): @@ -72,7 +80,7 @@ def select_columns(self, indices): self._df.iloc[:, indices], self._nan_as_null, self._allow_copy ) - def select_columns_by_name(self, names): + def select_columns_by_name(self, names) -> PandasDataFrameXchg: if not isinstance(names, abc.Sequence): raise ValueError("`names` is not a sequence") if not isinstance(names, list): diff --git a/pandas/core/exchange/from_dataframe.py b/pandas/core/exchange/from_dataframe.py index 805e63ac67f16..4f3c7a48ff9b9 100644 --- a/pandas/core/exchange/from_dataframe.py +++ b/pandas/core/exchange/from_dataframe.py @@ -1,6 +1,7 @@ import ctypes import re from typing import ( + TYPE_CHECKING, Any, Dict, List, @@ -24,6 +25,9 @@ Endianness, ) +if TYPE_CHECKING: + from pandas import DataFrame + _NP_DTYPES: Dict[DtypeKind, Dict[int, Any]] = { DtypeKind.INT: {8: np.int8, 16: np.int16, 32: np.int32, 64: np.int64}, DtypeKind.UINT: {8: np.uint8, 16: np.uint16, 32: np.uint32, 64: np.uint64}, @@ -32,7 +36,7 @@ } -def from_dataframe(df, allow_copy=True): +def from_dataframe(df, allow_copy=True) -> DataFrame: """ Build a ``pd.DataFrame`` from any DataFrame supporting the interchange protocol. diff --git a/pandas/core/groupby/base.py b/pandas/core/groupby/base.py index ec9a2e4a4b5c0..ad1f36e0cddd8 100644 --- a/pandas/core/groupby/base.py +++ b/pandas/core/groupby/base.py @@ -6,7 +6,10 @@ from __future__ import annotations import dataclasses -from typing import Hashable +from typing import ( + Hashable, + Literal, +) @dataclasses.dataclass(order=True, frozen=True) @@ -92,7 +95,7 @@ class OutputKey: # TODO(2.0) Remove after pad/backfill deprecation enforced -def maybe_normalize_deprecated_kernels(kernel): +def maybe_normalize_deprecated_kernels(kernel) -> Literal["bfill", "ffill"]: if kernel == "backfill": kernel = "bfill" elif kernel == "pad": diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 63c861e084eda..9e26598d85e74 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -603,7 +603,7 @@ def value_counts( ascending: bool = False, bins=None, dropna: bool = True, - ): + ) -> Series: from pandas.core.reshape.merge import get_join_indexers from pandas.core.reshape.tile import cut @@ -747,7 +747,7 @@ def build_codes(lev_codes: np.ndarray) -> np.ndarray: return self.obj._constructor(out, index=mi, name=self.obj.name) @doc(Series.nlargest) - def nlargest(self, n: int = 5, keep: str = "first"): + def nlargest(self, n: int = 5, keep: str = "first") -> Series: f = partial(Series.nlargest, n=n, keep=keep) data = self._obj_with_exclusions # Don't change behavior if result index happens to be the same, i.e. @@ -756,7 +756,7 @@ def nlargest(self, n: int = 5, keep: str = "first"): return result @doc(Series.nsmallest) - def nsmallest(self, n: int = 5, keep: str = "first"): + def nsmallest(self, n: int = 5, keep: str = "first") -> Series: f = partial(Series.nsmallest, n=n, keep=keep) data = self._obj_with_exclusions # Don't change behavior if result index happens to be the same, i.e. @@ -1600,7 +1600,7 @@ def idxmax( axis=0, skipna: bool = True, numeric_only: bool | lib.NoDefault = lib.no_default, - ): + ) -> DataFrame: axis = DataFrame._get_axis_number(axis) if numeric_only is lib.no_default: # Cannot use self._resolve_numeric_only; we must pass None to @@ -1639,7 +1639,7 @@ def idxmin( axis=0, skipna: bool = True, numeric_only: bool | lib.NoDefault = lib.no_default, - ): + ) -> DataFrame: axis = DataFrame._get_axis_number(axis) if numeric_only is lib.no_default: # Cannot use self._resolve_numeric_only; we must pass None to diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 7e8a732a2e30d..89e47af4cb614 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -18,6 +18,7 @@ class providing the base-class of operations. from textwrap import dedent import types from typing import ( + TYPE_CHECKING, Callable, Hashable, Iterable, @@ -122,6 +123,13 @@ class providing the base-class of operations. maybe_use_numba, ) +if TYPE_CHECKING: + from pandas.core.window import ( + ExpandingGroupby, + ExponentialMovingWindowGroupby, + RollingGroupby, + ) + _common_see_also = """ See Also -------- @@ -663,7 +671,7 @@ def ngroups(self) -> int: @final @property - def indices(self): + def indices(self) -> dict[Hashable, npt.NDArray[np.intp]]: """ Dict {group name -> group indices}. """ @@ -2758,7 +2766,7 @@ def resample(self, rule, *args, **kwargs): @final @Substitution(name="groupby") @Appender(_common_see_also) - def rolling(self, *args, **kwargs): + def rolling(self, *args, **kwargs) -> RollingGroupby: """ Return a rolling grouper, providing rolling functionality per group. """ @@ -2775,7 +2783,7 @@ def rolling(self, *args, **kwargs): @final @Substitution(name="groupby") @Appender(_common_see_also) - def expanding(self, *args, **kwargs): + def expanding(self, *args, **kwargs) -> ExpandingGroupby: """ Return an expanding grouper, providing expanding functionality per group. @@ -2792,7 +2800,7 @@ def expanding(self, *args, **kwargs): @final @Substitution(name="groupby") @Appender(_common_see_also) - def ewm(self, *args, **kwargs): + def ewm(self, *args, **kwargs) -> ExponentialMovingWindowGroupby: """ Return an ewm grouper, providing ewm functionality per group. """ @@ -3484,7 +3492,7 @@ def rank( na_option: str = "keep", pct: bool = False, axis: int = 0, - ): + ) -> NDFrameT: """ Provide the rank of values within each group. @@ -3575,7 +3583,7 @@ def rank( @final @Substitution(name="groupby") @Appender(_common_see_also) - def cumprod(self, axis=0, *args, **kwargs): + def cumprod(self, axis=0, *args, **kwargs) -> NDFrameT: """ Cumulative product for each group. @@ -3593,7 +3601,7 @@ def cumprod(self, axis=0, *args, **kwargs): @final @Substitution(name="groupby") @Appender(_common_see_also) - def cumsum(self, axis=0, *args, **kwargs): + def cumsum(self, axis=0, *args, **kwargs) -> NDFrameT: """ Cumulative sum for each group. @@ -3611,7 +3619,7 @@ def cumsum(self, axis=0, *args, **kwargs): @final @Substitution(name="groupby") @Appender(_common_see_also) - def cummin(self, axis=0, numeric_only=False, **kwargs): + def cummin(self, axis=0, numeric_only=False, **kwargs) -> NDFrameT: """ Cumulative min for each group. @@ -3631,7 +3639,7 @@ def cummin(self, axis=0, numeric_only=False, **kwargs): @final @Substitution(name="groupby") @Appender(_common_see_also) - def cummax(self, axis=0, numeric_only=False, **kwargs): + def cummax(self, axis=0, numeric_only=False, **kwargs) -> NDFrameT: """ Cumulative max for each group. @@ -3921,7 +3929,7 @@ def pct_change(self, periods=1, fill_method="ffill", limit=None, freq=None, axis @final @Substitution(name="groupby") @Substitution(see_also=_common_see_also) - def head(self, n=5): + def head(self, n: int = 5) -> NDFrameT: """ Return first n rows of each group. @@ -3960,7 +3968,7 @@ def head(self, n=5): @final @Substitution(name="groupby") @Substitution(see_also=_common_see_also) - def tail(self, n=5): + def tail(self, n: int = 5) -> NDFrameT: """ Return last n rows of each group. diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index 0c9b64dc8cec3..b9f4166b475ca 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -679,10 +679,16 @@ def _codes_and_uniques(self) -> tuple[npt.NDArray[np.signedinteger], ArrayLike]: elif isinstance(self.grouping_vector, ops.BaseGrouper): # we have a list of groupers codes = self.grouping_vector.codes_info - uniques = self.grouping_vector.result_index._values + # error: Incompatible types in assignment (expression has type "Union + # [ExtensionArray, ndarray[Any, Any]]", variable has type "Categorical") + uniques = ( + self.grouping_vector.result_index._values # type: ignore[assignment] + ) else: # GH35667, replace dropna=False with use_na_sentinel=False - codes, uniques = algorithms.factorize( + # error: Incompatible types in assignment (expression has type "Union[ + # ndarray[Any, Any], Index]", variable has type "Categorical") + codes, uniques = algorithms.factorize( # type: ignore[assignment] self.grouping_vector, sort=self._sort, use_na_sentinel=self._dropna ) return codes, uniques diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index d056b4b03d904..6dc4ccfa8e1ee 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -735,7 +735,7 @@ def groupings(self) -> list[grouper.Grouping]: def shape(self) -> Shape: return tuple(ping.ngroups for ping in self.groupings) - def __iter__(self): + def __iter__(self) -> Iterator[Hashable]: return iter(self.indices) @property diff --git a/pandas/core/indexers/utils.py b/pandas/core/indexers/utils.py index f098066d1c7d7..0f3cdc4195c85 100644 --- a/pandas/core/indexers/utils.py +++ b/pandas/core/indexers/utils.py @@ -240,7 +240,7 @@ def validate_indices(indices: np.ndarray, n: int) -> None: # Indexer Conversion -def maybe_convert_indices(indices, n: int, verify: bool = True): +def maybe_convert_indices(indices, n: int, verify: bool = True) -> np.ndarray: """ Attempt to convert indices into valid, positive indices. diff --git a/pandas/core/indexes/accessors.py b/pandas/core/indexes/accessors.py index 8694ad94dae26..46959aa5cd3e2 100644 --- a/pandas/core/indexes/accessors.py +++ b/pandas/core/indexes/accessors.py @@ -38,7 +38,10 @@ from pandas.core.indexes.timedeltas import TimedeltaIndex if TYPE_CHECKING: - from pandas import Series + from pandas import ( + DataFrame, + Series, + ) class Properties(PandasDelegate, PandasObject, NoNewAttributesMixin): @@ -241,7 +244,7 @@ def to_pydatetime(self) -> np.ndarray: def freq(self): return self._get_values().inferred_freq - def isocalendar(self): + def isocalendar(self) -> DataFrame: """ Calculate year, week, and day according to the ISO 8601 standard. diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 87f2ae41cc98e..667ce4664c359 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1552,7 +1552,7 @@ def _summary(self, name=None) -> str_t: # -------------------------------------------------------------------- # Conversion Methods - def to_flat_index(self): + def to_flat_index(self: _IndexT) -> _IndexT: """ Identity method. @@ -1709,14 +1709,14 @@ def to_frame( # Name-Centric Methods @property - def name(self): + def name(self) -> Hashable: """ Return Index or MultiIndex name. """ return self._name @name.setter - def name(self, value: Hashable): + def name(self, value: Hashable) -> None: if self._no_setting_name: # Used in MultiIndex.levels to avoid silently ignoring name updates. raise RuntimeError( @@ -5947,7 +5947,7 @@ def _get_values_for_loc(self, series: Series, loc, key): return series.iloc[loc] @final - def set_value(self, arr, key, value): + def set_value(self, arr, key, value) -> None: """ Fast lookup of value from 1-dimensional ndarray. @@ -7008,16 +7008,16 @@ def _unary_method(self, op): result = op(self._values) return Index(result, name=self.name) - def __abs__(self): + def __abs__(self) -> Index: return self._unary_method(operator.abs) - def __neg__(self): + def __neg__(self) -> Index: return self._unary_method(operator.neg) - def __pos__(self): + def __pos__(self) -> Index: return self._unary_method(operator.pos) - def __invert__(self): + def __invert__(self) -> Index: # GH#8875 return self._unary_method(operator.inv) @@ -7131,7 +7131,7 @@ def _maybe_disable_logical_methods(self, opname: str_t) -> None: make_invalid_op(opname)(self) @Appender(IndexOpsMixin.argmin.__doc__) - def argmin(self, axis=None, skipna=True, *args, **kwargs): + def argmin(self, axis=None, skipna=True, *args, **kwargs) -> int: nv.validate_argmin(args, kwargs) nv.validate_minmax_axis(axis) @@ -7143,7 +7143,7 @@ def argmin(self, axis=None, skipna=True, *args, **kwargs): return super().argmin(skipna=skipna) @Appender(IndexOpsMixin.argmax.__doc__) - def argmax(self, axis=None, skipna=True, *args, **kwargs): + def argmax(self, axis=None, skipna=True, *args, **kwargs) -> int: nv.validate_argmax(args, kwargs) nv.validate_minmax_axis(axis) diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index c2bcd90ff10fb..9a70a4a1aa615 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -422,6 +422,7 @@ def reindex( stacklevel=find_stack_level(), ) + new_target: Index if len(self) and indexer is not None: new_target = self.take(indexer) else: @@ -434,8 +435,8 @@ def reindex( if not isinstance(target, CategoricalIndex) or (cats == -1).any(): new_target, indexer, _ = super()._reindex_non_unique(target) else: - - codes = new_target.codes.copy() + # error: "Index" has no attribute "codes" + codes = new_target.codes.copy() # type: ignore[attr-defined] codes[indexer == -1] = cats[missing] cat = self._data._from_backing_data(codes) new_target = type(self)._simple_new(cat, name=self.name) @@ -450,8 +451,8 @@ def reindex( new_target = type(self)._simple_new(cat, name=self.name) else: # e.g. test_reindex_with_categoricalindex, test_reindex_duplicate_target - new_target = np.asarray(new_target) - new_target = Index._with_infer(new_target, name=self.name) + new_target_array = np.asarray(new_target) + new_target = Index._with_infer(new_target_array, name=self.name) return new_target, indexer @@ -488,7 +489,7 @@ def _maybe_cast_listlike_indexer(self, values) -> CategoricalIndex: def _is_comparable_dtype(self, dtype: DtypeObj) -> bool: return self.categories._is_comparable_dtype(dtype) - def take_nd(self, *args, **kwargs): + def take_nd(self, *args, **kwargs) -> CategoricalIndex: """Alias for `take`""" warnings.warn( "CategoricalIndex.take_nd is deprecated, use CategoricalIndex.take " diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 811dc72e9b908..8014d010afc1b 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -672,7 +672,7 @@ def _get_insert_freq(self, loc: int, item): return freq @doc(NDArrayBackedExtensionIndex.delete) - def delete(self, loc): + def delete(self, loc) -> DatetimeTimedeltaMixin: result = super().delete(loc) result._data._freq = self._get_delete_freq(loc) return result diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 583612b4659b6..fd6b6ba63d7e0 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -308,7 +308,7 @@ def __new__( copy=False, name=None, verify_integrity: bool = True, - ): + ) -> MultiIndex: # compat with Index if name is not None: @@ -503,7 +503,7 @@ def from_tuples( cls, tuples: Iterable[tuple[Hashable, ...]], sortorder: int | None = None, - names: Sequence[Hashable] | None = None, + names: Sequence[Hashable] | Hashable | None = None, ) -> MultiIndex: """ Convert list of tuples to MultiIndex. @@ -562,7 +562,9 @@ def from_tuples( if len(tuples) == 0: if names is None: raise TypeError("Cannot infer number of levels from empty list") - arrays = [[]] * len(names) + # error: Argument 1 to "len" has incompatible type "Hashable"; + # expected "Sized" + arrays = [[]] * len(names) # type: ignore[arg-type] elif isinstance(tuples, (np.ndarray, Index)): if isinstance(tuples, Index): tuples = np.asarray(tuples._values) @@ -1826,7 +1828,9 @@ def to_frame( result.index = self return result - def to_flat_index(self) -> Index: + # error: Return type "Index" of "to_flat_index" incompatible with return type + # "MultiIndex" in supertype "Index" + def to_flat_index(self) -> Index: # type: ignore[override] """ Convert a MultiIndex to an Index of Tuples containing the level values. diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index c1cb5ad315298..f270a6e8b555f 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -120,7 +120,9 @@ def inferred_type(self) -> str: "c": "complex", }[self.dtype.kind] - def __new__(cls, data=None, dtype: Dtype | None = None, copy=False, name=None): + def __new__( + cls, data=None, dtype: Dtype | None = None, copy=False, name=None + ) -> NumericIndex: name = maybe_extract_name(name, data, cls) subarr = cls._ensure_array(data, dtype, copy) diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 5b384fbc97c1a..2d6d121a089c0 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -8,6 +8,7 @@ Any, Callable, Hashable, + Iterator, List, cast, ) @@ -426,7 +427,7 @@ def tolist(self) -> list[int]: return list(self._range) @doc(Int64Index.__iter__) - def __iter__(self): + def __iter__(self) -> Iterator[int]: yield from self._range @doc(Int64Index._shallow_copy) diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index 7d2e4129461a7..3a8ed54d6c634 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -171,13 +171,13 @@ def set_axis(self, axis: int, new_labels: Index) -> None: axis = self._normalize_axis(axis) self._axes[axis] = new_labels - def get_dtypes(self): + def get_dtypes(self) -> np.ndarray: return np.array([arr.dtype for arr in self.arrays], dtype="object") def __getstate__(self): return self.arrays, self._axes - def __setstate__(self, state): + def __setstate__(self, state) -> None: self.arrays = state[0] self._axes = state[1] @@ -348,7 +348,7 @@ def where(self: T, other, cond, align: bool) -> T: def setitem(self: T, indexer, value) -> T: return self.apply_with_block("setitem", indexer=indexer, value=value) - def putmask(self, mask, new, align: bool = True): + def putmask(self: T, mask, new, align: bool = True) -> T: if align: align_keys = ["new", "mask"] else: @@ -451,7 +451,7 @@ def replace_list( regex=regex, ) - def to_native_types(self, **kwargs): + def to_native_types(self: T, **kwargs) -> T: return self.apply(to_native_types, **kwargs) @property @@ -815,7 +815,7 @@ def column_arrays(self) -> list[ArrayLike]: def iset( self, loc: int | slice | np.ndarray, value: ArrayLike, inplace: bool = False - ): + ) -> None: """ Set new column(s). @@ -923,7 +923,7 @@ def insert(self, loc: int, item: Hashable, value: ArrayLike) -> None: self.arrays = arrays self._axes[1] = new_axis - def idelete(self, indexer): + def idelete(self, indexer) -> ArrayManager: """ Delete selected locations in-place (new block and array, same BlockManager) """ @@ -1240,7 +1240,7 @@ def make_empty(self, axes=None) -> SingleArrayManager: return type(self)([array], axes) @classmethod - def from_array(cls, array, index): + def from_array(cls, array, index) -> SingleArrayManager: return cls([array], [index]) @property @@ -1305,7 +1305,7 @@ def apply(self, func, **kwargs): new_array = getattr(self.array, func)(**kwargs) return type(self)([new_array], self._axes) - def setitem(self, indexer, value): + def setitem(self, indexer, value) -> SingleArrayManager: """ Set values with indexer. @@ -1336,7 +1336,7 @@ def _get_data_subset(self, predicate: Callable) -> SingleArrayManager: else: return self.make_empty() - def set_values(self, values: ArrayLike): + def set_values(self, values: ArrayLike) -> None: """ Set (replace) the values of the SingleArrayManager in place. @@ -1372,7 +1372,7 @@ def __init__(self, n: int) -> None: self.n = n @property - def shape(self): + def shape(self) -> tuple[int]: return (self.n,) def to_array(self, dtype: DtypeObj) -> ArrayLike: diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 49efecec7472e..df327716970f1 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -215,7 +215,7 @@ def mgr_locs(self) -> BlockPlacement: return self._mgr_locs @mgr_locs.setter - def mgr_locs(self, new_mgr_locs: BlockPlacement): + def mgr_locs(self, new_mgr_locs: BlockPlacement) -> None: self._mgr_locs = new_mgr_locs @final @@ -504,7 +504,7 @@ def dtype(self) -> DtypeObj: @final def astype( self, dtype: DtypeObj, copy: bool = False, errors: IgnoreRaise = "raise" - ): + ) -> Block: """ Coerce to the new dtype. @@ -536,13 +536,13 @@ def astype( return newb @final - def to_native_types(self, na_rep="nan", quoting=None, **kwargs): + def to_native_types(self, na_rep="nan", quoting=None, **kwargs) -> Block: """convert to our native types format""" result = to_native_types(self.values, na_rep=na_rep, quoting=quoting, **kwargs) return self.make_block(result) @final - def copy(self, deep: bool = True): + def copy(self, deep: bool = True) -> Block: """copy constructor""" values = self.values if deep: @@ -575,7 +575,11 @@ def replace( if isinstance(values, Categorical): # TODO: avoid special-casing blk = self if inplace else self.copy() - blk.values._replace(to_replace=to_replace, value=value, inplace=True) + # error: Item "ExtensionArray" of "Union[ndarray[Any, Any], + # ExtensionArray]" has no attribute "_replace" + blk.values._replace( # type: ignore[union-attr] + to_replace=to_replace, value=value, inplace=True + ) return [blk] if not self._can_hold_element(to_replace): @@ -725,10 +729,13 @@ def replace_list( assert not isinstance(mib, bool) m = mib[blk_num : blk_num + 1] + # error: Argument "mask" to "_replace_coerce" of "Block" has + # incompatible type "Union[ExtensionArray, ndarray[Any, Any], bool]"; + # expected "ndarray[Any, dtype[bool_]]" result = blk._replace_coerce( to_replace=src, value=dest, - mask=m, + mask=m, # type: ignore[arg-type] inplace=inplace, regex=regex, ) @@ -815,7 +822,7 @@ def _unwrap_setitem_indexer(self, indexer): def shape(self) -> Shape: return self.values.shape - def iget(self, i: int | tuple[int, int] | tuple[slice, int]): + def iget(self, i: int | tuple[int, int] | tuple[slice, int]) -> np.ndarray: # In the case where we have a tuple[slice, int], the slice will always # be slice(None) # Note: only reached with self.ndim == 2 @@ -924,7 +931,7 @@ def _unstack( # --------------------------------------------------------------------- - def setitem(self, indexer, value): + def setitem(self, indexer, value) -> Block: """ Attempt self.values[indexer] = value, possibly creating a new array. @@ -2156,7 +2163,7 @@ def new_block(values, placement, *, ndim: int) -> Block: return klass(values, ndim=ndim, placement=placement) -def check_ndim(values, placement: BlockPlacement, ndim: int): +def check_ndim(values, placement: BlockPlacement, ndim: int) -> None: """ ndim inference and validation. diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py index 4a352d614e1d9..77197dac3363b 100644 --- a/pandas/core/internals/concat.py +++ b/pandas/core/internals/concat.py @@ -381,7 +381,7 @@ def needs_filling(self) -> bool: return False @cache_readonly - def dtype(self): + def dtype(self) -> DtypeObj: blk = self.block if blk.values.dtype.kind == "V": raise AssertionError("Block is None, no dtype") diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 626809eab304e..c1d0ab730fe7e 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -167,7 +167,7 @@ def rec_array_to_mgr( dtype: DtypeObj | None, copy: bool, typ: str, -): +) -> Manager: """ Extract from a masked rec array and create the manager. """ diff --git a/pandas/core/ops/invalid.py b/pandas/core/ops/invalid.py index cc4a1f11edd2b..e069c765d5299 100644 --- a/pandas/core/ops/invalid.py +++ b/pandas/core/ops/invalid.py @@ -6,7 +6,7 @@ import numpy as np -def invalid_comparison(left, right, op): +def invalid_comparison(left, right, op) -> np.ndarray: """ If a comparison has mismatched types and is not necessarily meaningful, follow python3 conventions by: diff --git a/pandas/core/ops/mask_ops.py b/pandas/core/ops/mask_ops.py index 57bacba0d4bee..adc1f63c568bf 100644 --- a/pandas/core/ops/mask_ops.py +++ b/pandas/core/ops/mask_ops.py @@ -184,6 +184,6 @@ def kleene_and( return result, mask -def raise_for_nan(value, method: str): +def raise_for_nan(value, method: str) -> None: if lib.is_float(value) and np.isnan(value): raise ValueError(f"Cannot perform logical '{method}' with floating NaN") diff --git a/pandas/core/ops/methods.py b/pandas/core/ops/methods.py index df22919ed19f1..d1f704635ba64 100644 --- a/pandas/core/ops/methods.py +++ b/pandas/core/ops/methods.py @@ -43,7 +43,7 @@ def _get_method_wrappers(cls): return arith_flex, comp_flex -def add_flex_arithmetic_methods(cls): +def add_flex_arithmetic_methods(cls) -> None: """ Adds the full suite of flex arithmetic methods (``pow``, ``mul``, ``add``) to the class. diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index b4e944861f1bc..d4f4057af7bfd 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -152,7 +152,7 @@ def _indexer_and_to_sort( return indexer, to_sort @cache_readonly - def sorted_labels(self): + def sorted_labels(self) -> list[np.ndarray]: indexer, to_sort = self._indexer_and_to_sort return [line.take(indexer) for line in to_sort] @@ -199,7 +199,7 @@ def arange_result(self) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.bool_]]: return new_values, mask.any(0) # TODO: in all tests we have mask.any(0).all(); can we rely on that? - def get_result(self, values, value_columns, fill_value): + def get_result(self, values, value_columns, fill_value) -> DataFrame: if values.ndim == 1: values = values[:, np.newaxis] @@ -346,7 +346,7 @@ def _repeater(self) -> np.ndarray: return repeater @cache_readonly - def new_index(self): + def new_index(self) -> MultiIndex: # Does not depend on values or value_columns result_codes = [lab.take(self.compressor) for lab in self.sorted_labels[:-1]] diff --git a/pandas/core/reshape/util.py b/pandas/core/reshape/util.py index 9f9143f4aaa60..459928acc0da3 100644 --- a/pandas/core/reshape/util.py +++ b/pandas/core/reshape/util.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import numpy as np from pandas._typing import NumpyIndexT @@ -5,7 +7,7 @@ from pandas.core.dtypes.common import is_list_like -def cartesian_product(X): +def cartesian_product(X) -> list[np.ndarray]: """ Numpy version of itertools.product. Sometimes faster (for large inputs)... diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index d4d61df915acb..7de34c04a31ed 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -266,7 +266,7 @@ def _box_as_indexlike( def _convert_and_box_cache( arg: DatetimeScalarOrArrayConvertible, cache_array: Series, - name: str | None = None, + name: Hashable | None = None, ) -> Index: """ Convert array of dates with a cache and wrap the result in an Index. diff --git a/pandas/core/window/ewm.py b/pandas/core/window/ewm.py index a153761f377b3..3a42a4b1a1663 100644 --- a/pandas/core/window/ewm.py +++ b/pandas/core/window/ewm.py @@ -442,7 +442,9 @@ def _get_window_indexer(self) -> BaseIndexer: """ return ExponentialMovingWindowIndexer() - def online(self, engine="numba", engine_kwargs=None): + def online( + self, engine="numba", engine_kwargs=None + ) -> OnlineExponentialMovingWindow: """ Return an ``OnlineExponentialMovingWindow`` object to calculate exponentially moving window aggregations in an online method. @@ -948,7 +950,7 @@ def __init__( else: raise ValueError("'numba' is the only supported engine") - def reset(self): + def reset(self) -> None: """ Reset the state captured by `update` calls. """ diff --git a/pandas/core/window/online.py b/pandas/core/window/online.py index 2ef06732f9800..bb973f05687e2 100644 --- a/pandas/core/window/online.py +++ b/pandas/core/window/online.py @@ -112,6 +112,6 @@ def run_ewm(self, weighted_avg, deltas, min_periods, ewm_func): self.last_ewm = result[-1] return result - def reset(self): + def reset(self) -> None: self.old_wt = np.ones(self.shape[self.axis - 1]) self.last_ewm = None diff --git a/pandas/io/formats/csvs.py b/pandas/io/formats/csvs.py index c577acfaeba8e..6ab57b0cce2a4 100644 --- a/pandas/io/formats/csvs.py +++ b/pandas/io/formats/csvs.py @@ -118,16 +118,16 @@ def _initialize_index_label(self, index_label: IndexLabel | None) -> IndexLabel: return [index_label] return index_label - def _get_index_label_from_obj(self) -> list[str]: + def _get_index_label_from_obj(self) -> Sequence[Hashable]: if isinstance(self.obj.index, ABCMultiIndex): return self._get_index_label_multiindex() else: return self._get_index_label_flat() - def _get_index_label_multiindex(self) -> list[str]: + def _get_index_label_multiindex(self) -> Sequence[Hashable]: return [name or "" for name in self.obj.index.names] - def _get_index_label_flat(self) -> list[str]: + def _get_index_label_flat(self) -> Sequence[Hashable]: index_label = self.obj.index.name return [""] if index_label is None else [index_label] diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 6d497f7d9bb94..6554b4c1f1afd 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -993,8 +993,8 @@ def _get_formatted_index(self, frame: DataFrame) -> list[str]: else: return adjoined - def _get_column_name_list(self) -> list[str]: - names: list[str] = [] + def _get_column_name_list(self) -> list[Hashable]: + names: list[Hashable] = [] columns = self.frame.columns if isinstance(columns, MultiIndex): names.extend("" if name is None else name for name in columns.names) diff --git a/pandas/io/formats/html.py b/pandas/io/formats/html.py index b6494682d308d..163e7dc7bde5e 100644 --- a/pandas/io/formats/html.py +++ b/pandas/io/formats/html.py @@ -6,6 +6,7 @@ from textwrap import dedent from typing import ( Any, + Hashable, Iterable, Mapping, cast, @@ -258,6 +259,7 @@ def _write_table(self, indent: int = 0) -> None: self.write("", indent) def _write_col_header(self, indent: int) -> None: + row: list[Hashable] is_truncated_horizontally = self.fmt.is_truncated_horizontally if isinstance(self.columns, MultiIndex): template = 'colspan="{span:d}" halign="left"' diff --git a/pandas/io/parsers/c_parser_wrapper.py b/pandas/io/parsers/c_parser_wrapper.py index 3d6b5fcb49b85..711d0857a5a1c 100644 --- a/pandas/io/parsers/c_parser_wrapper.py +++ b/pandas/io/parsers/c_parser_wrapper.py @@ -367,7 +367,7 @@ def _concatenate_chunks(chunks: list[dict[int, ArrayLike]]) -> dict: names = list(chunks[0].keys()) warning_columns = [] - result = {} + result: dict = {} for name in names: arrs = [chunk.pop(name) for chunk in chunks] # Check each arr for consistent types. diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 16c1a80034d0c..226a19e1f7599 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -1930,7 +1930,9 @@ def _do_convert_categoricals( categories = list(vl.values()) try: # Try to catch duplicate categories - cat_data.categories = categories + # error: Incompatible types in assignment (expression has + # type "List[str]", variable has type "Index") + cat_data.categories = categories # type: ignore[assignment] except ValueError as err: vc = Series(categories).value_counts() repeated_cats = list(vc.index[vc > 1]) From ed3bc5a1bb76516345115c7ab69c4327d63403d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Wed, 6 Jul 2022 18:27:01 -0400 Subject: [PATCH 2/3] from __future__ import annotations --- pandas/core/dtypes/concat.py | 2 ++ pandas/core/exchange/column.py | 13 +++++-------- pandas/core/exchange/from_dataframe.py | 6 +----- 3 files changed, 8 insertions(+), 13 deletions(-) diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index 25e008f42688d..059df4009e2f6 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -1,6 +1,8 @@ """ Utility functions related to concat. """ +from __future__ import annotations + from typing import ( TYPE_CHECKING, cast, diff --git a/pandas/core/exchange/column.py b/pandas/core/exchange/column.py index fbf5fa0a36b01..538c1d061ef22 100644 --- a/pandas/core/exchange/column.py +++ b/pandas/core/exchange/column.py @@ -1,9 +1,6 @@ from __future__ import annotations -from typing import ( - Any, - Tuple, -) +from typing import Any import numpy as np @@ -128,7 +125,7 @@ def dtype(self): else: return self._dtype_from_pandasdtype(dtype) - def _dtype_from_pandasdtype(self, dtype) -> Tuple[DtypeKind, int, str, str]: + def _dtype_from_pandasdtype(self, dtype) -> tuple[DtypeKind, int, str, str]: """ See `self.dtype` for details. """ @@ -255,7 +252,7 @@ def get_buffers(self) -> ColumnBuffers: def _get_data_buffer( self, - ) -> Tuple[PandasBuffer, Any]: # Any is for self.dtype tuple + ) -> tuple[PandasBuffer, Any]: # Any is for self.dtype tuple """ Return the buffer containing the data and the buffer's associated dtype. """ @@ -298,7 +295,7 @@ def _get_data_buffer( return buffer, dtype - def _get_validity_buffer(self) -> Tuple[PandasBuffer, Any]: + def _get_validity_buffer(self) -> tuple[PandasBuffer, Any]: """ Return the buffer containing the mask values indicating missing data and the buffer's associated dtype. @@ -336,7 +333,7 @@ def _get_validity_buffer(self) -> Tuple[PandasBuffer, Any]: raise NoBufferPresent(msg) - def _get_offsets_buffer(self) -> Tuple[PandasBuffer, Any]: + def _get_offsets_buffer(self) -> tuple[PandasBuffer, Any]: """ Return the buffer containing the offset values for variable-size binary data (e.g., variable-length strings) and the buffer's associated dtype. diff --git a/pandas/core/exchange/from_dataframe.py b/pandas/core/exchange/from_dataframe.py index 4f3c7a48ff9b9..cb1967b5701a0 100644 --- a/pandas/core/exchange/from_dataframe.py +++ b/pandas/core/exchange/from_dataframe.py @@ -1,7 +1,6 @@ import ctypes import re from typing import ( - TYPE_CHECKING, Any, Dict, List, @@ -25,9 +24,6 @@ Endianness, ) -if TYPE_CHECKING: - from pandas import DataFrame - _NP_DTYPES: Dict[DtypeKind, Dict[int, Any]] = { DtypeKind.INT: {8: np.int8, 16: np.int16, 32: np.int32, 64: np.int64}, DtypeKind.UINT: {8: np.uint8, 16: np.uint16, 32: np.uint32, 64: np.uint64}, @@ -36,7 +32,7 @@ } -def from_dataframe(df, allow_copy=True) -> DataFrame: +def from_dataframe(df, allow_copy=True) -> pd.DataFrame: """ Build a ``pd.DataFrame`` from any DataFrame supporting the interchange protocol. From d507abd5fea032f0ef5586384f4dfda727447ead Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Wed, 6 Jul 2022 18:42:40 -0400 Subject: [PATCH 3/3] more __future__ --- pandas/core/arrays/sparse/accessor.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/arrays/sparse/accessor.py b/pandas/core/arrays/sparse/accessor.py index d9d19d29e3d5c..80713a6fca323 100644 --- a/pandas/core/arrays/sparse/accessor.py +++ b/pandas/core/arrays/sparse/accessor.py @@ -1,4 +1,5 @@ """Sparse accessor""" +from __future__ import annotations from typing import TYPE_CHECKING