From 830fa003eed373ae3755d315111e96c9e7d2ea2c Mon Sep 17 00:00:00 2001 From: Andrew Wieteska Date: Sat, 5 Dec 2020 12:47:49 -0500 Subject: [PATCH 01/21] add type hints --- pandas/core/missing.py | 143 ++++++++++++++++++++++++++++------------- 1 file changed, 99 insertions(+), 44 deletions(-) diff --git a/pandas/core/missing.py b/pandas/core/missing.py index e374ba435a0bd..7246af42e3098 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -2,12 +2,12 @@ Routines for filling missing data. """ from functools import partial -from typing import Any, List, Optional, Set, Union +from typing import Any, Callable, List, Optional, Set, Tuple, Union import numpy as np from pandas._libs import algos, lib -from pandas._typing import ArrayLike, Axis, DtypeObj +from pandas._typing import ArrayLike, Axis, DtypeObj, IndexLabel, Scalar from pandas.compat._optional import import_optional_dependency from pandas.core.dtypes.cast import infer_dtype_from_array @@ -20,7 +20,9 @@ from pandas.core.dtypes.missing import isna -def mask_missing(arr: ArrayLike, values_to_mask) -> np.ndarray: +def mask_missing( + arr: ArrayLike, values_to_mask: Union[List, Tuple, Scalar] +) -> np.ndarray: """ Return a masking array of same size/shape as arr with entries equaling any member of values_to_mask set to True @@ -58,7 +60,7 @@ def mask_missing(arr: ArrayLike, values_to_mask) -> np.ndarray: return mask -def clean_fill_method(method, allow_nearest: bool = False): +def clean_fill_method(method: str, allow_nearest: bool = False) -> Optional[str]: # asfreq is compat for resampling if method in [None, "asfreq"]: return None @@ -117,7 +119,7 @@ def clean_interp_method(method: str, **kwargs) -> str: return method -def find_valid_index(values, how: str): +def find_valid_index(values: ArrayLike, how: str) -> Optional[int]: """ Retrieves the index of the first valid value. @@ -165,7 +167,7 @@ def interpolate_1d( bounds_error: bool = False, order: Optional[int] = None, **kwargs, -): +) -> np.ndarray: """ Logic for the 1-d interpolation. The result should be 1-d, inputs xvalues and yvalues will each be 1-d arrays of the same length. @@ -218,8 +220,13 @@ def interpolate_1d( # These are sets of index pointers to invalid values... i.e. {0, 1, etc... all_nans = set(np.flatnonzero(invalid)) - start_nans = set(range(find_valid_index(yvalues, "first"))) - end_nans = set(range(1 + find_valid_index(yvalues, "last"), len(valid))) + + start_nan_idx = find_valid_index(yvalues, "first") + start_nans = set() if start_nan_idx is None else set(range(start_nan_idx)) + + end_nan_idx = find_valid_index(yvalues, "last") + end_nans = set() if end_nan_idx is None else set(range(1 + end_nan_idx, len(valid))) + mid_nans = all_nans - start_nans - end_nans # Like the sets above, preserve_nans contains indices of invalid values, @@ -294,8 +301,15 @@ def interpolate_1d( def _interpolate_scipy_wrapper( - x, y, new_x, method, fill_value=None, bounds_error=False, order=None, **kwargs -): + x, + y, + new_x, + method: Optional[str], + fill_value: Optional[Scalar] = None, + bounds_error: bool = False, + order: Optional[int] = None, + **kwargs, +) -> np.ndarray: """ Passed off to scipy.interpolate.interp1d. method is scipy's kind. Returns an array interpolated at new_x. Add any new methods to @@ -326,7 +340,7 @@ def _interpolate_scipy_wrapper( elif method == "cubicspline": alt_methods["cubicspline"] = _cubicspline_interpolate - interp1d_methods = [ + interp1d_methods: List[str] = [ "nearest", "zero", "slinear", @@ -335,18 +349,18 @@ def _interpolate_scipy_wrapper( "polynomial", ] if method in interp1d_methods: - if method == "polynomial": - method = order + kind = order if method == "polynomial" else method terp = interpolate.interp1d( - x, y, kind=method, fill_value=fill_value, bounds_error=bounds_error + x, y, kind=kind, fill_value=fill_value, bounds_error=bounds_error ) new_y = terp(new_x) elif method == "spline": # GH #10633, #24014 - if isna(order) or (order <= 0): - raise ValueError( - f"order needs to be specified and greater than 0; got order: {order}" - ) + if isna(order): + raise ValueError(f"order needs to be specified; got order: {order}") + assert isinstance(order, int) + if order <= 0: + raise ValueError(f"order needs to be greater than 0; got order: {order}") terp = interpolate.UnivariateSpline(x, y, k=order, **kwargs) new_y = terp(new_x) else: @@ -358,12 +372,21 @@ def _interpolate_scipy_wrapper( y = y.copy() if not new_x.flags.writeable: new_x = new_x.copy() - method = alt_methods[method] - new_y = method(x, y, new_x, **kwargs) + + assert isinstance(method, str) + alt_method = alt_methods[method] + new_y = alt_method(x, y, new_x, **kwargs) return new_y -def _from_derivatives(xi, yi, x, order=None, der=0, extrapolate=False): +def _from_derivatives( + xi: np.ndarray, + yi: np.ndarray, + x: Union[Scalar, ArrayLike], + order: Optional[Union[int, List[int]]] = None, + der: Union[int, List[int]] = 0, + extrapolate: bool = False, +) -> np.ndarray: """ Convenience function for interpolate.BPoly.from_derivatives. @@ -406,7 +429,13 @@ def _from_derivatives(xi, yi, x, order=None, der=0, extrapolate=False): return m(x) -def _akima_interpolate(xi, yi, x, der=0, axis=0): +def _akima_interpolate( + xi: ArrayLike, + yi: ArrayLike, + x: Union[Scalar, ArrayLike], + der: Optional[int] = 0, + axis: Optional[int] = 0, +) -> Union[Scalar, ArrayLike]: """ Convenience function for akima interpolation. xi and yi are arrays of values used to approximate some function f, @@ -449,7 +478,14 @@ def _akima_interpolate(xi, yi, x, der=0, axis=0): return P(x, nu=der) -def _cubicspline_interpolate(xi, yi, x, axis=0, bc_type="not-a-knot", extrapolate=None): +def _cubicspline_interpolate( + xi: ArrayLike, + yi: ArrayLike, + x: Union[ArrayLike, Scalar], + axis: Optional[int] = 0, + bc_type: Union[str, Tuple] = "not-a-knot", + extrapolate: Optional[Union[bool, str]] = None, +) -> Union[ArrayLike, Scalar]: """ Convenience function for cubic spline data interpolator. @@ -557,6 +593,8 @@ def _interpolate_with_limit_area( first = find_valid_index(values, "first") last = find_valid_index(values, "last") + assert first is not None and last is not None + values = interpolate_2d( values, method=method, @@ -574,12 +612,12 @@ def _interpolate_with_limit_area( def interpolate_2d( - values, + values: np.ndarray, method: str = "pad", axis: Axis = 0, limit: Optional[int] = None, limit_area: Optional[str] = None, -): +) -> np.ndarray: """ Perform an actual interpolation of values, values will be make 2-d if needed fills inplace, returns the result. @@ -625,7 +663,10 @@ def interpolate_2d( raise AssertionError("cannot interpolate on a ndim == 1 with axis != 0") values = values.reshape(tuple((1,) + values.shape)) - method = clean_fill_method(method) + method_cleaned = clean_fill_method(method) + assert isinstance(method_cleaned, str) + method = method_cleaned + tvalues = transf(values) if method == "pad": result = _pad_2d(tvalues, limit=limit) @@ -644,7 +685,9 @@ def interpolate_2d( return result -def _cast_values_for_fillna(values, dtype: DtypeObj, has_mask: bool): +def _cast_values_for_fillna( + values: ArrayLike, dtype: DtypeObj, has_mask: bool +) -> ArrayLike: """ Cast values to a dtype that algos.pad and algos.backfill can handle. """ @@ -663,34 +706,41 @@ def _cast_values_for_fillna(values, dtype: DtypeObj, has_mask: bool): return values -def _fillna_prep(values, mask=None): +def _fillna_prep( + values: np.ndarray, mask: Optional[np.ndarray] = None +) -> Tuple[np.ndarray, np.ndarray]: # boilerplate for _pad_1d, _backfill_1d, _pad_2d, _backfill_2d - dtype = values.dtype has_mask = mask is not None - if not has_mask: - # This needs to occur before datetime/timedeltas are cast to int64 - mask = isna(values) - values = _cast_values_for_fillna(values, dtype, has_mask) + # This needs to occur before datetime/timedeltas are cast to int64 + mask = isna(values) if mask is None else mask + values = _cast_values_for_fillna(values, values.dtype, has_mask) mask = mask.view(np.uint8) + return values, mask -def _pad_1d(values, limit=None, mask=None): +def _pad_1d( + values: np.ndarray, limit: Optional[int] = None, mask: Optional[np.ndarray] = None +) -> np.ndarray: values, mask = _fillna_prep(values, mask) algos.pad_inplace(values, mask, limit=limit) return values -def _backfill_1d(values, limit=None, mask=None): +def _backfill_1d( + values: np.ndarray, limit: Optional[int] = None, mask: Optional[np.ndarray] = None +) -> np.ndarray: values, mask = _fillna_prep(values, mask) algos.backfill_inplace(values, mask, limit=limit) return values -def _pad_2d(values, limit=None, mask=None): +def _pad_2d( + values: np.ndarray, limit: Optional[int] = None, mask: Optional[np.ndarray] = None +) -> np.ndarray: values, mask = _fillna_prep(values, mask) if np.all(values.shape): @@ -701,7 +751,9 @@ def _pad_2d(values, limit=None, mask=None): return values -def _backfill_2d(values, limit=None, mask=None): +def _backfill_2d( + values: np.ndarray, limit: Optional[int] = None, mask: Optional[np.ndarray] = None +) -> np.ndarray: values, mask = _fillna_prep(values, mask) if np.all(values.shape): @@ -715,16 +767,19 @@ def _backfill_2d(values, limit=None, mask=None): _fill_methods = {"pad": _pad_1d, "backfill": _backfill_1d} -def get_fill_func(method): - method = clean_fill_method(method) - return _fill_methods[method] +def get_fill_func(method: str) -> Callable: + method_cleaned = clean_fill_method(method) + assert isinstance(method_cleaned, str) + return _fill_methods[method_cleaned] -def clean_reindex_fill_method(method): +def clean_reindex_fill_method(method: str) -> Optional[str]: return clean_fill_method(method, allow_nearest=True) -def _interp_limit(invalid, fw_limit, bw_limit): +def _interp_limit( + invalid: np.ndarray, fw_limit: Optional[int], bw_limit: Optional[int] +) -> Set[IndexLabel]: """ Get indexers of values that won't be filled because they exceed the limits. @@ -759,7 +814,7 @@ def _interp_limit(invalid, fw_limit, bw_limit): f_idx = set() b_idx = set() - def inner(invalid, limit): + def inner(invalid: np.ndarray, limit: int) -> Set[IndexLabel]: limit = min(limit, N) windowed = _rolling_window(invalid, limit + 1).all(1) idx = set(np.where(windowed)[0] + limit) | set( @@ -789,7 +844,7 @@ def inner(invalid, limit): return f_idx & b_idx -def _rolling_window(a: np.ndarray, window: int): +def _rolling_window(a: np.ndarray, window: int) -> np.ndarray: """ [True, True, False, True, False], 2 -> From 605dc3ce03c2bbdf6c336a97b241e3da430fc9a7 Mon Sep 17 00:00:00 2001 From: Andrew Wieteska Date: Mon, 7 Dec 2020 11:13:16 -0500 Subject: [PATCH 02/21] review: remove assert --- pandas/core/missing.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 7246af42e3098..9c14a75be8753 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -356,11 +356,10 @@ def _interpolate_scipy_wrapper( new_y = terp(new_x) elif method == "spline": # GH #10633, #24014 - if isna(order): - raise ValueError(f"order needs to be specified; got order: {order}") - assert isinstance(order, int) - if order <= 0: - raise ValueError(f"order needs to be greater than 0; got order: {order}") + if order is not None and order >= 0: + raise ValueError( + f"order needs to be specified and greater than 0; got order: {order}" + ) terp = interpolate.UnivariateSpline(x, y, k=order, **kwargs) new_y = terp(new_x) else: From f2d5ec44f968debff3a288369223e2ef3ad5110a Mon Sep 17 00:00:00 2001 From: Andrew Wieteska Date: Mon, 7 Dec 2020 13:22:13 -0500 Subject: [PATCH 03/21] typo --- pandas/core/missing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 57598c3197cb4..bbb213ee5a043 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -354,7 +354,7 @@ def _interpolate_scipy_wrapper( new_y = terp(new_x) elif method == "spline": # GH #10633, #24014 - if order is not None and order >= 0: + if order is None or order <= 0: raise ValueError( f"order needs to be specified and greater than 0; got order: {order}" ) From e83904fdcd182d14318735d184853f87e3fed27d Mon Sep 17 00:00:00 2001 From: Andrew Wieteska Date: Mon, 7 Dec 2020 14:08:26 -0500 Subject: [PATCH 04/21] add isna check --- pandas/core/missing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/missing.py b/pandas/core/missing.py index bbb213ee5a043..ffd4c1b810486 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -354,7 +354,7 @@ def _interpolate_scipy_wrapper( new_y = terp(new_x) elif method == "spline": # GH #10633, #24014 - if order is None or order <= 0: + if order is None or isna(order) or order <= 0: raise ValueError( f"order needs to be specified and greater than 0; got order: {order}" ) From 71caeeb618a0ba6c6f641c6330c256568d9f0f79 Mon Sep 17 00:00:00 2001 From: Andrew Wieteska Date: Mon, 7 Dec 2020 16:46:57 -0500 Subject: [PATCH 05/21] better error msg when interp method not string --- pandas/core/missing.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pandas/core/missing.py b/pandas/core/missing.py index ffd4c1b810486..85ccfbeda4075 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -370,9 +370,11 @@ def _interpolate_scipy_wrapper( if not new_x.flags.writeable: new_x = new_x.copy() - assert isinstance(method, str) - alt_method = alt_methods[method] - new_y = alt_method(x, y, new_x, **kwargs) + if isinstance(method, str): + alt_method = alt_methods[method] + new_y = alt_method(x, y, new_x, **kwargs) + else: + raise ValueError(f"{method} is not a valid interp method") return new_y From 8fbbd477ed5c93e2f3c1648252148adfd14bcdbe Mon Sep 17 00:00:00 2001 From: Andrew Wieteska Date: Mon, 7 Dec 2020 16:54:39 -0500 Subject: [PATCH 06/21] improve docstring --- pandas/core/missing.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 85ccfbeda4075..bc518f8e1fdbe 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -398,15 +398,16 @@ def _from_derivatives( sorted 1D array of x-coordinates yi : array_like or list of array-likes yi[i][j] is the j-th derivative known at xi[i] - order: None or int or array_like of ints. Default: None. + x : scalar or array_like + order: None or int or array_like of ints, default: None Specifies the degree of local polynomials. If not None, some derivatives are ignored. - der : int or list + der : int or list, default: 0 How many derivatives to extract; None for all potentially nonzero derivatives (that is a number equal to the number of points), or a list of derivatives to extract. This number includes the function value as 0th derivative. - extrapolate : bool, optional + extrapolate : bool, default False Whether to extrapolate to ouf-of-bounds points based on first and last intervals, or to return NaNs. Default: True. From 575c22764f89bbfc0cecb058ca2cd3b5896f7e47 Mon Sep 17 00:00:00 2001 From: Andrew Wieteska Date: Mon, 7 Dec 2020 16:59:41 -0500 Subject: [PATCH 07/21] remove Optional --- pandas/core/missing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/missing.py b/pandas/core/missing.py index bc518f8e1fdbe..13370e2e33fa7 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -482,7 +482,7 @@ def _cubicspline_interpolate( xi: ArrayLike, yi: ArrayLike, x: Union[ArrayLike, Scalar], - axis: Optional[int] = 0, + axis: Axis = 0, bc_type: Union[str, Tuple] = "not-a-knot", extrapolate: Optional[Union[bool, str]] = None, ) -> Union[ArrayLike, Scalar]: From b19896b42a76ac74724c31ea9e75fe15eb410246 Mon Sep 17 00:00:00 2001 From: Andrew Wieteska Date: Mon, 7 Dec 2020 17:02:35 -0500 Subject: [PATCH 08/21] use Axis TypeVar --- pandas/core/missing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 13370e2e33fa7..662185539a390 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -434,7 +434,7 @@ def _akima_interpolate( yi: ArrayLike, x: Union[Scalar, ArrayLike], der: Optional[int] = 0, - axis: Optional[int] = 0, + axis: Axis = 0, ) -> Union[Scalar, ArrayLike]: """ Convenience function for akima interpolation. From 5036ee189bd9da394b5850d7dd4e239244fda827 Mon Sep 17 00:00:00 2001 From: Andrew Wieteska Date: Tue, 8 Dec 2020 13:14:04 -0500 Subject: [PATCH 09/21] more hints --- pandas/core/missing.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 662185539a390..9f0f3ba6aee7a 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -299,9 +299,9 @@ def interpolate_1d( def _interpolate_scipy_wrapper( - x, - y, - new_x, + x: np.ndarray, + y: np.ndarray, + new_x: Union[Scalar, np.ndarray], method: Optional[str], fill_value: Optional[Scalar] = None, bounds_error: bool = False, From 2a318234b09961605bbcae97e4bec0059502c67f Mon Sep 17 00:00:00 2001 From: Andrew Wieteska Date: Wed, 9 Dec 2020 12:52:07 -0500 Subject: [PATCH 10/21] review comments --- pandas/core/missing.py | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 9f0f3ba6aee7a..cd9ac081cd839 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -7,7 +7,7 @@ import numpy as np from pandas._libs import algos, lib -from pandas._typing import ArrayLike, Axis, DtypeObj, IndexLabel, Scalar +from pandas._typing import ArrayLike, DtypeObj, IndexLabel, Scalar from pandas.compat._optional import import_optional_dependency from pandas.core.dtypes.cast import infer_dtype_from_array @@ -63,7 +63,9 @@ def mask_missing( return mask -def clean_fill_method(method: str, allow_nearest: bool = False) -> Optional[str]: +def clean_fill_method( + method: Optional[str], allow_nearest: bool = False +) -> Optional[str]: # asfreq is compat for resampling if method in [None, "asfreq"]: return None @@ -162,7 +164,7 @@ def find_valid_index(values: ArrayLike, how: str) -> Optional[int]: def interpolate_1d( xvalues: "Index", yvalues: np.ndarray, - method: Optional[str] = "linear", + method: str = "linear", limit: Optional[int] = None, limit_direction: str = "forward", limit_area: Optional[str] = None, @@ -302,7 +304,7 @@ def _interpolate_scipy_wrapper( x: np.ndarray, y: np.ndarray, new_x: Union[Scalar, np.ndarray], - method: Optional[str], + method: str, fill_value: Optional[Scalar] = None, bounds_error: bool = False, order: Optional[int] = None, @@ -338,7 +340,7 @@ def _interpolate_scipy_wrapper( elif method == "cubicspline": alt_methods["cubicspline"] = _cubicspline_interpolate - interp1d_methods: List[str] = [ + interp1d_methods = [ "nearest", "zero", "slinear", @@ -430,11 +432,11 @@ def _from_derivatives( def _akima_interpolate( - xi: ArrayLike, - yi: ArrayLike, + xi: np.ndarray, + yi: np.ndarray, x: Union[Scalar, ArrayLike], - der: Optional[int] = 0, - axis: Axis = 0, + der: int = 0, + axis: int = 0, ) -> Union[Scalar, ArrayLike]: """ Convenience function for akima interpolation. @@ -445,9 +447,9 @@ def _akima_interpolate( Parameters ---------- - xi : array_like + xi : np.ndarray A sorted list of x-coordinates, of length N. - yi : array_like + yi : np.ndarray A 1-D array of real values. `yi`'s length along the interpolation axis must be equal to the length of `xi`. If N-D array, use axis parameter to select correct axis. @@ -482,7 +484,7 @@ def _cubicspline_interpolate( xi: ArrayLike, yi: ArrayLike, x: Union[ArrayLike, Scalar], - axis: Axis = 0, + axis: int = 0, bc_type: Union[str, Tuple] = "not-a-knot", extrapolate: Optional[Union[bool, str]] = None, ) -> Union[ArrayLike, Scalar]: @@ -614,7 +616,7 @@ def _interpolate_with_limit_area( def interpolate_2d( values: np.ndarray, method: str = "pad", - axis: Axis = 0, + axis: int = 0, limit: Optional[int] = None, limit_area: Optional[str] = None, ) -> np.ndarray: From 4aeec703118d7eba81c621b3131075670e89a1c5 Mon Sep 17 00:00:00 2001 From: Andrew Wieteska Date: Thu, 10 Dec 2020 13:22:57 -0500 Subject: [PATCH 11/21] review comment --- pandas/core/missing.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/missing.py b/pandas/core/missing.py index cd9ac081cd839..cdbd7e1840666 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -481,8 +481,8 @@ def _akima_interpolate( def _cubicspline_interpolate( - xi: ArrayLike, - yi: ArrayLike, + xi: np.ndarray, + yi: np.ndarray, x: Union[ArrayLike, Scalar], axis: int = 0, bc_type: Union[str, Tuple] = "not-a-knot", From d67977dc2436ca7a42cfc453db956f52add33acb Mon Sep 17 00:00:00 2001 From: Andrew Wieteska Date: Thu, 10 Dec 2020 14:28:33 -0500 Subject: [PATCH 12/21] review comment: values_to_mask --- pandas/core/dtypes/cast.py | 13 ++++++++++--- pandas/core/missing.py | 4 +--- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 165e63e23d60e..ce25f6da94363 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -33,7 +33,14 @@ ints_to_pytimedelta, ) from pandas._libs.tslibs.timezones import tz_compare -from pandas._typing import AnyArrayLike, ArrayLike, Dtype, DtypeObj, Scalar +from pandas._typing import ( + AnyArrayLike, + ArrayLike, + Dtype, + DtypeObj, + PandasScalar, + Scalar, +) from pandas.util._validators import validate_bool_kwarg from pandas.core.dtypes.common import ( @@ -834,8 +841,8 @@ def dict_compat(d: Dict[Scalar, Scalar]) -> Dict[Scalar, Scalar]: def infer_dtype_from_array( - arr, pandas_dtype: bool = False -) -> Tuple[DtypeObj, ArrayLike]: + arr: Union[ArrayLike, PandasScalar], pandas_dtype: bool = False +) -> Tuple[Dtype, ArrayLike]: """ Infer the dtype from an array. diff --git a/pandas/core/missing.py b/pandas/core/missing.py index cdbd7e1840666..e3bb3e149ea4d 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -23,9 +23,7 @@ from pandas import Index -def mask_missing( - arr: ArrayLike, values_to_mask: Union[List, Tuple, Scalar] -) -> np.ndarray: +def mask_missing(arr: ArrayLike, values_to_mask: ArrayLike) -> np.ndarray: """ Return a masking array of same size/shape as arr with entries equaling any member of values_to_mask set to True From 24f418af0df0a5a55ef0b2681fb8227f5b7715c0 Mon Sep 17 00:00:00 2001 From: Andrew Wieteska Date: Fri, 11 Dec 2020 01:23:19 -0500 Subject: [PATCH 13/21] review comments: mask_missing/infer_dtype_from_array --- pandas/core/dtypes/cast.py | 6 +++--- pandas/core/missing.py | 6 ++++-- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index ce25f6da94363..046c8b3a10e4c 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -841,8 +841,8 @@ def dict_compat(d: Dict[Scalar, Scalar]) -> Dict[Scalar, Scalar]: def infer_dtype_from_array( - arr: Union[ArrayLike, PandasScalar], pandas_dtype: bool = False -) -> Tuple[Dtype, ArrayLike]: + arr: Union[ArrayLike, Series, PandasScalar], pandas_dtype: bool = False +) -> Tuple[DtypeObj, Union[ArrayLike, Series]]: """ Infer the dtype from an array. @@ -890,7 +890,7 @@ def infer_dtype_from_array( # don't force numpy coerce with nan's inferred = lib.infer_dtype(arr, skipna=False) if inferred in ["string", "bytes", "mixed", "mixed-integer"]: - return (np.dtype(np.object_), arr) + return np.dtype(np.object_), arr arr = np.asarray(arr) return arr.dtype, arr diff --git a/pandas/core/missing.py b/pandas/core/missing.py index e3bb3e149ea4d..342f0a583f17d 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -20,10 +20,12 @@ from pandas.core.dtypes.missing import isna if TYPE_CHECKING: - from pandas import Index + from pandas import Index, Series -def mask_missing(arr: ArrayLike, values_to_mask: ArrayLike) -> np.ndarray: +def mask_missing( + arr: ArrayLike, values_to_mask: "Union[ArrayLike, Scalar, Series]" +) -> np.ndarray: """ Return a masking array of same size/shape as arr with entries equaling any member of values_to_mask set to True From aeb0b829b205ad17c4ecd50fdeed2283097aca4a Mon Sep 17 00:00:00 2001 From: Andrew Wieteska Date: Fri, 11 Dec 2020 01:38:04 -0500 Subject: [PATCH 14/21] typo --- pandas/core/dtypes/cast.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 046c8b3a10e4c..24dbb3e24930f 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -842,7 +842,7 @@ def dict_compat(d: Dict[Scalar, Scalar]) -> Dict[Scalar, Scalar]: def infer_dtype_from_array( arr: Union[ArrayLike, Series, PandasScalar], pandas_dtype: bool = False -) -> Tuple[DtypeObj, Union[ArrayLike, Series]]: +) -> "Tuple[DtypeObj, Union[ArrayLike, Series]]": """ Infer the dtype from an array. From 25d00518de485b525574bd375b0f829b36642159 Mon Sep 17 00:00:00 2001 From: Andrew Wieteska Date: Fri, 11 Dec 2020 02:03:03 -0500 Subject: [PATCH 15/21] typo --- pandas/core/dtypes/cast.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 24dbb3e24930f..1d5a489e1b1c1 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -841,7 +841,7 @@ def dict_compat(d: Dict[Scalar, Scalar]) -> Dict[Scalar, Scalar]: def infer_dtype_from_array( - arr: Union[ArrayLike, Series, PandasScalar], pandas_dtype: bool = False + arr: "Union[ArrayLike, Series, PandasScalar]", pandas_dtype: bool = False ) -> "Tuple[DtypeObj, Union[ArrayLike, Series]]": """ Infer the dtype from an array. From bbd25ed2f6a97f8fa9a3c945a4ea4da943edfefd Mon Sep 17 00:00:00 2001 From: Andrew Wieteska Date: Fri, 11 Dec 2020 12:26:55 -0500 Subject: [PATCH 16/21] review comment --- pandas/core/missing.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 342f0a583f17d..a4d6d79ba3de7 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -2,12 +2,22 @@ Routines for filling missing data. """ from functools import partial -from typing import TYPE_CHECKING, Any, Callable, List, Optional, Set, Tuple, Union +from typing import ( + TYPE_CHECKING, + Any, + Callable, + List, + Optional, + Sequence, + Set, + Tuple, + Union, +) import numpy as np from pandas._libs import algos, lib -from pandas._typing import ArrayLike, DtypeObj, IndexLabel, Scalar +from pandas._typing import AnyArrayLike, ArrayLike, DtypeObj, IndexLabel, Scalar from pandas.compat._optional import import_optional_dependency from pandas.core.dtypes.cast import infer_dtype_from_array @@ -20,11 +30,11 @@ from pandas.core.dtypes.missing import isna if TYPE_CHECKING: - from pandas import Index, Series + from pandas import Index def mask_missing( - arr: ArrayLike, values_to_mask: "Union[ArrayLike, Scalar, Series]" + arr: ArrayLike, values_to_mask: Union[AnyArrayLike, Scalar, Sequence[Any]] ) -> np.ndarray: """ Return a masking array of same size/shape as arr From b505de510c0785bd6b46ad5598b6c6b4228e33ee Mon Sep 17 00:00:00 2001 From: Andrew Wieteska Date: Mon, 14 Dec 2020 17:14:13 -0500 Subject: [PATCH 17/21] review comment --- pandas/core/missing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/missing.py b/pandas/core/missing.py index a4d6d79ba3de7..69be8b4740c38 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -34,7 +34,7 @@ def mask_missing( - arr: ArrayLike, values_to_mask: Union[AnyArrayLike, Scalar, Sequence[Any]] + arr: AnyArrayLike, values_to_mask: Union[AnyArrayLike, Scalar, Sequence[Any]] ) -> np.ndarray: """ Return a masking array of same size/shape as arr From e39c152534166c3c372a76fe503cd57d7e77c4a2 Mon Sep 17 00:00:00 2001 From: Andrew Wieteska Date: Mon, 14 Dec 2020 17:16:21 -0500 Subject: [PATCH 18/21] docstring fix --- pandas/core/missing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 69be8b4740c38..785538d52306b 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -419,7 +419,7 @@ def _from_derivatives( derivatives (that is a number equal to the number of points), or a list of derivatives to extract. This number includes the function value as 0th derivative. - extrapolate : bool, default False + extrapolate : bool, default False Whether to extrapolate to ouf-of-bounds points based on first and last intervals, or to return NaNs. Default: True. From cb82c9a6263527fef0a734b959c6daf2f5af1a0f Mon Sep 17 00:00:00 2001 From: Andrew Wieteska Date: Mon, 14 Dec 2020 17:36:02 -0500 Subject: [PATCH 19/21] review comments --- pandas/core/missing.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 785538d52306b..c710b45253407 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -393,7 +393,7 @@ def _interpolate_scipy_wrapper( def _from_derivatives( xi: np.ndarray, yi: np.ndarray, - x: Union[Scalar, ArrayLike], + x: np.ndarray, order: Optional[Union[int, List[int]]] = None, der: Union[int, List[int]] = 0, extrapolate: bool = False, @@ -444,7 +444,7 @@ def _from_derivatives( def _akima_interpolate( xi: np.ndarray, yi: np.ndarray, - x: Union[Scalar, ArrayLike], + x: np.ndarray, der: int = 0, axis: int = 0, ) -> Union[Scalar, ArrayLike]: @@ -463,7 +463,7 @@ def _akima_interpolate( A 1-D array of real values. `yi`'s length along the interpolation axis must be equal to the length of `xi`. If N-D array, use axis parameter to select correct axis. - x : scalar or array_like + x : array_like Of length M. der : int, optional How many derivatives to extract; None for all potentially @@ -493,7 +493,7 @@ def _akima_interpolate( def _cubicspline_interpolate( xi: np.ndarray, yi: np.ndarray, - x: Union[ArrayLike, Scalar], + x: np.ndarray, axis: int = 0, bc_type: Union[str, Tuple] = "not-a-knot", extrapolate: Optional[Union[bool, str]] = None, @@ -512,7 +512,7 @@ def _cubicspline_interpolate( Array containing values of the dependent variable. It can have arbitrary number of dimensions, but the length along ``axis`` (see below) must match the length of ``x``. Values must be finite. - x : scalar or array_like, shape (m,) + x : array_like, shape (m,) axis : int, optional Axis along which `y` is assumed to be varying. Meaning that for ``x[i]`` the corresponding values are ``np.take(y, i, axis=axis)``. From 315822cc352327ebc772d8ca77cbb186082eec28 Mon Sep 17 00:00:00 2001 From: Andrew Wieteska Date: Sun, 21 Feb 2021 15:06:47 -0500 Subject: [PATCH 20/21] TYP: infer_dtype_from_array --- pandas/core/dtypes/cast.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 733e4fb5e0a8c..0bb39255c1af8 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -48,7 +48,6 @@ ArrayLike, Dtype, DtypeObj, - PandasScalar, Scalar, ) from pandas.util._exceptions import find_stack_level @@ -850,7 +849,7 @@ def dict_compat(d: Dict[Scalar, Scalar]) -> Dict[Scalar, Scalar]: def infer_dtype_from_array( - arr: Union[ArrayLike, Series, PandasScalar], pandas_dtype: bool = False + arr: AnyArrayLike, pandas_dtype: bool = False ) -> Tuple[DtypeObj, Union[ArrayLike, Series]]: """ Infer the dtype from an array. From df4b70ad211b896d00f5261dbe0f52271625f511 Mon Sep 17 00:00:00 2001 From: Andrew Wieteska Date: Sun, 21 Feb 2021 15:07:43 -0500 Subject: [PATCH 21/21] minimize diff --- pandas/core/dtypes/cast.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 0bb39255c1af8..fb664e4ed657e 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -898,7 +898,7 @@ def infer_dtype_from_array( # don't force numpy coerce with nan's inferred = lib.infer_dtype(arr, skipna=False) if inferred in ["string", "bytes", "mixed", "mixed-integer"]: - return np.dtype(np.object_), arr + return (np.dtype(np.object_), arr) arr = np.asarray(arr) return arr.dtype, arr