Skip to content

REF: move interpolate validation to core.missing #53580

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jun 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 6 additions & 42 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,7 @@
arraylike,
common,
indexing,
missing,
nanops,
sample,
)
Expand Down Expand Up @@ -7907,20 +7908,7 @@ def interpolate(
"Only `method=linear` interpolation is supported on MultiIndexes."
)

# Set `limit_direction` depending on `method`
if limit_direction is None:
limit_direction = (
"backward" if method in ("backfill", "bfill") else "forward"
)
else:
if method in ("pad", "ffill") and limit_direction != "forward":
raise ValueError(
f"`limit_direction` must be 'forward' for method `{method}`"
)
if method in ("backfill", "bfill") and limit_direction != "backward":
raise ValueError(
f"`limit_direction` must be 'backward' for method `{method}`"
)
limit_direction = missing.infer_limit_direction(limit_direction, method)

if obj.ndim == 2 and np.all(obj.dtypes == np.dtype("object")):
raise TypeError(
Expand All @@ -7929,32 +7917,8 @@ def interpolate(
"column to a numeric dtype."
)

# create/use the index
if method == "linear":
# prior default
index = Index(np.arange(len(obj.index)))
else:
index = obj.index
methods = {"index", "values", "nearest", "time"}
is_numeric_or_datetime = (
is_numeric_dtype(index.dtype)
or isinstance(index.dtype, DatetimeTZDtype)
or lib.is_np_dtype(index.dtype, "mM")
)
if method not in methods and not is_numeric_or_datetime:
raise ValueError(
"Index column must be numeric or datetime type when "
f"using {method} method other than linear. "
"Try setting a numeric or datetime index column before "
"interpolating."
)
index = missing.get_interp_index(method, obj.index)

if isna(index).any():
raise NotImplementedError(
"Interpolation with NaNs in the index "
"has not been implemented. Try filling "
"those NaNs before interpolating."
)
new_data = obj._mgr.interpolate(
method=method,
axis=axis,
Expand Down Expand Up @@ -8140,13 +8104,13 @@ def asof(self, where, subset=None):
locs = self.index.asof_locs(where, ~(nulls._values))

# mask the missing
missing = locs == -1
mask = locs == -1
data = self.take(locs)
data.index = where
if missing.any():
if mask.any():
# GH#16063 only do this setting when necessary, otherwise
# we'd cast e.g. bools to floats
data.loc[missing] = np.nan
data.loc[mask] = np.nan
return data if is_list else data.iloc[-1]

# ----------------------------------------------------------------------
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -1350,7 +1350,7 @@ def interpolate(
index: Index | None = None,
inplace: bool = False,
limit: int | None = None,
limit_direction: str = "forward",
limit_direction: Literal["forward", "backward", "both"] = "forward",
limit_area: str | None = None,
fill_value: Any | None = None,
downcast: Literal["infer"] | None = None,
Expand Down
52 changes: 52 additions & 0 deletions pandas/core/missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,12 @@
from pandas.core.dtypes.cast import infer_dtype_from
from pandas.core.dtypes.common import (
is_array_like,
is_numeric_dtype,
is_numeric_v_string_like,
is_object_dtype,
needs_i8_conversion,
)
from pandas.core.dtypes.dtypes import DatetimeTZDtype
from pandas.core.dtypes.missing import (
is_valid_na_for_dtype,
isna,
Expand Down Expand Up @@ -225,6 +227,56 @@ def find_valid_index(how: str, is_valid: npt.NDArray[np.bool_]) -> int | None:
return idxpos # type: ignore[return-value]


def infer_limit_direction(limit_direction, method):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we type this function?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Doing so caused some trouble, in part because the NDFrame.interpolate annotation is inaccurate. I've got an interpolate-cleanup branch that I'm hoping improves the situation a bit, but the real solution is #53607.

# Set `limit_direction` depending on `method`
if limit_direction is None:
if method in ("backfill", "bfill"):
limit_direction = "backward"
else:
limit_direction = "forward"
else:
if method in ("pad", "ffill") and limit_direction != "forward":
raise ValueError(
f"`limit_direction` must be 'forward' for method `{method}`"
)
if method in ("backfill", "bfill") and limit_direction != "backward":
raise ValueError(
f"`limit_direction` must be 'backward' for method `{method}`"
)
return limit_direction


def get_interp_index(method, index: Index) -> Index:
# create/use the index
if method == "linear":
# prior default
from pandas import Index

index = Index(np.arange(len(index)))
else:
methods = {"index", "values", "nearest", "time"}
is_numeric_or_datetime = (
is_numeric_dtype(index.dtype)
or isinstance(index.dtype, DatetimeTZDtype)
or lib.is_np_dtype(index.dtype, "mM")
)
if method not in methods and not is_numeric_or_datetime:
raise ValueError(
"Index column must be numeric or datetime type when "
f"using {method} method other than linear. "
"Try setting a numeric or datetime index column before "
"interpolating."
)

if isna(index).any():
raise NotImplementedError(
"Interpolation with NaNs in the index "
"has not been implemented. Try filling "
"those NaNs before interpolating."
)
return index


def interpolate_array_2d(
data: np.ndarray,
method: str = "pad",
Expand Down