Skip to content

Commit 1fa7b9b

Browse files
Illviljan and dcherian
authored
Allow dataset interpolation with different datatypes (#5008)
Co-authored-by: Deepak Cherian <[email protected]>
1 parent 6e14df6 commit 1fa7b9b

File tree

2 files changed

+57
-13
lines changed

2 files changed

+57
-13
lines changed

xarray/core/dataset.py

Lines changed: 44 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -2853,6 +2853,7 @@ def interp(
28532853
method: str = "linear",
28542854
assume_sorted: bool = False,
28552855
kwargs: Mapping[str, Any] = None,
2856+
method_non_numeric: str = "nearest",
28562857
**coords_kwargs: Any,
28572858
) -> "Dataset":
28582859
"""Multidimensional interpolation of Dataset.
@@ -2877,6 +2878,9 @@ def interp(
28772878
Additional keyword arguments passed to scipy's interpolator. Valid
28782879
options and their behavior depend on if 1-dimensional or
28792880
multi-dimensional interpolation is used.
2881+
method_non_numeric : {"nearest", "pad", "ffill", "backfill", "bfill"}, optional
2882+
Method for non-numeric types. Passed on to :py:meth:`Dataset.reindex`.
2883+
``"nearest"`` is used by default.
28802884
**coords_kwargs : {dim: coordinate, ...}, optional
28812885
The keyword arguments form of ``coords``.
28822886
One of coords or coords_kwargs must be provided.
@@ -3034,6 +3038,7 @@ def _validate_interp_indexer(x, new_x):
30343038
}
30353039

30363040
variables: Dict[Hashable, Variable] = {}
3041+
to_reindex: Dict[Hashable, Variable] = {}
30373042
for name, var in obj._variables.items():
30383043
if name in indexers:
30393044
continue
@@ -3043,20 +3048,45 @@ def _validate_interp_indexer(x, new_x):
30433048
else:
30443049
use_indexers = validated_indexers
30453050

3046-
if var.dtype.kind in "uifc":
3051+
dtype_kind = var.dtype.kind
3052+
if dtype_kind in "uifc":
3053+
# For normal number types do the interpolation:
30473054
var_indexers = {k: v for k, v in use_indexers.items() if k in var.dims}
30483055
variables[name] = missing.interp(var, var_indexers, method, **kwargs)
3056+
elif dtype_kind in "ObU" and (use_indexers.keys() & var.dims):
3057+
# For types that we do not understand do stepwise
3058+
# interpolation to avoid modifying the elements.
3059+
# Use reindex_variables instead because it supports
3060+
# booleans and objects and retains the dtype but inside
3061+
# this loop there might be some duplicate code that slows it
3062+
# down, therefore collect these signals and run it later:
3063+
to_reindex[name] = var
30493064
elif all(d not in indexers for d in var.dims):
3050-
# keep unrelated object array
3065+
# For anything else we can only keep variables if they
3066+
# are not dependent on any coords that are being
3067+
# interpolated along:
30513068
variables[name] = var
30523069

3070+
if to_reindex:
3071+
# Reindex variables:
3072+
variables_reindex = alignment.reindex_variables(
3073+
variables=to_reindex,
3074+
sizes=obj.sizes,
3075+
indexes=obj.xindexes,
3076+
indexers={k: v[-1] for k, v in validated_indexers.items()},
3077+
method=method_non_numeric,
3078+
)[0]
3079+
variables.update(variables_reindex)
3080+
3081+
# Get the coords that also exist in the variables:
30533082
coord_names = obj._coord_names & variables.keys()
3083+
# Get the indexes that are not being interpolated along:
30543084
indexes = {k: v for k, v in obj.xindexes.items() if k not in indexers}
30553085
selected = self._replace_with_new_dims(
30563086
variables.copy(), coord_names, indexes=indexes
30573087
)
30583088

3059-
# attach indexer as coordinate
3089+
# Attach indexer as coordinate
30603090
variables.update(indexers)
30613091
for k, v in indexers.items():
30623092
assert isinstance(v, Variable)
@@ -3077,6 +3107,7 @@ def interp_like(
30773107
method: str = "linear",
30783108
assume_sorted: bool = False,
30793109
kwargs: Mapping[str, Any] = None,
3110+
method_non_numeric: str = "nearest",
30803111
) -> "Dataset":
30813112
"""Interpolate this object onto the coordinates of another object,
30823113
filling the out of range values with NaN.
@@ -3098,6 +3129,9 @@ def interp_like(
30983129
values.
30993130
kwargs : dict, optional
31003131
Additional keyword passed to scipy's interpolator.
3132+
method_non_numeric : {"nearest", "pad", "ffill", "backfill", "bfill"}, optional
3133+
Method for non-numeric types. Passed on to :py:meth:`Dataset.reindex`.
3134+
``"nearest"`` is used by default.
31013135
31023136
Returns
31033137
-------
@@ -3133,7 +3167,13 @@ def interp_like(
31333167
# We do not support interpolation along object coordinate.
31343168
# reindex instead.
31353169
ds = self.reindex(object_coords)
3136-
return ds.interp(numeric_coords, method, assume_sorted, kwargs)
3170+
return ds.interp(
3171+
coords=numeric_coords,
3172+
method=method,
3173+
assume_sorted=assume_sorted,
3174+
kwargs=kwargs,
3175+
method_non_numeric=method_non_numeric,
3176+
)
31373177

31383178
# Helper methods for rename()
31393179
def _rename_vars(self, name_dict, dims_dict):

xarray/tests/test_interp.py

Lines changed: 13 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -416,15 +416,19 @@ def test_errors(use_dask):
416416

417417
@requires_scipy
418418
def test_dtype():
419-
ds = xr.Dataset(
420-
{"var1": ("x", [0, 1, 2]), "var2": ("x", ["a", "b", "c"])},
421-
coords={"x": [0.1, 0.2, 0.3], "z": ("x", ["a", "b", "c"])},
422-
)
423-
actual = ds.interp(x=[0.15, 0.25])
424-
assert "var1" in actual
425-
assert "var2" not in actual
426-
# object array should be dropped
427-
assert "z" not in actual.coords
419+
data_vars = dict(
420+
a=("time", np.array([1, 1.25, 2])),
421+
b=("time", np.array([True, True, False], dtype=bool)),
422+
c=("time", np.array(["start", "start", "end"], dtype=str)),
423+
)
424+
time = np.array([0, 0.25, 1], dtype=float)
425+
expected = xr.Dataset(data_vars, coords=dict(time=time))
426+
actual = xr.Dataset(
427+
{k: (dim, arr[[0, -1]]) for k, (dim, arr) in data_vars.items()},
428+
coords=dict(time=time[[0, -1]]),
429+
)
430+
actual = actual.interp(time=time, method="linear")
431+
assert_identical(expected, actual)
428432

429433

430434
@requires_scipy

0 commit comments

Comments
 (0)