Skip to content

Commit 68e86f4

Browse files
authored
Fix rechunking to a frequency with empty bins. (#9364)
Closes #9360
1 parent 28dfea7 commit 68e86f4

File tree

3 files changed: +26 -10 lines changed

doc/whats-new.rst

+2
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,8 @@ Deprecations
3636
Bug fixes
3737
~~~~~~~~~
3838

39+
- Fix bug with rechunking to a frequency when some periods contain no data (:issue:`9360`).
40+
By `Deepak Cherian <https://github.com/dcherian>`_.
3941
- Fix bug causing ``DataTree.from_dict`` to be sensitive to insertion order (:issue:`9276`, :pull:`9292`).
4042
By `Tom Nicholas <https://github.com/TomNicholas>`_.
4143
- Fix resampling error with monthly, quarterly, or yearly frequencies with

xarray/core/dataset.py

+7-3
Original file line numberDiff line numberDiff line change
@@ -2752,17 +2752,21 @@ def _resolve_frequency(
27522752
)
27532753

27542754
assert variable.ndim == 1
2755-
chunks: tuple[int, ...] = tuple(
2755+
chunks = (
27562756
DataArray(
27572757
np.ones(variable.shape, dtype=int),
27582758
dims=(name,),
27592759
coords={name: variable},
27602760
)
27612761
.resample({name: resampler})
27622762
.sum()
2763-
.data.tolist()
27642763
)
2765-
return chunks
2764+
# When bins (binning) or time periods are missing (resampling)
2765+
# we can end up with NaNs. Drop them.
2766+
if chunks.dtype.kind == "f":
2767+
chunks = chunks.dropna(name).astype(int)
2768+
chunks_tuple: tuple[int, ...] = tuple(chunks.data.tolist())
2769+
return chunks_tuple
27662770

27672771
chunks_mapping_ints: Mapping[Any, T_ChunkDim] = {
27682772
name: (

xarray/tests/test_dataset.py

+17-7
Original file line numberDiff line numberDiff line change
@@ -1209,24 +1209,34 @@ def get_dask_names(ds):
12091209
),
12101210
)
12111211
@pytest.mark.parametrize("freq", ["D", "W", "5ME", "YE"])
1212-
def test_chunk_by_frequency(self, freq, calendar) -> None:
1212+
@pytest.mark.parametrize("add_gap", [True, False])
1213+
def test_chunk_by_frequency(self, freq: str, calendar: str, add_gap: bool) -> None:
12131214
import dask.array
12141215

12151216
N = 365 * 2
1217+
ΔN = 28
1218+
time = xr.date_range(
1219+
"2001-01-01", periods=N + ΔN, freq="D", calendar=calendar
1220+
).to_numpy()
1221+
if add_gap:
1222+
# introduce an empty bin
1223+
time[31 : 31 + ΔN] = np.datetime64("NaT")
1224+
time = time[~np.isnat(time)]
1225+
else:
1226+
time = time[:N]
1227+
12161228
ds = Dataset(
12171229
{
12181230
"pr": ("time", dask.array.random.random((N), chunks=(20))),
12191231
"pr2d": (("x", "time"), dask.array.random.random((10, N), chunks=(20))),
12201232
"ones": ("time", np.ones((N,))),
12211233
},
1222-
coords={
1223-
"time": xr.date_range(
1224-
"2001-01-01", periods=N, freq="D", calendar=calendar
1225-
)
1226-
},
1234+
coords={"time": time},
12271235
)
12281236
rechunked = ds.chunk(x=2, time=TimeResampler(freq))
1229-
expected = tuple(ds.ones.resample(time=freq).sum().data.tolist())
1237+
expected = tuple(
1238+
ds.ones.resample(time=freq).sum().dropna("time").astype(int).data.tolist()
1239+
)
12301240
assert rechunked.chunksizes["time"] == expected
12311241
assert rechunked.chunksizes["x"] == (2,) * 5
12321242

0 commit comments

Comments
 (0)