Skip to content

Commit 49e3032

Browse files
authored
Remove duplicated maybe_chunk function (#4494)
* move functions selkeys and maybe_chunk outside dataset.chunk; add to maybe_chunk the keyword overwrite_encoded_chunks for zarr * replace ZarrStore.maybe_chunk with dataset._maybe_chunk + ZarrStore.get_chunks * remove the no-longer-used ZarrStore.maybe_chunk * style * style * style * fix typo * move `dataset._selkeys` logic inside _maybe_chunk
1 parent 6dc3c75 commit 49e3032

File tree

3 files changed

+39
-39
lines changed

3 files changed

+39
-39
lines changed

xarray/backends/api.py

+7-2
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
combine_by_coords,
2626
)
2727
from ..core.dataarray import DataArray
28-
from ..core.dataset import Dataset
28+
from ..core.dataset import Dataset, _maybe_chunk
2929
from ..core.utils import close_on_error, is_grib_path, is_remote_uri
3030
from .common import AbstractDataStore, ArrayWriter
3131
from .locks import _get_scheduler
@@ -524,7 +524,12 @@ def maybe_decode_store(store, chunks):
524524
chunks = dict.fromkeys(ds.dims, chunks)
525525

526526
variables = {
527-
k: store.maybe_chunk(k, v, chunks, overwrite_encoded_chunks)
527+
k: _maybe_chunk(
528+
k,
529+
v,
530+
store.get_chunk(k, v, chunks),
531+
overwrite_encoded_chunks=overwrite_encoded_chunks,
532+
)
528533
for k, v in ds.variables.items()
529534
}
530535
ds2 = ds._replace(variables)

xarray/backends/zarr.py

-16
Original file line numberDiff line numberDiff line change
@@ -390,22 +390,6 @@ def get_chunk(self, name, var, chunks):
390390
chunk_spec[dim] = chunks[dim]
391391
return chunk_spec
392392

393-
def maybe_chunk(self, name, var, chunks, overwrite_encoded_chunks):
394-
chunk_spec = self.get_chunk(name, var, chunks)
395-
396-
if (var.ndim > 0) and (chunk_spec is not None):
397-
from dask.base import tokenize
398-
399-
# does this cause any data to be read?
400-
token2 = tokenize(name, var._data, chunks)
401-
name2 = f"xarray-{name}-{token2}"
402-
var = var.chunk(chunk_spec, name=name2, lock=None)
403-
if overwrite_encoded_chunks and var.chunks is not None:
404-
var.encoding["chunks"] = tuple(x[0] for x in var.chunks)
405-
return var
406-
else:
407-
return var
408-
409393
def store(
410394
self,
411395
variables,

xarray/core/dataset.py

+32-21
Original file line numberDiff line numberDiff line change
@@ -359,6 +359,34 @@ def _assert_empty(args: tuple, msg: str = "%s") -> None:
359359
raise ValueError(msg % args)
360360

361361

362+
def _maybe_chunk(
363+
name,
364+
var,
365+
chunks=None,
366+
token=None,
367+
lock=None,
368+
name_prefix="xarray-",
369+
overwrite_encoded_chunks=False,
370+
):
371+
from dask.base import tokenize
372+
373+
if chunks is not None:
374+
chunks = {dim: chunks[dim] for dim in var.dims if dim in chunks}
375+
if var.ndim:
376+
# when rechunking by different amounts, make sure dask names change
377+
# by providing chunks as an input to tokenize.
378+
# subtle bugs result otherwise. see GH3350
379+
token2 = tokenize(name, token if token else var._data, chunks)
380+
name2 = f"{name_prefix}{name}-{token2}"
381+
var = var.chunk(chunks, name=name2, lock=lock)
382+
383+
if overwrite_encoded_chunks and var.chunks is not None:
384+
var.encoding["chunks"] = tuple(x[0] for x in var.chunks)
385+
return var
386+
else:
387+
return var
388+
389+
362390
def as_dataset(obj: Any) -> "Dataset":
363391
"""Cast the given object to a Dataset.
364392
@@ -1761,7 +1789,6 @@ def chunk(
17611789
-------
17621790
chunked : xarray.Dataset
17631791
"""
1764-
from dask.base import tokenize
17651792

17661793
if isinstance(chunks, (Number, str)):
17671794
chunks = dict.fromkeys(self.dims, chunks)
@@ -1774,26 +1801,10 @@ def chunk(
17741801
"object: %s" % bad_dims
17751802
)
17761803

1777-
def selkeys(dict_, keys):
1778-
if dict_ is None:
1779-
return None
1780-
return {d: dict_[d] for d in keys if d in dict_}
1781-
1782-
def maybe_chunk(name, var, chunks):
1783-
chunks = selkeys(chunks, var.dims)
1784-
if not chunks:
1785-
chunks = None
1786-
if var.ndim > 0:
1787-
# when rechunking by different amounts, make sure dask names change
1788-
# by provinding chunks as an input to tokenize.  [sic: "provinding" — typo in the removed line, preserved to match the diff; the added hunk above should read "providing"]
1789-
# subtle bugs result otherwise. see GH3350
1790-
token2 = tokenize(name, token if token else var._data, chunks)
1791-
name2 = f"{name_prefix}{name}-{token2}"
1792-
return var.chunk(chunks, name=name2, lock=lock)
1793-
else:
1794-
return var
1795-
1796-
variables = {k: maybe_chunk(k, v, chunks) for k, v in self.variables.items()}
1804+
variables = {
1805+
k: _maybe_chunk(k, v, chunks, token, lock, name_prefix)
1806+
for k, v in self.variables.items()
1807+
}
17971808
return self._replace(variables)
17981809

17991810
def _validate_indexers(

0 commit comments

Comments (0)