Skip to content

Commit 62893ab

Browse files
author
Miguel Jimenez
authored
Merge pull request #1 from Mikejmnez/new_branch
moved definitions to zarr backends
2 parents 89a780b + 6f6eb23 commit 62893ab

File tree

2 files changed

+44
-49
lines changed

2 files changed

+44
-49
lines changed

xarray/backends/api.py

Lines changed: 2 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -567,7 +567,7 @@ def maybe_decode_store(store, lock=False):
567567
ds2 = ds.chunk(chunks, name_prefix=name_prefix, token=token)
568568
ds2._file_obj = ds._file_obj
569569

570-
else: # file is zarr!
570+
else:
571571

572572
# adapted from Dataset.Chunk() and taken from open_zarr
573573
if not isinstance(chunks, (int, dict)):
@@ -579,58 +579,11 @@ def maybe_decode_store(store, lock=False):
579579
if isinstance(chunks, int):
580580
chunks = dict.fromkeys(ds.dims, chunks)
581581

582-
if isinstance(chunks, tuple) and len(chunks) == len(ds.dims):
583-
chunks = dict(zip(ds.dims, chunks))
584-
585-
def get_chunk(name, var, chunks):
586-
chunk_spec = dict(zip(var.dims, var.encoding.get("chunks")))
587-
588-
# Coordinate labels aren't chunked
589-
if var.ndim == 1 and var.dims[0] == name:
590-
return chunk_spec
591-
592-
if chunks == "auto":
593-
return chunk_spec
594-
595-
for dim in var.dims:
596-
if dim in chunks:
597-
spec = chunks[dim]
598-
if isinstance(spec, int):
599-
spec = (spec,)
600-
if isinstance(spec, (tuple, list)) and chunk_spec[dim]:
601-
if any(s % chunk_spec[dim] for s in spec):
602-
warnings.warn(
603-
"Specified Dask chunks %r would "
604-
"separate Zarr chunk shape %r for "
605-
"dimension %r. This significantly "
606-
"degrades performance. Consider "
607-
"rechunking after loading instead."
608-
% (chunks[dim], chunk_spec[dim], dim),
609-
stacklevel=2,
610-
)
611-
chunk_spec[dim] = chunks[dim]
612-
return chunk_spec
613-
614-
def maybe_chunk(name, var, chunks):
615-
chunk_spec = get_chunk(name, var, chunks)
616-
617-
if (var.ndim > 0) and (chunk_spec is not None):
618-
# does this cause any data to be read?
619-
token2 = tokenize(name, var._data)
620-
name2 = "zarr-%s" % token2
621-
var = var.chunk(chunk_spec, name=name2, lock=None)
622-
if overwrite_encoded_chunks and var.chunks is not None:
623-
var.encoding["chunks"] = tuple(x[0] for x in var.chunk)
624-
return var
625-
else:
626-
return var
627-
628-
variables = {k: maybe_chunk(k, v, chunks) for k, v in ds.variables.items()}
582+
variables = {k: backends.ZarrStore.open_group.maybe_chunk(k, v, chunks) for k, v in ds.variables.items()}
629583
ds2 = ds._replace_vars_and_dims(variables)
630584
return ds2
631585
else:
632586
ds2 = ds
633-
634587
return ds2
635588

636589

xarray/backends/zarr.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -358,6 +358,48 @@ def encode_variable(self, variable):
358358
def encode_attribute(self, a):
359359
return encode_zarr_attr_value(a)
360360

361+
362+
def get_chunk(name, var, chunks):
    """Build the per-dimension chunk specification for one variable.

    Parameters
    ----------
    name : hashable
        Name under which ``var`` is stored. A one-dimensional variable whose
        only dimension equals ``name`` is treated as a coordinate label.
    var : object
        Must expose ``dims``, ``ndim`` and an ``encoding`` mapping. When
        ``encoding["chunks"]`` is present it is paired positionally with
        ``var.dims``.
    chunks : dict or "auto"
        Requested chunking keyed by dimension name, or the string ``"auto"``
        to keep the chunks recorded in the encoding.

    Returns
    -------
    dict
        Mapping from dimension name to chunk specification. Dimensions with
        neither an encoded nor a requested chunk are absent from the mapping.

    Warns
    -----
    UserWarning
        When a requested chunk size is not a multiple of the encoded chunk
        size for that dimension.
    """
    encoded_chunks = var.encoding.get("chunks")
    if encoded_chunks is None:
        # No chunk shape recorded for this variable: start from an empty
        # spec instead of failing inside zip() with a TypeError.
        encoded_chunks = ()
    chunk_spec = dict(zip(var.dims, encoded_chunks))

    # Coordinate labels aren't chunked
    if var.ndim == 1 and var.dims[0] == name:
        return chunk_spec

    if chunks == "auto":
        return chunk_spec

    for dim in var.dims:
        if dim not in chunks:
            continue
        spec = chunks[dim]
        if isinstance(spec, int):
            spec = (spec,)
        # .get() tolerates dimensions that have no encoded chunk size.
        encoded = chunk_spec.get(dim)
        if isinstance(spec, (tuple, list)) and encoded:
            # A non-zero remainder means the requested chunk boundary does
            # not line up with the encoded chunk size.
            if any(s % encoded for s in spec):
                warnings.warn(
                    "Specified Dask chunks %r would "
                    "separate Zarr chunk shape %r for "
                    "dimension %r. This significantly "
                    "degrades performance. Consider "
                    "rechunking after loading instead."
                    % (chunks[dim], encoded, dim),
                    stacklevel=2,
                )
        # The caller's request always wins over the encoded value.
        chunk_spec[dim] = chunks[dim]
    return chunk_spec
390+
391+
def maybe_chunk(name, var, chunks, overwrite_encoded_chunks=False):
    """Chunk ``var`` according to ``get_chunk`` when it has dimensions.

    Parameters
    ----------
    name : hashable
        Name of the variable; forwarded to ``get_chunk`` and folded into the
        token used to name the chunked array.
    var : object
        Variable to chunk. Must expose ``ndim``, ``_data``, ``encoding``,
        ``chunks`` and a ``chunk(spec, name=..., lock=...)`` method.
    chunks : dict or "auto"
        Requested chunking, forwarded to ``get_chunk``.
    overwrite_encoded_chunks : bool, optional
        When true, replace ``var.encoding["chunks"]`` with the first block
        size along each dimension of the chunked result. Defaults to
        ``False``. It is a keyword parameter because no binding for this
        name is visible at this definition site.

    Returns
    -------
    object
        The chunked variable, or ``var`` unchanged when it is
        zero-dimensional or no chunk spec is available.
    """
    chunk_spec = get_chunk(name, var, chunks)

    if (var.ndim > 0) and (chunk_spec is not None):
        # does this cause any data to be read?
        # NOTE(review): `tokenize` is presumably dask.base.tokenize; confirm
        # it is imported in this module.
        token2 = tokenize(name, var._data)
        name2 = "zarr-%s" % token2
        var = var.chunk(chunk_spec, name=name2, lock=None)
        if overwrite_encoded_chunks and var.chunks is not None:
            # `var.chunks` is the attribute holding per-dimension block
            # sizes; `var.chunk` is the method and is not iterable.
            var.encoding["chunks"] = tuple(x[0] for x in var.chunks)
    # Returned unconditionally so zero-dimensional variables pass through
    # instead of yielding None.
    return var
402+
361403
def store(
362404
self,
363405
variables,

0 commit comments

Comments
 (0)