From 5ab07f8fa8f2a1e656b276e64f698f91aa07330d Mon Sep 17 00:00:00 2001 From: Ryan Abernathey Date: Thu, 14 Mar 2019 10:47:47 -0400 Subject: [PATCH 1/2] added protect_dataset_variables_inplace to open_zarr --- xarray/backends/zarr.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index ee77e0833c4..9f6a693a096 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -8,6 +8,7 @@ from ..core.pycompat import integer_types from ..core.utils import FrozenOrderedDict, HiddenKeyDict from .common import AbstractWritableDataStore, BackendArray +from .api import _protect_dataset_variables_inplace # need some special secret attributes to tell us the dimensions _DIMENSION_KEY = '_ARRAY_DIMENSIONS' @@ -355,7 +356,7 @@ def close(self): def open_zarr(store, group=None, synchronizer=None, auto_chunk=True, decode_cf=True, mask_and_scale=True, decode_times=True, concat_characters=True, decode_coords=True, - drop_variables=None, consolidated=False): + drop_variables=None, consolidated=False, cache=False): """Load and decode a dataset from a Zarr store. .. note:: Experimental @@ -408,7 +409,13 @@ def open_zarr(store, group=None, synchronizer=None, auto_chunk=True, consolidated : bool, optional Whether to open the store using zarr's consolidated metadata capability. Only works for stores that have already been consolidated. - + cache : bool, optional + If True, cache data loaded from the underlying datastore in memory as + NumPy arrays when accessed to avoid reading from the underlying data- + store multiple times. Defaults to True unless you specify the `chunks` + argument to use dask, in which case it defaults to False. Does not + change the behavior of coordinates corresponding to dimensions, which + always load their data from disk into a ``pandas.Index``. Returns ------- dataset : Dataset @@ -435,7 +442,7 @@ def maybe_decode_store(store, lock=False): concat_characters=concat_characters, decode_coords=decode_coords, drop_variables=drop_variables) - # TODO: this is where we would apply caching + _protect_dataset_variables_inplace(ds, cache) return ds From e92f9c1b55fb685c2fc80f13dd16de852b0550b6 Mon Sep 17 00:00:00 2001 From: Ryan Abernathey Date: Thu, 14 Mar 2019 10:59:22 -0400 Subject: [PATCH 2/2] use zarr LRU cache --- xarray/backends/zarr.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 9f6a693a096..eb17611872f 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -356,7 +356,8 @@ def close(self): def open_zarr(store, group=None, synchronizer=None, auto_chunk=True, decode_cf=True, mask_and_scale=True, decode_times=True, concat_characters=True, decode_coords=True, - drop_variables=None, consolidated=False, cache=False): + drop_variables=None, consolidated=False, cache=False, + max_cache_size=None): """Load and decode a dataset from a Zarr store. .. note:: Experimental @@ -410,12 +411,12 @@ def open_zarr(store, group=None, synchronizer=None, auto_chunk=True, Whether to open the store using zarr's consolidated metadata capability. Only works for stores that have already been consolidated. cache : bool, optional - If True, cache data loaded from the underlying datastore in memory as - NumPy arrays when accessed to avoid reading from the underlying data- - store multiple times. Defaults to True unless you specify the `chunks` - argument to use dask, in which case it defaults to False. Does not - change the behavior of coordinates corresponding to dimensions, which - always load their data from disk into a ``pandas.Index``. + If True, the zarr store is wrapped with a + ``zarr.storage.LRUStoreCache``. + max_cache_size : int, optional + The maximum size that the cache may grow to, in number of bytes. + Provide `None` if you would like the cache to have unlimited size. + Returns ------- dataset : Dataset @@ -441,11 +442,12 @@ def maybe_decode_store(store, lock=False): store, mask_and_scale=mask_and_scale, decode_times=decode_times, concat_characters=concat_characters, decode_coords=decode_coords, drop_variables=drop_variables) - - _protect_dataset_variables_inplace(ds, cache) - return ds + if cache: + import zarr + store = zarr.LRUStoreCache(store, max_size=max_cache_size) + # Zarr supports a wide range of access modes, but for now xarray either # reads or writes from a store, never both. For open_zarr, we only read mode = 'r'