Skip to content
This repository was archived by the owner on Sep 11, 2023. It is now read-only.

Commit 532d077

Browse files
authored
Merge pull request #459 from openclimatefix/jack/try-using-dask-to-speed-up-loading-zarrs
10x speedup loading Zarrs by using chunks=auto!
2 parents 75f1a43 + 2803b37 commit 532d077

File tree

2 files changed

+6
-8
lines changed

2 files changed

+6
-8
lines changed

nowcasting_dataset/data_sources/nwp/nwp_data_source.py

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -176,7 +176,11 @@ def open_nwp(zarr_path: str, consolidated: bool) -> xr.DataArray:
176176
_LOG.debug("Opening NWP data: %s", zarr_path)
177177
utils.set_fsspec_for_multiprocess()
178178
nwp = xr.open_dataset(
179-
zarr_path, engine="zarr", consolidated=consolidated, mode="r", chunks=None
179+
zarr_path,
180+
engine="zarr",
181+
consolidated=consolidated,
182+
mode="r",
183+
chunks="auto", # See issue #456 for why we use "auto".
180184
)
181185

182186
# Select the "UKV" DataArray from the "nwp" Dataset.

nowcasting_dataset/data_sources/satellite/satellite_data_source.py

Lines changed: 1 addition & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -233,18 +233,12 @@ def open_sat_data(zarr_path: str, consolidated: bool) -> xr.DataArray:
233233
"""
234234
_LOG.debug("Opening satellite data: %s", zarr_path)
235235

236-
# We load using chunks=None so xarray *doesn't* use Dask to
237-
# load the Zarr chunks from disk. Using Dask to load the data
238-
# seems to slow things down a lot if the Zarr store has more than
239-
# about a million chunks.
240-
# See https://github.com/openclimatefix/nowcasting_dataset/issues/23
241-
242236
# If we are opening multiple Zarr stores (i.e. one for each month of the year) we load them
243237
# together and create a single dataset from them. open_mfdataset also works if zarr_path
244238
# points to a specific zarr directory (with no wildcards).
245239
dataset = xr.open_mfdataset(
246240
zarr_path,
247-
chunks=None,
241+
chunks="auto", # See issue #456 for why we use "auto".
248242
mode="r",
249243
engine="zarr",
250244
concat_dim="time",

0 commit comments

Comments
 (0)