diff --git a/doc/whats-new.rst b/doc/whats-new.rst index b3130a1a4ab..38f183f9fc2 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -55,6 +55,8 @@ Enhancements Bug fixes ~~~~~~~~~ +- Fixed ``decode_cf`` function to operate lazily on dask arrays + (:issue:`1372`). By `Ryan Abernathey `_. - Fixed labeled indexing with slice bounds given by xarray objects with datetime64 or timedelta64 dtypes (:issue:`1240`). By `Stephan Hoyer `_. diff --git a/xarray/conventions.py b/xarray/conventions.py index 93fd56ed5d2..04664435732 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -9,7 +9,7 @@ from .coding import times, variables from .coding.variables import SerializationWarning from .core import duck_array_ops, indexing -from .core.pycompat import OrderedDict, basestring, iteritems +from .core.pycompat import OrderedDict, basestring, iteritems, dask_array_type from .core.variable import IndexVariable, Variable, as_variable @@ -490,8 +490,9 @@ def decode_cf_variable(name, var, concat_characters=True, mask_and_scale=True, del attributes['dtype'] data = BoolTypeArray(data) - return Variable(dimensions, indexing.LazilyOuterIndexedArray(data), - attributes, encoding=encoding) + if not isinstance(data, dask_array_type): + data = indexing.LazilyOuterIndexedArray(data) + return Variable(dimensions, data, attributes, encoding=encoding) def decode_cf_variables(variables, attributes, concat_characters=True, diff --git a/xarray/tests/test_conventions.py b/xarray/tests/test_conventions.py index 5b82f5a26f2..86240f4e33e 100644 --- a/xarray/tests/test_conventions.py +++ b/xarray/tests/test_conventions.py @@ -18,7 +18,7 @@ from . import ( IndexerMaker, TestCase, assert_array_equal, raises_regex, requires_netCDF4, - requires_netcdftime, unittest) + requires_netcdftime, unittest, requires_dask) from .test_backends import CFEncodedDataTest B = IndexerMaker(indexing.BasicIndexer) @@ -332,6 +332,17 @@ def test_decode_cf_datetime_transition_to_invalid(self): assert_array_equal(ds_decoded.time.values, expected) + @requires_dask + def test_decode_cf_with_dask(self): + import dask + original = Dataset({ + 't': ('t', [0, 1, 2], {'units': 'days since 2000-01-01'}), + 'foo': ('t', [0, 0, 0], {'coordinates': 'y', 'units': 'bar'}), + 'y': ('t', [5, 10, -999], {'_FillValue': -999}) + }).chunk({'t': 1}) + decoded = conventions.decode_cf(original) + assert dask.is_dask_collection(decoded.y.data) + class CFEncodedInMemoryStore(WritableCFDataStore, InMemoryDataStore): pass