diff --git a/doc/data-structures.rst b/doc/data-structures.rst index 95f755680df..10d83ca448f 100644 --- a/doc/data-structures.rst +++ b/doc/data-structures.rst @@ -310,18 +310,15 @@ You can also create a dataset from: Dataset contents ~~~~~~~~~~~~~~~~ -:py:class:`~xarray.Dataset` implements the Python dictionary interface, with +:py:class:`~xarray.Dataset` implements the Python mapping interface, with values given by :py:class:`xarray.DataArray` objects: .. ipython:: python 'temperature' in ds - - ds.keys() - ds['temperature'] -The valid keys include each listed coordinate and data variable. +Valid keys include each listed coordinate and data variable. Data and coordinate variables are also contained separately in the :py:attr:`~xarray.Dataset.data_vars` and :py:attr:`~xarray.Dataset.coords` @@ -356,6 +353,13 @@ setting) variables and attributes: This is particularly useful in an exploratory context, because you can tab-complete these variable names with tools like IPython. +.. warning:: + + We are changing the behavior of iterating over a Dataset in the next major + release of xarray, to only include data variables instead of both data + variables and coordinates. In the meantime, prefer iterating over + ``ds.data_vars`` or ``ds.coords``. + Dictionary like methods ~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 48c565a98d2..a6e7ecd8baf 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -78,14 +78,23 @@ Breaking changes disk when calling ``repr`` (:issue:`1522`). By `Guido Imperiale `_. -- Deprecations: +- Several existing features have been deprecated and will change to new + behavior in xarray v0.11. If you use any of them with xarray v0.10, you + should see a ``FutureWarning`` that describes how to update your code: - ``Dataset.T`` has been deprecated an alias for ``Dataset.transpose()`` - (:issue:`1232`). - - ``key in data_array`` currently checks for membership in - ``data_array.coords``. 
This is now deprecated: in the future, it will check - membership in ``data_array.values`` instead. - + (:issue:`1232`). In the next major version of xarray, it will provide short- cut lookup for variables or attributes with name ``'T'``. + - ``DataArray.__contains__`` (e.g., ``key in data_array``) currently checks + for membership in ``DataArray.coords``. In the next major version of + xarray, it will check membership in the array data found in + ``DataArray.values`` instead (:issue:`1267`). + - Direct iteration over and counting a ``Dataset`` (e.g., ``[k for k in ds]``, + ``ds.keys()``, ``ds.values()``, ``len(ds)`` and ``if ds``) currently + includes all variables, both data and coordinates. For improved usability + and consistency with pandas, in the next major version of xarray these will + change to only include data variables (:issue:`884`). Use ``ds.variables``, + ``ds.data_vars`` or ``ds.coords`` as alternatives. Backward Incompatible Changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 6bf0ed03af2..36c686e7a91 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -86,7 +86,7 @@ def check_name(name): raise TypeError('DataArray.name or Dataset key must be either a ' 'string or None for serialization to netCDF files') - for k in dataset: + for k in dataset.variables: check_name(k) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 235342681ef..aec8cbc6f1f 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -1844,7 +1844,7 @@ def dot(self, other): new_dims = ([d for d in self.dims if d not in dims] + [d for d in other.dims if d not in dims]) - return type(self)(new_data, new_coords, new_dims) + return type(self)(new_data, new_coords.variables, new_dims) def sortby(self, variables, ascending=True): """ diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index e54f307c075..fcea294238d 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py 
@@ -373,6 +373,9 @@ def _set_init_vars_and_dims(self, data_vars, coords, compat): raise ValueError('variables %r are found in both data_vars and ' 'coords' % both_data_and_coords) + if isinstance(coords, Dataset): + coords = coords.variables + variables, coord_names, dims = merge_data_and_coords( data_vars, coords, compat=compat) @@ -725,7 +728,7 @@ def _attr_sources(self): @property def _item_sources(self): """List of places to look-up items for key-completion""" - return [self, {d: self[d] for d in self.dims}, + return [self.data_vars, self.coords, {d: self[d] for d in self.dims}, LevelCoordinatesSource(self)] def __contains__(self, key): @@ -735,9 +738,31 @@ def __contains__(self, key): return key in self._variables def __len__(self): + warnings.warn('calling len() on an xarray.Dataset will change in ' + 'xarray v0.11 to only include data variables, not ' + 'coordinates. Call len() on the Dataset.variables ' + 'property instead, like ``len(ds.variables)``, to ' + 'preserve existing behavior in a forwards compatible ' + 'manner.', + FutureWarning, stacklevel=2) return len(self._variables) + def __bool__(self): + warnings.warn('casting an xarray.Dataset to a boolean will change in ' + 'xarray v0.11 to only include data variables, not ' + 'coordinates. Cast the Dataset.variables property ' + 'instead to preserve existing behavior in a forwards ' + 'compatible manner.', + FutureWarning, stacklevel=2) + return bool(self._variables) + def __iter__(self): + warnings.warn('iteration over an xarray.Dataset will change in xarray ' + 'v0.11 to only include data variables, not coordinates. ' + 'Iterate over the Dataset.variables property instead to ' + 'preserve existing behavior in a forwards compatible ' + 'manner.', + FutureWarning, stacklevel=2) return iter(self._variables) @property @@ -2201,8 +2226,7 @@ def transpose(self, *dims): @property def T(self): warnings.warn('xarray.Dataset.T has been deprecated as an alias for ' - '`.transpose()`. 
It will be removed in a future version ' - 'of xarray.', + '`.transpose()`. It will be removed in xarray v0.11.', FutureWarning, stacklevel=2) return self.transpose() @@ -2475,7 +2499,7 @@ def to_array(self, dim='variable', name=None): return DataArray(data, coords, dims, attrs=self.attrs, name=name) def _to_dataframe(self, ordered_dims): - columns = [k for k in self if k not in self.dims] + columns = [k for k in self.variables if k not in self.dims] data = [self._variables[k].set_dims(ordered_dims).values.reshape(-1) for k in columns] index = self.coords.to_index(ordered_dims) diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py index faa125e73a5..b9e381149b7 100644 --- a/xarray/core/formatting.py +++ b/xarray/core/formatting.py @@ -411,7 +411,7 @@ def array_repr(arr): def dataset_repr(ds): summary = [u'' % type(ds).__name__] - col_width = _calculate_col_width(_get_col_items(ds)) + col_width = _calculate_col_width(_get_col_items(ds.variables)) dims_start = pretty_print(u'Dimensions:', col_width) summary.append(u'%s(%s)' % (dims_start, dim_summary(ds))) diff --git a/xarray/testing.py b/xarray/testing.py index 23b06f98de5..20316eb03fe 100644 --- a/xarray/testing.py +++ b/xarray/testing.py @@ -132,7 +132,7 @@ def assert_allclose(a, b, rtol=1e-05, atol=1e-08, decode_bytes=True): assert allclose, '{}\n{}'.format(a.coords[v].values, b.coords[v].values) elif isinstance(a, xr.Dataset): - assert set(a) == set(b) + assert set(a.data_vars) == set(b.data_vars) assert set(a.coords) == set(b.coords) for k in list(a.variables) + list(a.coords): assert_allclose(a[k], b[k], **kwargs) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 6cb291693f4..0b0aa973198 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -152,7 +152,7 @@ def test_write_store(self): self.assertDatasetAllClose(expected, actual) def check_dtypes_roundtripped(self, expected, actual): - for k in expected: + for k in expected.variables: 
expected_dtype = expected.variables[k].dtype if (isinstance(self, Only32BitTypes) and expected_dtype == 'int64'): @@ -832,7 +832,7 @@ def test_variable_order(self): ds.coords['c'] = 4 with self.roundtrip(ds) as actual: - self.assertEqual(list(ds), list(actual)) + self.assertEqual(list(ds.variables), list(actual.variables)) def test_unsorted_index_raises(self): # should be fixed in netcdf4 v1.2.1 @@ -976,7 +976,7 @@ def test_roundtrip_example_1_netcdf_gz(self): def test_netcdf3_endianness(self): # regression test for GH416 expected = open_example_dataset('bears.nc', engine='scipy') - for var in expected.values(): + for var in expected.variables.values(): self.assertTrue(var.dtype.isnative) @requires_netCDF4 @@ -1097,11 +1097,12 @@ def test_cross_engine_read_write_netcdf3(self): with open_dataset(tmp_file, engine=read_engine) as actual: # hack to allow test to work: - # coord comes back as DataArray rather than coord, and so - # need to loop through here rather than in the test - # function (or we get recursion) - [assert_allclose(data[k].variable, actual[k].variable) - for k in data] + # coord comes back as DataArray rather than coord, + # and so need to loop through here rather than in + # the test function (or we get recursion) + [assert_allclose(data[k].variable, + actual[k].variable) + for k in data.variables] def test_encoding_unlimited_dims(self): ds = Dataset({'x': ('y', np.arange(10.0))}) diff --git a/xarray/tests/test_combine.py b/xarray/tests/test_combine.py index f4bb5e83f98..e8d1ce67d05 100644 --- a/xarray/tests/test_combine.py +++ b/xarray/tests/test_combine.py @@ -19,7 +19,7 @@ def test_concat(self): # drop the third dimension to keep things relatively understandable data = create_test_data() - for k in list(data): + for k in list(data.variables): if 'dim3' in data[k].dims: del data[k] diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index cfb5d4367c4..f7b69163455 100644 --- a/xarray/tests/test_dataset.py +++ 
b/xarray/tests/test_dataset.py @@ -51,7 +51,7 @@ def create_test_data(seed=None): obj.coords['numbers'] = ('dim3', np.array([0, 1, 2, 0, 0, 1, 1, 2, 2, 3], dtype='int64')) obj.encoding = {'foo': 'bar'} - assert all(obj.data.flags.writeable for obj in obj.values()) + assert all(obj.data.flags.writeable for obj in obj.variables.values()) return obj @@ -410,11 +410,16 @@ def test_properties(self): self.assertIsInstance(ds.dims.mapping, utils.SortedKeysDict) self.assertIs(type(ds.dims.mapping.mapping), dict) - self.assertItemsEqual(ds, list(ds.variables)) - self.assertItemsEqual(ds.keys(), list(ds.variables)) + with pytest.warns(FutureWarning): + self.assertItemsEqual(ds, list(ds.variables)) + with pytest.warns(FutureWarning): + self.assertItemsEqual(ds.keys(), list(ds.variables)) self.assertNotIn('aasldfjalskdfj', ds.variables) self.assertIn('dim1', repr(ds.variables)) - self.assertEqual(len(ds), 7) + with pytest.warns(FutureWarning): + self.assertEqual(len(ds), 7) + with pytest.warns(FutureWarning): + self.assertEqual(bool(ds), True) self.assertItemsEqual(ds.data_vars, ['var1', 'var2', 'var3']) self.assertItemsEqual(ds.data_vars.keys(), ['var1', 'var2', 'var3']) @@ -470,7 +475,7 @@ def test_variable(self): self.assertTrue('foo' in a) a['bar'] = (('time', 'x',), d) # order of creation is preserved - self.assertEqual(list(a), ['foo', 'bar']) + self.assertEqual(list(a.variables), ['foo', 'bar']) self.assertArrayEqual(a['foo'].values, d) # try to add variable with dim (10,3) with data that's (3,10) with self.assertRaises(ValueError): @@ -819,7 +824,7 @@ def test_isel(self): else: self.assertEqual(data.dims[d], ret.dims[d]) # Verify that the data is what we expect - for v in data: + for v in data.variables: self.assertEqual(data[v].dims, ret[v].dims) self.assertEqual(data[v].attrs, ret[v].attrs) slice_list = [slice(None)] * data[v].values.ndim @@ -1801,7 +1806,8 @@ def test_drop_variables(self): self.assertDatasetIdentical(data, data.drop([])) - expected = 
Dataset(dict((k, data[k]) for k in data if k != 'time')) + expected = Dataset(dict((k, data[k]) for k in data.variables + if k != 'time')) actual = data.drop('time') self.assertDatasetIdentical(expected, actual) actual = data.drop(['time']) @@ -1848,8 +1854,7 @@ def test_copy(self): for copied in [data.copy(deep=True), deepcopy(data)]: self.assertDatasetIdentical(data, copied) - for k in data: - v0 = data.variables[k] + for k, v0 in data.variables.items(): v1 = copied.variables[k] self.assertIsNot(v0, v1) @@ -2304,30 +2309,30 @@ def test_setitem_align_new_indexes(self): def test_assign(self): ds = Dataset() - actual = ds.assign(x = [0, 1, 2], y = 2) + actual = ds.assign(x=[0, 1, 2], y=2) expected = Dataset({'x': [0, 1, 2], 'y': 2}) self.assertDatasetIdentical(actual, expected) - self.assertEqual(list(actual), ['x', 'y']) + self.assertEqual(list(actual.variables), ['x', 'y']) self.assertDatasetIdentical(ds, Dataset()) - actual = actual.assign(y = lambda ds: ds.x ** 2) + actual = actual.assign(y=lambda ds: ds.x ** 2) expected = Dataset({'y': ('x', [0, 1, 4]), 'x': [0, 1, 2]}) self.assertDatasetIdentical(actual, expected) - actual = actual.assign_coords(z = 2) + actual = actual.assign_coords(z=2) expected = Dataset({'y': ('x', [0, 1, 4])}, {'z': 2, 'x': [0, 1, 2]}) self.assertDatasetIdentical(actual, expected) ds = Dataset({'a': ('x', range(3))}, {'b': ('x', ['A'] * 2 + ['B'])}) - actual = ds.groupby('b').assign(c = lambda ds: 2 * ds.a) + actual = ds.groupby('b').assign(c=lambda ds: 2 * ds.a) expected = ds.merge({'c': ('x', [0, 2, 4])}) self.assertDatasetIdentical(actual, expected) - actual = ds.groupby('b').assign(c = lambda ds: ds.a.sum()) + actual = ds.groupby('b').assign(c=lambda ds: ds.a.sum()) expected = ds.merge({'c': ('x', [1, 1, 2])}) self.assertDatasetIdentical(actual, expected) - actual = ds.groupby('b').assign_coords(c = lambda ds: ds.a.sum()) + actual = ds.groupby('b').assign_coords(c=lambda ds: ds.a.sum()) expected = expected.set_coords('c') 
self.assertDatasetIdentical(actual, expected) @@ -2385,12 +2390,13 @@ def test_setitem_multiindex_level(self): def test_delitem(self): data = create_test_data() - all_items = set(data) - self.assertItemsEqual(data, all_items) + all_items = set(data.variables) + self.assertItemsEqual(data.variables, all_items) del data['var1'] - self.assertItemsEqual(data, all_items - set(['var1'])) + self.assertItemsEqual(data.variables, all_items - set(['var1'])) del data['numbers'] - self.assertItemsEqual(data, all_items - set(['var1', 'numbers'])) + self.assertItemsEqual(data.variables, + all_items - set(['var1', 'numbers'])) self.assertNotIn('numbers', data.coords) def test_squeeze(self): @@ -3586,12 +3592,12 @@ def test_dataset_transpose(self): ds = create_test_data() actual = ds.transpose() - for k in ds: + for k in ds.variables: self.assertEqual(actual[k].dims[::-1], ds[k].dims) new_order = ('dim2', 'dim3', 'dim1', 'time') actual = ds.transpose(*new_order) - for k in ds: + for k in ds.variables: expected_dims = tuple(d for d in new_order if d in ds[k].dims) self.assertEqual(actual[k].dims, expected_dims)