Add a FutureWarning to Dataset.__iter__ and Dataset.__len__ #1658

Merged: 2 commits, Oct 27, 2017
14 changes: 9 additions & 5 deletions doc/data-structures.rst
Original file line number Diff line number Diff line change
@@ -310,18 +310,15 @@ You can also create an dataset from:
Dataset contents
~~~~~~~~~~~~~~~~

:py:class:`~xarray.Dataset` implements the Python dictionary interface, with
:py:class:`~xarray.Dataset` implements the Python mapping interface, with
values given by :py:class:`xarray.DataArray` objects:

.. ipython:: python

'temperature' in ds

ds.keys()

ds['temperature']

The valid keys include each listed coordinate and data variable.
Valid keys include each listed coordinate and data variable.

Data and coordinate variables are also contained separately in the
:py:attr:`~xarray.Dataset.data_vars` and :py:attr:`~xarray.Dataset.coords`
@@ -356,6 +353,13 @@ setting) variables and attributes:
This is particularly useful in an exploratory context, because you can
tab-complete these variable names with tools like IPython.

.. warning::

We are changing the behavior of iterating over a Dataset in the next major
release of xarray, to only include data variables instead of both data
variables and coordinates. In the meantime, prefer iterating over
``ds.data_vars`` or ``ds.coords``.

Dictionary like methods
~~~~~~~~~~~~~~~~~~~~~~~

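The behavior change this warning documents can be sketched with plain dicts standing in for a Dataset's two variable groups (the names and values here are illustrative, not taken from the PR):

```python
# Plain-dict stand-ins for a Dataset's variable groups (illustrative only).
data_vars = {'temperature': [271.5, 274.1], 'precipitation': [0.1, 0.4]}
coords = {'lat': [42.25, 42.21], 'lon': [-99.83, -99.32]}

# Current behavior: iterating the Dataset covers both groups.
all_keys = sorted({**data_vars, **coords})

# Forward-compatible style: iterate each group explicitly.
data_keys = sorted(data_vars)
coord_keys = sorted(coords)

print(all_keys)   # both data variables and coordinates
print(data_keys)  # data variables only
```

After the change, iterating the Dataset itself will behave like the `data_keys` line; code that needs coordinates too should spell that out, as the warning recommends.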
21 changes: 15 additions & 6 deletions doc/whats-new.rst
@@ -78,14 +78,23 @@ Breaking changes
disk when calling ``repr`` (:issue:`1522`).
By `Guido Imperiale <https://github.com/crusaderky>`_.

- Deprecations:
- Several existing features have been deprecated and will change to new
behavior in xarray v0.11. If you use any of them with xarray v0.10, you
should see a ``FutureWarning`` that describes how to update your code:

- ``Dataset.T`` has been deprecated as an alias for ``Dataset.transpose()``
(:issue:`1232`).
- ``key in data_array`` currently checks for membership in
``data_array.coords``. This is now deprecated: in the future, it will check
membership in ``data_array.values`` instead.

(:issue:`1232`). In the next major version of xarray, it will provide
shortcut lookup for variables or attributes with name ``'T'``.
- ``DataArray.__contains__`` (e.g., ``key in data_array``) currently checks
for membership in ``DataArray.coords``. In the next major version of
xarray, it will check membership in the array data found in
``DataArray.values`` instead (:issue:`1267`).
- Direct iteration over and counting a ``Dataset`` (e.g., ``[k for k in ds]``,
``ds.keys()``, ``ds.values()``, ``len(ds)`` and ``if ds``) currently
includes all variables, both data and coordinates. For improved usability
and consistency with pandas, in the next major version of xarray these will
change to only include data variables (:issue:`884`). Use ``ds.variables``,
``ds.data_vars`` or ``ds.coords`` as alternatives.

Backward Incompatible Changes
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
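To find the deprecated usages this entry lists, ``FutureWarning`` can be promoted to an error while tests run; a stdlib-only sketch of the approach (``LegacyContainer`` is a hypothetical stand-in, not the real ``xarray.Dataset``):

```python
import warnings


class LegacyContainer:
    """Hypothetical object whose iteration is deprecated, mimicking the
    FutureWarning pattern described above (not the real xarray.Dataset)."""

    def __init__(self, variables):
        self._variables = variables

    def __iter__(self):
        warnings.warn('iteration will change in the next major release; '
                      'iterate over .variables instead',
                      FutureWarning, stacklevel=2)
        return iter(self._variables)


obj = LegacyContainer({'var1': 1, 'lat': 2})

# Promote FutureWarning to an error so deprecated usage fails loudly.
with warnings.catch_warnings():
    warnings.simplefilter('error', FutureWarning)
    try:
        list(obj)
        outcome = 'no warning'
    except FutureWarning as exc:
        outcome = str(exc)
```

Running a test suite under `simplefilter('error', FutureWarning)` (or pytest's `-W error::FutureWarning`) turns every deprecated call site into a failure, which makes the migration the changelog asks for mechanical.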
2 changes: 1 addition & 1 deletion xarray/backends/api.py
@@ -86,7 +86,7 @@ def check_name(name):
raise TypeError('DataArray.name or Dataset key must be either a '
'string or None for serialization to netCDF files')

for k in dataset:
for k in dataset.variables:
check_name(k)


2 changes: 1 addition & 1 deletion xarray/core/dataarray.py
@@ -1844,7 +1844,7 @@ def dot(self, other):
new_dims = ([d for d in self.dims if d not in dims] +
[d for d in other.dims if d not in dims])

return type(self)(new_data, new_coords, new_dims)
return type(self)(new_data, new_coords.variables, new_dims)

def sortby(self, variables, ascending=True):
"""
32 changes: 28 additions & 4 deletions xarray/core/dataset.py
@@ -373,6 +373,9 @@ def _set_init_vars_and_dims(self, data_vars, coords, compat):
raise ValueError('variables %r are found in both data_vars and '
'coords' % both_data_and_coords)

if isinstance(coords, Dataset):
coords = coords.variables

variables, coord_names, dims = merge_data_and_coords(
data_vars, coords, compat=compat)

@@ -725,7 +728,7 @@ def _attr_sources(self):
@property
def _item_sources(self):
"""List of places to look-up items for key-completion"""
return [self, {d: self[d] for d in self.dims},
return [self.data_vars, self.coords, {d: self[d] for d in self.dims},
LevelCoordinatesSource(self)]

def __contains__(self, key):
@@ -735,9 +738,31 @@ def __contains__(self, key):
return key in self._variables

def __len__(self):
warnings.warn('calling len() on an xarray.Dataset will change in '
'xarray v0.11 to only include data variables, not '
'coordinates. Call len() on the Dataset.variables '
'property instead, like ``len(ds.variables)``, to '
'preserve existing behavior in a forwards compatible '
'manner.',
FutureWarning, stacklevel=2)
return len(self._variables)

def __bool__(self):
warnings.warn('casting an xarray.Dataset to a boolean will change in '
'xarray v0.11 to only include data variables, not '
'coordinates. Cast the Dataset.variables property '
'instead to preserve existing behavior in a forwards '
'compatible manner.',
FutureWarning, stacklevel=2)
return bool(self._variables)

def __iter__(self):
warnings.warn('iteration over an xarray.Dataset will change in xarray '
'v0.11 to only include data variables, not coordinates. '
'Iterate over the Dataset.variables property instead to '
'preserve existing behavior in a forwards compatible '
'manner.',
FutureWarning, stacklevel=2)
return iter(self._variables)

@property
@@ -2201,8 +2226,7 @@ def transpose(self, *dims):
@property
def T(self):
warnings.warn('xarray.Dataset.T has been deprecated as an alias for '
'`.transpose()`. It will be removed in a future version '
'of xarray.',
'`.transpose()`. It will be removed in xarray v0.11.',
FutureWarning, stacklevel=2)
return self.transpose()

@@ -2475,7 +2499,7 @@ def to_array(self, dim='variable', name=None):
return DataArray(data, coords, dims, attrs=self.attrs, name=name)

def _to_dataframe(self, ordered_dims):
columns = [k for k in self if k not in self.dims]
columns = [k for k in self.variables if k not in self.dims]
data = [self._variables[k].set_dims(ordered_dims).values.reshape(-1)
for k in columns]
index = self.coords.to_index(ordered_dims)
2 changes: 1 addition & 1 deletion xarray/core/formatting.py
@@ -411,7 +411,7 @@ def dataset_repr(ds):
def dataset_repr(ds):
summary = [u'<xarray.%s>' % type(ds).__name__]

col_width = _calculate_col_width(_get_col_items(ds))
col_width = _calculate_col_width(_get_col_items(ds.variables))

dims_start = pretty_print(u'Dimensions:', col_width)
summary.append(u'%s(%s)' % (dims_start, dim_summary(ds)))
2 changes: 1 addition & 1 deletion xarray/testing.py
@@ -132,7 +132,7 @@ def assert_allclose(a, b, rtol=1e-05, atol=1e-08, decode_bytes=True):
assert allclose, '{}\n{}'.format(a.coords[v].values,
b.coords[v].values)
elif isinstance(a, xr.Dataset):
assert set(a) == set(b)
assert set(a.data_vars) == set(b.data_vars)
assert set(a.coords) == set(b.coords)
for k in list(a.variables) + list(a.coords):
assert_allclose(a[k], b[k], **kwargs)
17 changes: 9 additions & 8 deletions xarray/tests/test_backends.py
@@ -152,7 +152,7 @@ def test_write_store(self):
self.assertDatasetAllClose(expected, actual)

def check_dtypes_roundtripped(self, expected, actual):
for k in expected:
for k in expected.variables:
expected_dtype = expected.variables[k].dtype
if (isinstance(self, Only32BitTypes) and
expected_dtype == 'int64'):
@@ -832,7 +832,7 @@ def test_variable_order(self):
ds.coords['c'] = 4

with self.roundtrip(ds) as actual:
self.assertEqual(list(ds), list(actual))
self.assertEqual(list(ds.variables), list(actual.variables))

def test_unsorted_index_raises(self):
# should be fixed in netcdf4 v1.2.1
@@ -976,7 +976,7 @@ def test_roundtrip_example_1_netcdf_gz(self):
def test_netcdf3_endianness(self):
# regression test for GH416
expected = open_example_dataset('bears.nc', engine='scipy')
for var in expected.values():
for var in expected.variables.values():
self.assertTrue(var.dtype.isnative)

@requires_netCDF4
@@ -1097,11 +1097,12 @@ def test_cross_engine_read_write_netcdf3(self):
with open_dataset(tmp_file,
engine=read_engine) as actual:
# hack to allow test to work:
# coord comes back as DataArray rather than coord, and so
# need to loop through here rather than in the test
# function (or we get recursion)
[assert_allclose(data[k].variable, actual[k].variable)
for k in data]
# coord comes back as DataArray rather than coord,
# and so need to loop through here rather than in
# the test function (or we get recursion)
[assert_allclose(data[k].variable,
actual[k].variable)
for k in data.variables]

def test_encoding_unlimited_dims(self):
ds = Dataset({'x': ('y', np.arange(10.0))})
2 changes: 1 addition & 1 deletion xarray/tests/test_combine.py
@@ -19,7 +19,7 @@ def test_concat(self):

# drop the third dimension to keep things relatively understandable
data = create_test_data()
for k in list(data):
for k in list(data.variables):
if 'dim3' in data[k].dims:
del data[k]

50 changes: 28 additions & 22 deletions xarray/tests/test_dataset.py
@@ -51,7 +51,7 @@ def create_test_data(seed=None):
obj.coords['numbers'] = ('dim3', np.array([0, 1, 2, 0, 0, 1, 1, 2, 2, 3],
dtype='int64'))
obj.encoding = {'foo': 'bar'}
assert all(obj.data.flags.writeable for obj in obj.values())
assert all(obj.data.flags.writeable for obj in obj.variables.values())
return obj


@@ -410,11 +410,16 @@ def test_properties(self):
self.assertIsInstance(ds.dims.mapping, utils.SortedKeysDict)
self.assertIs(type(ds.dims.mapping.mapping), dict)

self.assertItemsEqual(ds, list(ds.variables))
self.assertItemsEqual(ds.keys(), list(ds.variables))
with pytest.warns(FutureWarning):
self.assertItemsEqual(ds, list(ds.variables))
with pytest.warns(FutureWarning):
self.assertItemsEqual(ds.keys(), list(ds.variables))
self.assertNotIn('aasldfjalskdfj', ds.variables)
self.assertIn('dim1', repr(ds.variables))
self.assertEqual(len(ds), 7)
with pytest.warns(FutureWarning):
self.assertEqual(len(ds), 7)
with pytest.warns(FutureWarning):
self.assertEqual(bool(ds), True)

self.assertItemsEqual(ds.data_vars, ['var1', 'var2', 'var3'])
self.assertItemsEqual(ds.data_vars.keys(), ['var1', 'var2', 'var3'])
@@ -470,7 +475,7 @@ def test_variable(self):
self.assertTrue('foo' in a)
a['bar'] = (('time', 'x',), d)
# order of creation is preserved
self.assertEqual(list(a), ['foo', 'bar'])
self.assertEqual(list(a.variables), ['foo', 'bar'])
self.assertArrayEqual(a['foo'].values, d)
# try to add variable with dim (10,3) with data that's (3,10)
with self.assertRaises(ValueError):
@@ -819,7 +824,7 @@ def test_isel(self):
else:
self.assertEqual(data.dims[d], ret.dims[d])
# Verify that the data is what we expect
for v in data:
for v in data.variables:
self.assertEqual(data[v].dims, ret[v].dims)
self.assertEqual(data[v].attrs, ret[v].attrs)
slice_list = [slice(None)] * data[v].values.ndim
@@ -1801,7 +1806,8 @@ def test_drop_variables(self):

self.assertDatasetIdentical(data, data.drop([]))

expected = Dataset(dict((k, data[k]) for k in data if k != 'time'))
expected = Dataset(dict((k, data[k]) for k in data.variables
if k != 'time'))
actual = data.drop('time')
self.assertDatasetIdentical(expected, actual)
actual = data.drop(['time'])
@@ -1848,8 +1854,7 @@ def test_copy(self):

for copied in [data.copy(deep=True), deepcopy(data)]:
self.assertDatasetIdentical(data, copied)
for k in data:
v0 = data.variables[k]
for k, v0 in data.variables.items():
v1 = copied.variables[k]
self.assertIsNot(v0, v1)

@@ -2304,30 +2309,30 @@ def test_setitem_align_new_indexes(self):

def test_assign(self):
ds = Dataset()
actual = ds.assign(x = [0, 1, 2], y = 2)
actual = ds.assign(x=[0, 1, 2], y=2)
expected = Dataset({'x': [0, 1, 2], 'y': 2})
self.assertDatasetIdentical(actual, expected)
self.assertEqual(list(actual), ['x', 'y'])
self.assertEqual(list(actual.variables), ['x', 'y'])
self.assertDatasetIdentical(ds, Dataset())

actual = actual.assign(y = lambda ds: ds.x ** 2)
actual = actual.assign(y=lambda ds: ds.x ** 2)
expected = Dataset({'y': ('x', [0, 1, 4]), 'x': [0, 1, 2]})
self.assertDatasetIdentical(actual, expected)

actual = actual.assign_coords(z = 2)
actual = actual.assign_coords(z=2)
expected = Dataset({'y': ('x', [0, 1, 4])}, {'z': 2, 'x': [0, 1, 2]})
self.assertDatasetIdentical(actual, expected)

ds = Dataset({'a': ('x', range(3))}, {'b': ('x', ['A'] * 2 + ['B'])})
actual = ds.groupby('b').assign(c = lambda ds: 2 * ds.a)
actual = ds.groupby('b').assign(c=lambda ds: 2 * ds.a)
expected = ds.merge({'c': ('x', [0, 2, 4])})
self.assertDatasetIdentical(actual, expected)

actual = ds.groupby('b').assign(c = lambda ds: ds.a.sum())
actual = ds.groupby('b').assign(c=lambda ds: ds.a.sum())
expected = ds.merge({'c': ('x', [1, 1, 2])})
self.assertDatasetIdentical(actual, expected)

actual = ds.groupby('b').assign_coords(c = lambda ds: ds.a.sum())
actual = ds.groupby('b').assign_coords(c=lambda ds: ds.a.sum())
expected = expected.set_coords('c')
self.assertDatasetIdentical(actual, expected)

@@ -2385,12 +2390,13 @@ def test_setitem_multiindex_level(self):

def test_delitem(self):
data = create_test_data()
all_items = set(data)
self.assertItemsEqual(data, all_items)
all_items = set(data.variables)
self.assertItemsEqual(data.variables, all_items)
del data['var1']
self.assertItemsEqual(data, all_items - set(['var1']))
self.assertItemsEqual(data.variables, all_items - set(['var1']))
del data['numbers']
self.assertItemsEqual(data, all_items - set(['var1', 'numbers']))
self.assertItemsEqual(data.variables,
all_items - set(['var1', 'numbers']))
self.assertNotIn('numbers', data.coords)

def test_squeeze(self):
@@ -3586,12 +3592,12 @@ def test_dataset_transpose(self):

ds = create_test_data()
actual = ds.transpose()
for k in ds:
for k in ds.variables:
self.assertEqual(actual[k].dims[::-1], ds[k].dims)

new_order = ('dim2', 'dim3', 'dim1', 'time')
actual = ds.transpose(*new_order)
for k in ds:
for k in ds.variables:
expected_dims = tuple(d for d in new_order if d in ds[k].dims)
self.assertEqual(actual[k].dims, expected_dims)
