pydata · shoyer · Sep 5, 2014 · Sep 5, 2014 · Sep 5, 2014 · Sep 5, 2014
diff --git a/doc/whats-new.rst b/doc/whats-new.rst
@@ -10,13 +10,25 @@ intended to allow for keeping track of arrays of metadata that describe the
 grid on which the points in "variable" arrays lie. They are preserved (when
 unambiguous) even though mathematical operations.
 
-- ``Dataset.select_vars`` deprecated: index a ``Dataset`` with a list of variables
-  instead.
-- ``DataArray.select_vars`` and ``DataArray.drop_vars`` deprecated: use
-  :py:meth:`~xray.DataArray.reset_coords` instead.
+Backwards incompatible changes
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+- The items in a ``Dataset`` for the purposes of iteration (``for v in ds``,
+  ``ds.keys()`` and ``ds.items()``) and contents checks (``k in ds``) are now
+  only the *variables*, formerly called *non-coordinates*. Correspondingly, the
+  ``Dataset.noncoords`` property has been deprecated (you can just use the
+  ``Dataset`` object itself).
 - ``Dataset.__eq__`` and ``Dataset.__ne__`` now are now undefined, because in
   a future version of xray we intend to make the operations element-wise.
 
+Deprecations
+~~~~~~~~~~~~
+
+- ``Dataset.select_vars`` deprecated: index a ``Dataset`` with a list of
+  variable names instead.
+- ``DataArray.select_vars`` and ``DataArray.drop_vars`` deprecated: use
+  :py:meth:`~xray.DataArray.reset_coords` instead.
+
 v0.2.0 (14 August 2014)
 -----------------------
 

diff --git a/xray/core/dataset.py b/xray/core/dataset.py
@@ -326,7 +326,7 @@ def _update_vars_and_coords(self, new_variables, new_coord_names={},
         variables = self._variables.copy() if needs_copy else self._variables
 
         if check_coord_names:
-            _assert_empty([k for k in self.noncoords if k in new_coord_names],
+            _assert_empty([k for k in self if k in new_coord_names],
                           'coordinates with these names already exist as '
                           'variables: %s')
 
@@ -514,13 +514,13 @@ def __contains__(self, key):
         """The 'in' operator will return true or false depending on whether
         'key' is a variable in the dataset or not.
         """
-        return key in self.variables
+        return key in self._variables and not key in self._coord_names
 
     def __len__(self):
-        return len(self.variables)
+        return len(self._variables) - len(self._coord_names)
 
     def __iter__(self):
-        return iter(self.variables)
+        return (k for k in self._variables if k not in self._coord_names)
 
     @property
     def virtual_variables(self):
@@ -651,15 +651,19 @@ def coordinates(self):
     def noncoords(self):
         """Dictionary of DataArrays whose names do not match dimensions.
         """
-        return FrozenOrderedDict((name, self[name]) for name in self
-                                 if name not in self.coords)
+        warnings.warn('the Dataset property `noncoords` has been deprecated; '
+                      'just use the Dataset object directly',
+                      FutureWarning, stacklevel=2)
+        return self
 
     @property
     def noncoordinates(self):
         """Dictionary of DataArrays whose names do not match dimensions.
         """
-        utils.alias_warning('noncoordinates', 'noncoords')
-        return self.noncoords
+        warnings.warn('the Dataset property `noncoordinates` has been '
+                      'deprecated; just use the Dataset object directly',
+                      FutureWarning, stacklevel=2)
+        return self
 
     def set_coords(self, names, inplace=False):
         """Given names of one or more variables, set them as coordinates
@@ -1229,7 +1233,7 @@ def apply(self, func, keep_attrs=False, **kwargs):
             noncoordinate are dropped.
         """
         variables = OrderedDict((k, func(v, **kwargs))
-                                for k, v in iteritems(self.noncoords))
+                                for k, v in iteritems(self))
         attrs = self.attrs if keep_attrs else {}
         return type(self)(variables, attrs=attrs)
 
@@ -1279,7 +1283,7 @@ def differs(vname, v):
         else:
             raise ValueError("Unexpected value for mode: %s" % mode)
 
-        if any(v not in datasets[0] for v in concat_over):
+        if any(v not in datasets[0]._variables for v in concat_over):
             raise ValueError('not all elements in concat_over %r found '
                              'in the first dataset %r'
                              % (concat_over, datasets[0]))
@@ -1305,9 +1309,9 @@ def differs(vname, v):
                     and not utils.dict_equiv(ds.attrs, concatenated.attrs)):
                 raise ValueError('dataset global attributes not equal')
             for k, v in iteritems(ds._variables):
-                if k not in concatenated and k not in concat_over:
+                if k not in concatenated._variables and k not in concat_over:
                     raise ValueError('encountered unexpected variable %r' % k)
-                elif (k in concatenated and k != dim_name and
+                elif (k in concatenated._variables and k != dim_name and
                           not getattr(v, compat)(concatenated[k])):
                     verb = 'equal' if compat == 'equals' else compat
                     raise ValueError(

diff --git a/xray/core/formatting.py b/xray/core/formatting.py
@@ -105,7 +105,7 @@ def _summarize_variables(variables, first_col_width, always_show_values):
     return ([summarize_var(v.name, v, first_col_width,
                            show_values=(always_show_values or _not_remote(v)))
              for v in itervalues(variables)]
-            or ['    Empty'])
+            or ['    *empty*'])
 
 
 def _summarize_coordinates(coords, first_col_width,
@@ -131,12 +131,12 @@ def coords_repr(coords):
     return '\n'.join(summary)
 
 
-def _summarize_attributes(data, indent='    '):
-    if data.attrs:
+def _summarize_attributes(attrs, indent='    '):
+    if attrs:
         attr_summaries = ['%s%s: %s' % (indent, k, v) for k, v
-                          in iteritems(data.attrs)]
+                          in iteritems(attrs)]
     else:
-        attr_summaries = [indent + 'Empty']
+        attr_summaries = [indent + '*empty*']
     return attr_summaries
 
 
@@ -160,8 +160,9 @@ def array_repr(arr):
         if arr.coords:
             summary.append(repr(arr.coords))
 
-    summary.append('Attributes:')
-    summary.extend(_summarize_attributes(arr))
+    if arr.attrs:
+        summary.append('Attributes:')
+        summary.extend(_summarize_attributes(arr.attrs))
 
     return '\n'.join(summary)
 
@@ -191,11 +192,12 @@ def dataset_repr(ds, preview_all_values=False):
     summary.extend(_summarize_coordinates(ds.coords, first_col_width,
                                           preview_all_values))
 
-    summary.append('Noncoordinates:')
-    summary.extend(_summarize_variables(ds.noncoords, first_col_width,
+    summary.append('Variables:')
+    summary.extend(_summarize_variables(ds, first_col_width,
                                         always_show_values=preview_all_values))
 
-    summary.append('Attributes:')
-    summary.extend(_summarize_attributes(ds))
+    if ds.attrs:
+        summary.append('Attributes:')
+        summary.extend(_summarize_attributes(ds.attrs))
 
     return '\n'.join(summary)
diff --git a/xray/test/__init__.py b/xray/test/__init__.py
@@ -99,8 +99,8 @@ def assertDatasetIdentical(self, d1, d2):
         # this method is functionally equivalent to `assert d1.identical(d2)`,
         # but it checks each aspect of equality separately for easier debugging
         assert utils.dict_equiv(d1.attrs, d2.attrs), (d1.attrs, d2.attrs)
-        self.assertEqual(sorted(d1.noncoords, key=str),
-                         sorted(d2.noncoords, key=str))
+        self.assertEqual(sorted(d1, key=str),
+                         sorted(d2, key=str))
         self.assertEqual(sorted(d1.coords, key=str),
                          sorted(d2.coords, key=str))
         for k in d1:

diff --git a/xray/test/test_backends.py b/xray/test/test_backends.py
@@ -382,10 +382,9 @@ def test_roundtrip_character_array(self):
             expected = Dataset({'x': ('x', values)})
             with open_dataset(tmp_file) as actual:
                 self.assertDatasetIdentical(expected, actual)
-
-            # regression test for #157
-            with self.roundtrip(actual) as roundtripped:
-                self.assertDatasetIdentical(expected, roundtripped)
+                # regression test for #157
+                with self.roundtrip(actual) as roundtripped:
+                    self.assertDatasetIdentical(expected, roundtripped)
 
     def test_default_to_char_arrays(self):
         data = Dataset({'x': np.array(['foo', 'zzzz'], dtype='S')})

diff --git a/xray/test/test_dataset.py b/xray/test/test_dataset.py
@@ -15,17 +15,13 @@
 from . import TestCase, unittest
 
 
-_dims = {'dim1': 8, 'dim2': 9, 'dim3': 10}
-_vars = {'var1': ['dim1', 'dim2'],
-         'var2': ['dim1', 'dim2'],
-         'var3': ['dim3', 'dim1'],
-         }
-_testvar = sorted(_vars.keys())[0]
-_testdim = sorted(_dims.keys())[0]
-
-
 def create_test_data(seed=None):
     rs = np.random.RandomState(seed)
+    _vars = {'var1': ['dim1', 'dim2'],
+             'var2': ['dim1', 'dim2'],
+             'var3': ['dim3', 'dim1']}
+    _dims = {'dim1': 8, 'dim2': 9, 'dim3': 10}
+
     obj = Dataset()
     obj['time'] = ('time', pd.date_range('2000-01-01', periods=20))
     obj['dim1'] = ('dim1', np.arange(_dims['dim1']))
@@ -72,6 +68,7 @@ def store_variables(self):
 class TestDataset(TestCase):
     def test_repr(self):
         data = create_test_data(seed=123)
+        data.attrs['foo'] = 'bar'
         # need to insert str dtype at runtime to handle both Python 2 & 3
         expected = dedent("""\
         <xray.Dataset>
@@ -83,25 +80,22 @@ def test_repr(self):
             time     (time) datetime64[ns] 2000-01-01 2000-01-02 2000-01-03 2000-01-04 ...
         Other Coordinates:
             numbers  (dim3) int64 0 1 2 0 0 1 1 2 2 3
-        Noncoordinates:
+        Variables:
             var1     (dim1, dim2) float64 -1.086 0.9973 0.283 -1.506 -0.5786 1.651 -2.427 -0.4289 ...
             var2     (dim1, dim2) float64 1.162 -1.097 -2.123 1.04 -0.4034 -0.126 -0.8375 -1.606 ...
             var3     (dim3, dim1) float64 0.5565 -0.2121 0.4563 1.545 -0.2397 0.1433 0.2538 ...
         Attributes:
-            Empty""") % data['dim3'].dtype
+            foo: bar""") % data['dim3'].dtype
         actual = '\n'.join(x.rstrip() for x in repr(data).split('\n'))
         print(actual)
-        self.assertEqual(expected, actual)
 
         expected = dedent("""\
         <xray.Dataset>
         Dimensions:  ()
         Index Coordinates:
-            Empty
-        Noncoordinates:
-            Empty
-        Attributes:
-            Empty""")
+            *empty*
+        Variables:
+            *empty*""")
         actual = '\n'.join(x.rstrip() for x in repr(Dataset()).split('\n'))
         print(actual)
         self.assertEqual(expected, actual)
@@ -112,11 +106,9 @@ def test_repr(self):
         <xray.Dataset>
         Dimensions:  ()
         Index Coordinates:
-            Empty
-        Noncoordinates:
-            foo      float64 1.0
-        Attributes:
-            Empty""")
+            *empty*
+        Variables:
+            foo      float64 1.0""")
         actual = '\n'.join(x.rstrip() for x in repr(data).split('\n'))
         print(actual)
         self.assertEqual(expected, actual)
@@ -164,9 +156,34 @@ def test_constructor_with_coords(self):
             Dataset({'a': ('x', [1])}, {'a': ('x', [1])})
 
         ds = Dataset({}, {'a': ('x', [1])})
-        self.assertFalse(ds.noncoords)
+        self.assertFalse(ds)
         self.assertItemsEqual(ds.coords.keys(), ['x', 'a'])
 
+    def test_properties(self):
+        ds = create_test_data()
+        self.assertEqual(ds.dims,
+                         {'dim1': 8, 'dim2': 9, 'dim3': 10, 'time': 20})
+
+        self.assertItemsEqual(ds, ['var1', 'var2', 'var3'])
+        self.assertItemsEqual(ds.keys(), ['var1', 'var2', 'var3'])
+        self.assertIn('var1', ds)
+        self.assertNotIn('dim1', ds)
+        self.assertNotIn('numbers', ds)
+        self.assertEqual(len(ds), 3)
+
+        self.assertItemsEqual(ds.indexes, ['dim1', 'dim2', 'dim3', 'time'])
+        self.assertEqual(len(ds.indexes), 4)
+
+        self.assertItemsEqual(ds.nonindexes, ['var1', 'var2', 'var3', 'numbers'])
+        self.assertEqual(len(ds.nonindexes), 4)
+
+        self.assertItemsEqual(ds.coords,
+                              ['time', 'dim1', 'dim2', 'dim3', 'numbers'])
+        self.assertIn('dim1', ds.coords)
+        self.assertIn('numbers', ds.coords)
+        self.assertNotIn('var1', ds.coords)
+        self.assertEqual(len(ds.coords), 5)
+
     def test_variable(self):
         a = Dataset()
         d = np.random.random((10, 3))
@@ -182,7 +199,7 @@ def test_variable(self):
         with self.assertRaises(ValueError):
             a['qux'] = (('time', 'x'), d.T)
 
-    def test_coords_create(self):
+    def test_modify_inplace(self):
         a = Dataset()
         vec = np.random.random((10,))
         attributes = {'foo': 'bar'}
@@ -412,19 +429,19 @@ def test_isel(self):
 
         ret = data.isel(dim1=0)
         self.assertEqual({'time': 20, 'dim2': 9, 'dim3': 10}, ret.dims)
-        self.assertItemsEqual(data.noncoords, ret.noncoords)
+        self.assertItemsEqual(data, ret)
         self.assertItemsEqual(data.coords, ret.coords)
         self.assertItemsEqual(data.indexes, list(ret.indexes) + ['dim1'])
 
         ret = data.isel(time=slice(2), dim1=0, dim2=slice(5))
         self.assertEqual({'time': 2, 'dim2': 5, 'dim3': 10}, ret.dims)
-        self.assertItemsEqual(data.noncoords, ret.noncoords)
+        self.assertItemsEqual(data, ret)
         self.assertItemsEqual(data.coords, ret.coords)
         self.assertItemsEqual(data.indexes, list(ret.indexes) + ['dim1'])
 
         ret = data.isel(time=0, dim1=0, dim2=slice(5))
         self.assertItemsEqual({'dim2': 5, 'dim3': 10}, ret.dims)
-        self.assertItemsEqual(data.noncoords, ret.noncoords)
+        self.assertItemsEqual(data, ret)
         self.assertItemsEqual(data.coords, ret.coords)
         self.assertItemsEqual(data.indexes,
                               list(ret.indexes) + ['dim1', 'time'])
@@ -738,11 +755,12 @@ def test_setitem(self):
     def test_delitem(self):
         data = create_test_data()
         all_items = set(data.variables)
-        self.assertItemsEqual(data, all_items)
+        self.assertItemsEqual(data.variables, all_items)
         del data['var1']
-        self.assertItemsEqual(data, all_items - set(['var1']))
+        self.assertItemsEqual(data.variables, all_items - set(['var1']))
         del data['dim1']
-        self.assertItemsEqual(data, set(['time', 'dim2', 'dim3', 'numbers']))
+        self.assertItemsEqual(data.variables,
+                              set(['time', 'dim2', 'dim3', 'numbers']))
         self.assertNotIn('dim1', data.dims)
         self.assertNotIn('dim1', data.coords)
 
@@ -840,7 +858,7 @@ def rectify_dim_order(dataset):
             # return a new dataset with all variable dimensions tranposed into
             # the order in which they are found in `data`
             return Dataset(dict((k, v.transpose(*data[k].dims))
-                                for k, v in iteritems(dataset.noncoords)),
+                                for k, v in iteritems(dataset)),
                            dataset.coords, attrs=dataset.attrs)
 
         for dim in ['dim1', 'dim2', 'dim3']:
@@ -985,7 +1003,7 @@ def test_reduce(self):
 
         actual = data.max()
         expected = Dataset(dict((k, v.max())
-                                for k, v in iteritems(data.noncoords)))
+                                for k, v in iteritems(data)))
         self.assertDatasetEqual(expected, actual)
 
         self.assertDatasetEqual(data.min(dim=['dim1']),
@@ -1011,7 +1029,7 @@ def test_reduce_non_numeric(self):
         data2 = create_test_data(seed=44)
         add_vars = {'var4': ['dim1', 'dim2']}
         for v, dims in sorted(add_vars.items()):
-            size = tuple(_dims[d] for d in dims)
+            size = tuple(data1.dims[d] for d in dims)
             data = np.random.random_integers(0, 100, size=size).astype(np.str_)
             data1[v] = (dims, data, {'foo': 'variable'})