Skip to content

Commit 3e732a6

Browse files
committed
Merge pull request #224 from shoyer/non-coords-as-variables
progress towards removing "non-coordinates" as a concept
2 parents 4c55ac3 + 90e862c commit 3e732a6

File tree

6 files changed

+101
-66
lines changed

6 files changed

+101
-66
lines changed

doc/whats-new.rst

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,13 +10,25 @@ intended to allow for keeping track of arrays of metadata that describe the
1010
grid on which the points in "variable" arrays lie. They are preserved (when
1111
unambiguous) even through mathematical operations.
1212

13-
- ``Dataset.select_vars`` deprecated: index a ``Dataset`` with a list of variables
14-
instead.
15-
- ``DataArray.select_vars`` and ``DataArray.drop_vars`` deprecated: use
16-
:py:meth:`~xray.DataArray.reset_coords` instead.
13+
Backwards incompatible changes
14+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
15+
16+
- The items in a ``Dataset`` for the purposes of iteration (``for v in ds``,
17+
``ds.keys()`` and ``ds.items()``) and contents checks (``k in ds``) are now
18+
only the *variables*, formerly called *non-coordinates*. Correspondingly, the
19+
``Dataset.noncoords`` property has been deprecated (you can just use the
20+
``Dataset`` object itself).
1721
- ``Dataset.__eq__`` and ``Dataset.__ne__`` are now undefined, because in
1822
a future version of xray we intend to make the operations element-wise.
1923

24+
Deprecations
25+
~~~~~~~~~~~~
26+
27+
- ``Dataset.select_vars`` deprecated: index a ``Dataset`` with a list of
28+
variable names instead.
29+
- ``DataArray.select_vars`` and ``DataArray.drop_vars`` deprecated: use
30+
:py:meth:`~xray.DataArray.reset_coords` instead.
31+
2032
v0.2.0 (14 August 2014)
2133
-----------------------
2234

xray/core/dataset.py

Lines changed: 16 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -326,7 +326,7 @@ def _update_vars_and_coords(self, new_variables, new_coord_names={},
326326
variables = self._variables.copy() if needs_copy else self._variables
327327

328328
if check_coord_names:
329-
_assert_empty([k for k in self.noncoords if k in new_coord_names],
329+
_assert_empty([k for k in self if k in new_coord_names],
330330
'coordinates with these names already exist as '
331331
'variables: %s')
332332

@@ -514,13 +514,13 @@ def __contains__(self, key):
514514
"""The 'in' operator will return true or false depending on whether
515515
'key' is a variable in the dataset or not.
516516
"""
517-
return key in self.variables
517+
return key in self._variables and not key in self._coord_names
518518

519519
def __len__(self):
520-
return len(self.variables)
520+
return len(self._variables) - len(self._coord_names)
521521

522522
def __iter__(self):
523-
return iter(self.variables)
523+
return (k for k in self._variables if k not in self._coord_names)
524524

525525
@property
526526
def virtual_variables(self):
@@ -651,15 +651,19 @@ def coordinates(self):
651651
def noncoords(self):
652652
"""Dictionary of DataArrays whose names do not match dimensions.
653653
"""
654-
return FrozenOrderedDict((name, self[name]) for name in self
655-
if name not in self.coords)
654+
warnings.warn('the Dataset property `noncoords` has been deprecated; '
655+
'just use the Dataset object directly',
656+
FutureWarning, stacklevel=2)
657+
return self
656658

657659
@property
658660
def noncoordinates(self):
659661
"""Dictionary of DataArrays whose names do not match dimensions.
660662
"""
661-
utils.alias_warning('noncoordinates', 'noncoords')
662-
return self.noncoords
663+
warnings.warn('the Dataset property `noncoordinates` has been '
664+
'deprecated; just use the Dataset object directly',
665+
FutureWarning, stacklevel=2)
666+
return self
663667

664668
def set_coords(self, names, inplace=False):
665669
"""Given names of one or more variables, set them as coordinates
@@ -1229,7 +1233,7 @@ def apply(self, func, keep_attrs=False, **kwargs):
12291233
noncoordinate are dropped.
12301234
"""
12311235
variables = OrderedDict((k, func(v, **kwargs))
1232-
for k, v in iteritems(self.noncoords))
1236+
for k, v in iteritems(self))
12331237
attrs = self.attrs if keep_attrs else {}
12341238
return type(self)(variables, attrs=attrs)
12351239

@@ -1279,7 +1283,7 @@ def differs(vname, v):
12791283
else:
12801284
raise ValueError("Unexpected value for mode: %s" % mode)
12811285

1282-
if any(v not in datasets[0] for v in concat_over):
1286+
if any(v not in datasets[0]._variables for v in concat_over):
12831287
raise ValueError('not all elements in concat_over %r found '
12841288
'in the first dataset %r'
12851289
% (concat_over, datasets[0]))
@@ -1305,9 +1309,9 @@ def differs(vname, v):
13051309
and not utils.dict_equiv(ds.attrs, concatenated.attrs)):
13061310
raise ValueError('dataset global attributes not equal')
13071311
for k, v in iteritems(ds._variables):
1308-
if k not in concatenated and k not in concat_over:
1312+
if k not in concatenated._variables and k not in concat_over:
13091313
raise ValueError('encountered unexpected variable %r' % k)
1310-
elif (k in concatenated and k != dim_name and
1314+
elif (k in concatenated._variables and k != dim_name and
13111315
not getattr(v, compat)(concatenated[k])):
13121316
verb = 'equal' if compat == 'equals' else compat
13131317
raise ValueError(

xray/core/formatting.py

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ def _summarize_variables(variables, first_col_width, always_show_values):
105105
return ([summarize_var(v.name, v, first_col_width,
106106
show_values=(always_show_values or _not_remote(v)))
107107
for v in itervalues(variables)]
108-
or [' Empty'])
108+
or [' *empty*'])
109109

110110

111111
def _summarize_coordinates(coords, first_col_width,
@@ -131,12 +131,12 @@ def coords_repr(coords):
131131
return '\n'.join(summary)
132132

133133

134-
def _summarize_attributes(data, indent=' '):
135-
if data.attrs:
134+
def _summarize_attributes(attrs, indent=' '):
135+
if attrs:
136136
attr_summaries = ['%s%s: %s' % (indent, k, v) for k, v
137-
in iteritems(data.attrs)]
137+
in iteritems(attrs)]
138138
else:
139-
attr_summaries = [indent + 'Empty']
139+
attr_summaries = [indent + '*empty*']
140140
return attr_summaries
141141

142142

@@ -160,8 +160,9 @@ def array_repr(arr):
160160
if arr.coords:
161161
summary.append(repr(arr.coords))
162162

163-
summary.append('Attributes:')
164-
summary.extend(_summarize_attributes(arr))
163+
if arr.attrs:
164+
summary.append('Attributes:')
165+
summary.extend(_summarize_attributes(arr.attrs))
165166

166167
return '\n'.join(summary)
167168

@@ -191,11 +192,12 @@ def dataset_repr(ds, preview_all_values=False):
191192
summary.extend(_summarize_coordinates(ds.coords, first_col_width,
192193
preview_all_values))
193194

194-
summary.append('Noncoordinates:')
195-
summary.extend(_summarize_variables(ds.noncoords, first_col_width,
195+
summary.append('Variables:')
196+
summary.extend(_summarize_variables(ds, first_col_width,
196197
always_show_values=preview_all_values))
197198

198-
summary.append('Attributes:')
199-
summary.extend(_summarize_attributes(ds))
199+
if ds.attrs:
200+
summary.append('Attributes:')
201+
summary.extend(_summarize_attributes(ds.attrs))
200202

201203
return '\n'.join(summary)

xray/test/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -99,8 +99,8 @@ def assertDatasetIdentical(self, d1, d2):
9999
# this method is functionally equivalent to `assert d1.identical(d2)`,
100100
# but it checks each aspect of equality separately for easier debugging
101101
assert utils.dict_equiv(d1.attrs, d2.attrs), (d1.attrs, d2.attrs)
102-
self.assertEqual(sorted(d1.noncoords, key=str),
103-
sorted(d2.noncoords, key=str))
102+
self.assertEqual(sorted(d1, key=str),
103+
sorted(d2, key=str))
104104
self.assertEqual(sorted(d1.coords, key=str),
105105
sorted(d2.coords, key=str))
106106
for k in d1:

xray/test/test_backends.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -382,10 +382,9 @@ def test_roundtrip_character_array(self):
382382
expected = Dataset({'x': ('x', values)})
383383
with open_dataset(tmp_file) as actual:
384384
self.assertDatasetIdentical(expected, actual)
385-
386-
# regression test for #157
387-
with self.roundtrip(actual) as roundtripped:
388-
self.assertDatasetIdentical(expected, roundtripped)
385+
# regression test for #157
386+
with self.roundtrip(actual) as roundtripped:
387+
self.assertDatasetIdentical(expected, roundtripped)
389388

390389
def test_default_to_char_arrays(self):
391390
data = Dataset({'x': np.array(['foo', 'zzzz'], dtype='S')})

xray/test/test_dataset.py

Lines changed: 51 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -15,17 +15,13 @@
1515
from . import TestCase, unittest
1616

1717

18-
_dims = {'dim1': 8, 'dim2': 9, 'dim3': 10}
19-
_vars = {'var1': ['dim1', 'dim2'],
20-
'var2': ['dim1', 'dim2'],
21-
'var3': ['dim3', 'dim1'],
22-
}
23-
_testvar = sorted(_vars.keys())[0]
24-
_testdim = sorted(_dims.keys())[0]
25-
26-
2718
def create_test_data(seed=None):
2819
rs = np.random.RandomState(seed)
20+
_vars = {'var1': ['dim1', 'dim2'],
21+
'var2': ['dim1', 'dim2'],
22+
'var3': ['dim3', 'dim1']}
23+
_dims = {'dim1': 8, 'dim2': 9, 'dim3': 10}
24+
2925
obj = Dataset()
3026
obj['time'] = ('time', pd.date_range('2000-01-01', periods=20))
3127
obj['dim1'] = ('dim1', np.arange(_dims['dim1']))
@@ -72,6 +68,7 @@ def store_variables(self):
7268
class TestDataset(TestCase):
7369
def test_repr(self):
7470
data = create_test_data(seed=123)
71+
data.attrs['foo'] = 'bar'
7572
# need to insert str dtype at runtime to handle both Python 2 & 3
7673
expected = dedent("""\
7774
<xray.Dataset>
@@ -83,25 +80,22 @@ def test_repr(self):
8380
time (time) datetime64[ns] 2000-01-01 2000-01-02 2000-01-03 2000-01-04 ...
8481
Other Coordinates:
8582
numbers (dim3) int64 0 1 2 0 0 1 1 2 2 3
86-
Noncoordinates:
83+
Variables:
8784
var1 (dim1, dim2) float64 -1.086 0.9973 0.283 -1.506 -0.5786 1.651 -2.427 -0.4289 ...
8885
var2 (dim1, dim2) float64 1.162 -1.097 -2.123 1.04 -0.4034 -0.126 -0.8375 -1.606 ...
8986
var3 (dim3, dim1) float64 0.5565 -0.2121 0.4563 1.545 -0.2397 0.1433 0.2538 ...
9087
Attributes:
91-
Empty""") % data['dim3'].dtype
88+
foo: bar""") % data['dim3'].dtype
9289
actual = '\n'.join(x.rstrip() for x in repr(data).split('\n'))
9390
print(actual)
94-
self.assertEqual(expected, actual)
9591

9692
expected = dedent("""\
9793
<xray.Dataset>
9894
Dimensions: ()
9995
Index Coordinates:
100-
Empty
101-
Noncoordinates:
102-
Empty
103-
Attributes:
104-
Empty""")
96+
*empty*
97+
Variables:
98+
*empty*""")
10599
actual = '\n'.join(x.rstrip() for x in repr(Dataset()).split('\n'))
106100
print(actual)
107101
self.assertEqual(expected, actual)
@@ -112,11 +106,9 @@ def test_repr(self):
112106
<xray.Dataset>
113107
Dimensions: ()
114108
Index Coordinates:
115-
Empty
116-
Noncoordinates:
117-
foo float64 1.0
118-
Attributes:
119-
Empty""")
109+
*empty*
110+
Variables:
111+
foo float64 1.0""")
120112
actual = '\n'.join(x.rstrip() for x in repr(data).split('\n'))
121113
print(actual)
122114
self.assertEqual(expected, actual)
@@ -164,9 +156,34 @@ def test_constructor_with_coords(self):
164156
Dataset({'a': ('x', [1])}, {'a': ('x', [1])})
165157

166158
ds = Dataset({}, {'a': ('x', [1])})
167-
self.assertFalse(ds.noncoords)
159+
self.assertFalse(ds)
168160
self.assertItemsEqual(ds.coords.keys(), ['x', 'a'])
169161

162+
def test_properties(self):
163+
ds = create_test_data()
164+
self.assertEqual(ds.dims,
165+
{'dim1': 8, 'dim2': 9, 'dim3': 10, 'time': 20})
166+
167+
self.assertItemsEqual(ds, ['var1', 'var2', 'var3'])
168+
self.assertItemsEqual(ds.keys(), ['var1', 'var2', 'var3'])
169+
self.assertIn('var1', ds)
170+
self.assertNotIn('dim1', ds)
171+
self.assertNotIn('numbers', ds)
172+
self.assertEqual(len(ds), 3)
173+
174+
self.assertItemsEqual(ds.indexes, ['dim1', 'dim2', 'dim3', 'time'])
175+
self.assertEqual(len(ds.indexes), 4)
176+
177+
self.assertItemsEqual(ds.nonindexes, ['var1', 'var2', 'var3', 'numbers'])
178+
self.assertEqual(len(ds.nonindexes), 4)
179+
180+
self.assertItemsEqual(ds.coords,
181+
['time', 'dim1', 'dim2', 'dim3', 'numbers'])
182+
self.assertIn('dim1', ds.coords)
183+
self.assertIn('numbers', ds.coords)
184+
self.assertNotIn('var1', ds.coords)
185+
self.assertEqual(len(ds.coords), 5)
186+
170187
def test_variable(self):
171188
a = Dataset()
172189
d = np.random.random((10, 3))
@@ -182,7 +199,7 @@ def test_variable(self):
182199
with self.assertRaises(ValueError):
183200
a['qux'] = (('time', 'x'), d.T)
184201

185-
def test_coords_create(self):
202+
def test_modify_inplace(self):
186203
a = Dataset()
187204
vec = np.random.random((10,))
188205
attributes = {'foo': 'bar'}
@@ -412,19 +429,19 @@ def test_isel(self):
412429

413430
ret = data.isel(dim1=0)
414431
self.assertEqual({'time': 20, 'dim2': 9, 'dim3': 10}, ret.dims)
415-
self.assertItemsEqual(data.noncoords, ret.noncoords)
432+
self.assertItemsEqual(data, ret)
416433
self.assertItemsEqual(data.coords, ret.coords)
417434
self.assertItemsEqual(data.indexes, list(ret.indexes) + ['dim1'])
418435

419436
ret = data.isel(time=slice(2), dim1=0, dim2=slice(5))
420437
self.assertEqual({'time': 2, 'dim2': 5, 'dim3': 10}, ret.dims)
421-
self.assertItemsEqual(data.noncoords, ret.noncoords)
438+
self.assertItemsEqual(data, ret)
422439
self.assertItemsEqual(data.coords, ret.coords)
423440
self.assertItemsEqual(data.indexes, list(ret.indexes) + ['dim1'])
424441

425442
ret = data.isel(time=0, dim1=0, dim2=slice(5))
426443
self.assertItemsEqual({'dim2': 5, 'dim3': 10}, ret.dims)
427-
self.assertItemsEqual(data.noncoords, ret.noncoords)
444+
self.assertItemsEqual(data, ret)
428445
self.assertItemsEqual(data.coords, ret.coords)
429446
self.assertItemsEqual(data.indexes,
430447
list(ret.indexes) + ['dim1', 'time'])
@@ -738,11 +755,12 @@ def test_setitem(self):
738755
def test_delitem(self):
739756
data = create_test_data()
740757
all_items = set(data.variables)
741-
self.assertItemsEqual(data, all_items)
758+
self.assertItemsEqual(data.variables, all_items)
742759
del data['var1']
743-
self.assertItemsEqual(data, all_items - set(['var1']))
760+
self.assertItemsEqual(data.variables, all_items - set(['var1']))
744761
del data['dim1']
745-
self.assertItemsEqual(data, set(['time', 'dim2', 'dim3', 'numbers']))
762+
self.assertItemsEqual(data.variables,
763+
set(['time', 'dim2', 'dim3', 'numbers']))
746764
self.assertNotIn('dim1', data.dims)
747765
self.assertNotIn('dim1', data.coords)
748766

@@ -840,7 +858,7 @@ def rectify_dim_order(dataset):
840858
# return a new dataset with all variable dimensions transposed into
841859
# the order in which they are found in `data`
842860
return Dataset(dict((k, v.transpose(*data[k].dims))
843-
for k, v in iteritems(dataset.noncoords)),
861+
for k, v in iteritems(dataset)),
844862
dataset.coords, attrs=dataset.attrs)
845863

846864
for dim in ['dim1', 'dim2', 'dim3']:
@@ -985,7 +1003,7 @@ def test_reduce(self):
9851003

9861004
actual = data.max()
9871005
expected = Dataset(dict((k, v.max())
988-
for k, v in iteritems(data.noncoords)))
1006+
for k, v in iteritems(data)))
9891007
self.assertDatasetEqual(expected, actual)
9901008

9911009
self.assertDatasetEqual(data.min(dim=['dim1']),
@@ -1011,7 +1029,7 @@ def test_reduce_non_numeric(self):
10111029
data2 = create_test_data(seed=44)
10121030
add_vars = {'var4': ['dim1', 'dim2']}
10131031
for v, dims in sorted(add_vars.items()):
1014-
size = tuple(_dims[d] for d in dims)
1032+
size = tuple(data1.dims[d] for d in dims)
10151033
data = np.random.random_integers(0, 100, size=size).astype(np.str_)
10161034
data1[v] = (dims, data, {'foo': 'variable'})
10171035

0 commit comments

Comments
 (0)