Skip to content

progress towards removing "non-coordinates" as a concept #224

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Sep 5, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 16 additions & 4 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,25 @@ intended to allow for keeping track of arrays of metadata that describe the
grid on which the points in "variable" arrays lie. They are preserved (when
unambiguous) even through mathematical operations.

- ``Dataset.select_vars`` deprecated: index a ``Dataset`` with a list of variables
instead.
- ``DataArray.select_vars`` and ``DataArray.drop_vars`` deprecated: use
:py:meth:`~xray.DataArray.reset_coords` instead.
Backwards incompatible changes
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

- The items in a ``Dataset`` for the purposes of iteration (``for v in ds``,
``ds.keys()`` and ``ds.items()``) and contents checks (``k in ds``) are now
only the *variables*, formerly called *non-coordinates*. Correspondingly, the
``Dataset.noncoords`` property has been deprecated (you can just use the
``Dataset`` object itself).
- ``Dataset.__eq__`` and ``Dataset.__ne__`` are now undefined, because in
a future version of xray we intend to make the operations element-wise.

Deprecations
~~~~~~~~~~~~

- ``Dataset.select_vars`` deprecated: index a ``Dataset`` with a list of
variable names instead.
- ``DataArray.select_vars`` and ``DataArray.drop_vars`` deprecated: use
:py:meth:`~xray.DataArray.reset_coords` instead.

v0.2.0 (14 August 2014)
-----------------------

Expand Down
28 changes: 16 additions & 12 deletions xray/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -326,7 +326,7 @@ def _update_vars_and_coords(self, new_variables, new_coord_names={},
variables = self._variables.copy() if needs_copy else self._variables

if check_coord_names:
_assert_empty([k for k in self.noncoords if k in new_coord_names],
_assert_empty([k for k in self if k in new_coord_names],
'coordinates with these names already exist as '
'variables: %s')

Expand Down Expand Up @@ -514,13 +514,13 @@ def __contains__(self, key):
"""The 'in' operator will return true or false depending on whether
'key' is a variable in the dataset or not.
"""
return key in self.variables
return key in self._variables and not key in self._coord_names

def __len__(self):
return len(self.variables)
return len(self._variables) - len(self._coord_names)

def __iter__(self):
return iter(self.variables)
return (k for k in self._variables if k not in self._coord_names)

@property
def virtual_variables(self):
Expand Down Expand Up @@ -651,15 +651,19 @@ def coordinates(self):
def noncoords(self):
"""Dictionary of DataArrays whose names do not match dimensions.
"""
return FrozenOrderedDict((name, self[name]) for name in self
if name not in self.coords)
warnings.warn('the Dataset property `noncoords` has been deprecated; '
'just use the Dataset object directly',
FutureWarning, stacklevel=2)
return self

@property
def noncoordinates(self):
"""Dictionary of DataArrays whose names do not match dimensions.
"""
utils.alias_warning('noncoordinates', 'noncoords')
return self.noncoords
warnings.warn('the Dataset property `noncoordinates` has been '
'deprecated; just use the Dataset object directly',
FutureWarning, stacklevel=2)
return self

def set_coords(self, names, inplace=False):
"""Given names of one or more variables, set them as coordinates
Expand Down Expand Up @@ -1229,7 +1233,7 @@ def apply(self, func, keep_attrs=False, **kwargs):
noncoordinate are dropped.
"""
variables = OrderedDict((k, func(v, **kwargs))
for k, v in iteritems(self.noncoords))
for k, v in iteritems(self))
attrs = self.attrs if keep_attrs else {}
return type(self)(variables, attrs=attrs)

Expand Down Expand Up @@ -1279,7 +1283,7 @@ def differs(vname, v):
else:
raise ValueError("Unexpected value for mode: %s" % mode)

if any(v not in datasets[0] for v in concat_over):
if any(v not in datasets[0]._variables for v in concat_over):
raise ValueError('not all elements in concat_over %r found '
'in the first dataset %r'
% (concat_over, datasets[0]))
Expand All @@ -1305,9 +1309,9 @@ def differs(vname, v):
and not utils.dict_equiv(ds.attrs, concatenated.attrs)):
raise ValueError('dataset global attributes not equal')
for k, v in iteritems(ds._variables):
if k not in concatenated and k not in concat_over:
if k not in concatenated._variables and k not in concat_over:
raise ValueError('encountered unexpected variable %r' % k)
elif (k in concatenated and k != dim_name and
elif (k in concatenated._variables and k != dim_name and
not getattr(v, compat)(concatenated[k])):
verb = 'equal' if compat == 'equals' else compat
raise ValueError(
Expand Down
24 changes: 13 additions & 11 deletions xray/core/formatting.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ def _summarize_variables(variables, first_col_width, always_show_values):
return ([summarize_var(v.name, v, first_col_width,
show_values=(always_show_values or _not_remote(v)))
for v in itervalues(variables)]
or [' Empty'])
or [' *empty*'])


def _summarize_coordinates(coords, first_col_width,
Expand All @@ -131,12 +131,12 @@ def coords_repr(coords):
return '\n'.join(summary)


def _summarize_attributes(data, indent=' '):
if data.attrs:
def _summarize_attributes(attrs, indent=' '):
if attrs:
attr_summaries = ['%s%s: %s' % (indent, k, v) for k, v
in iteritems(data.attrs)]
in iteritems(attrs)]
else:
attr_summaries = [indent + 'Empty']
attr_summaries = [indent + '*empty*']
return attr_summaries


Expand All @@ -160,8 +160,9 @@ def array_repr(arr):
if arr.coords:
summary.append(repr(arr.coords))

summary.append('Attributes:')
summary.extend(_summarize_attributes(arr))
if arr.attrs:
summary.append('Attributes:')
summary.extend(_summarize_attributes(arr.attrs))

return '\n'.join(summary)

Expand Down Expand Up @@ -191,11 +192,12 @@ def dataset_repr(ds, preview_all_values=False):
summary.extend(_summarize_coordinates(ds.coords, first_col_width,
preview_all_values))

summary.append('Noncoordinates:')
summary.extend(_summarize_variables(ds.noncoords, first_col_width,
summary.append('Variables:')
summary.extend(_summarize_variables(ds, first_col_width,
always_show_values=preview_all_values))

summary.append('Attributes:')
summary.extend(_summarize_attributes(ds))
if ds.attrs:
summary.append('Attributes:')
summary.extend(_summarize_attributes(ds.attrs))

return '\n'.join(summary)
4 changes: 2 additions & 2 deletions xray/test/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,8 +99,8 @@ def assertDatasetIdentical(self, d1, d2):
# this method is functionally equivalent to `assert d1.identical(d2)`,
# but it checks each aspect of equality separately for easier debugging
assert utils.dict_equiv(d1.attrs, d2.attrs), (d1.attrs, d2.attrs)
self.assertEqual(sorted(d1.noncoords, key=str),
sorted(d2.noncoords, key=str))
self.assertEqual(sorted(d1, key=str),
sorted(d2, key=str))
self.assertEqual(sorted(d1.coords, key=str),
sorted(d2.coords, key=str))
for k in d1:
Expand Down
7 changes: 3 additions & 4 deletions xray/test/test_backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -382,10 +382,9 @@ def test_roundtrip_character_array(self):
expected = Dataset({'x': ('x', values)})
with open_dataset(tmp_file) as actual:
self.assertDatasetIdentical(expected, actual)

# regression test for #157
with self.roundtrip(actual) as roundtripped:
self.assertDatasetIdentical(expected, roundtripped)
# regression test for #157
with self.roundtrip(actual) as roundtripped:
self.assertDatasetIdentical(expected, roundtripped)

def test_default_to_char_arrays(self):
data = Dataset({'x': np.array(['foo', 'zzzz'], dtype='S')})
Expand Down
84 changes: 51 additions & 33 deletions xray/test/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,17 +15,13 @@
from . import TestCase, unittest


_dims = {'dim1': 8, 'dim2': 9, 'dim3': 10}
_vars = {'var1': ['dim1', 'dim2'],
'var2': ['dim1', 'dim2'],
'var3': ['dim3', 'dim1'],
}
_testvar = sorted(_vars.keys())[0]
_testdim = sorted(_dims.keys())[0]


def create_test_data(seed=None):
rs = np.random.RandomState(seed)
_vars = {'var1': ['dim1', 'dim2'],
'var2': ['dim1', 'dim2'],
'var3': ['dim3', 'dim1']}
_dims = {'dim1': 8, 'dim2': 9, 'dim3': 10}

obj = Dataset()
obj['time'] = ('time', pd.date_range('2000-01-01', periods=20))
obj['dim1'] = ('dim1', np.arange(_dims['dim1']))
Expand Down Expand Up @@ -72,6 +68,7 @@ def store_variables(self):
class TestDataset(TestCase):
def test_repr(self):
data = create_test_data(seed=123)
data.attrs['foo'] = 'bar'
# need to insert str dtype at runtime to handle both Python 2 & 3
expected = dedent("""\
<xray.Dataset>
Expand All @@ -83,25 +80,22 @@ def test_repr(self):
time (time) datetime64[ns] 2000-01-01 2000-01-02 2000-01-03 2000-01-04 ...
Other Coordinates:
numbers (dim3) int64 0 1 2 0 0 1 1 2 2 3
Noncoordinates:
Variables:
var1 (dim1, dim2) float64 -1.086 0.9973 0.283 -1.506 -0.5786 1.651 -2.427 -0.4289 ...
var2 (dim1, dim2) float64 1.162 -1.097 -2.123 1.04 -0.4034 -0.126 -0.8375 -1.606 ...
var3 (dim3, dim1) float64 0.5565 -0.2121 0.4563 1.545 -0.2397 0.1433 0.2538 ...
Attributes:
Empty""") % data['dim3'].dtype
foo: bar""") % data['dim3'].dtype
actual = '\n'.join(x.rstrip() for x in repr(data).split('\n'))
print(actual)
self.assertEqual(expected, actual)

expected = dedent("""\
<xray.Dataset>
Dimensions: ()
Index Coordinates:
Empty
Noncoordinates:
Empty
Attributes:
Empty""")
*empty*
Variables:
*empty*""")
actual = '\n'.join(x.rstrip() for x in repr(Dataset()).split('\n'))
print(actual)
self.assertEqual(expected, actual)
Expand All @@ -112,11 +106,9 @@ def test_repr(self):
<xray.Dataset>
Dimensions: ()
Index Coordinates:
Empty
Noncoordinates:
foo float64 1.0
Attributes:
Empty""")
*empty*
Variables:
foo float64 1.0""")
actual = '\n'.join(x.rstrip() for x in repr(data).split('\n'))
print(actual)
self.assertEqual(expected, actual)
Expand Down Expand Up @@ -164,9 +156,34 @@ def test_constructor_with_coords(self):
Dataset({'a': ('x', [1])}, {'a': ('x', [1])})

ds = Dataset({}, {'a': ('x', [1])})
self.assertFalse(ds.noncoords)
self.assertFalse(ds)
self.assertItemsEqual(ds.coords.keys(), ['x', 'a'])

def test_properties(self):
ds = create_test_data()
self.assertEqual(ds.dims,
{'dim1': 8, 'dim2': 9, 'dim3': 10, 'time': 20})

self.assertItemsEqual(ds, ['var1', 'var2', 'var3'])
self.assertItemsEqual(ds.keys(), ['var1', 'var2', 'var3'])
self.assertIn('var1', ds)
self.assertNotIn('dim1', ds)
self.assertNotIn('numbers', ds)
self.assertEqual(len(ds), 3)

self.assertItemsEqual(ds.indexes, ['dim1', 'dim2', 'dim3', 'time'])
self.assertEqual(len(ds.indexes), 4)

self.assertItemsEqual(ds.nonindexes, ['var1', 'var2', 'var3', 'numbers'])
self.assertEqual(len(ds.nonindexes), 4)

self.assertItemsEqual(ds.coords,
['time', 'dim1', 'dim2', 'dim3', 'numbers'])
self.assertIn('dim1', ds.coords)
self.assertIn('numbers', ds.coords)
self.assertNotIn('var1', ds.coords)
self.assertEqual(len(ds.coords), 5)

def test_variable(self):
a = Dataset()
d = np.random.random((10, 3))
Expand All @@ -182,7 +199,7 @@ def test_variable(self):
with self.assertRaises(ValueError):
a['qux'] = (('time', 'x'), d.T)

def test_coords_create(self):
def test_modify_inplace(self):
a = Dataset()
vec = np.random.random((10,))
attributes = {'foo': 'bar'}
Expand Down Expand Up @@ -412,19 +429,19 @@ def test_isel(self):

ret = data.isel(dim1=0)
self.assertEqual({'time': 20, 'dim2': 9, 'dim3': 10}, ret.dims)
self.assertItemsEqual(data.noncoords, ret.noncoords)
self.assertItemsEqual(data, ret)
self.assertItemsEqual(data.coords, ret.coords)
self.assertItemsEqual(data.indexes, list(ret.indexes) + ['dim1'])

ret = data.isel(time=slice(2), dim1=0, dim2=slice(5))
self.assertEqual({'time': 2, 'dim2': 5, 'dim3': 10}, ret.dims)
self.assertItemsEqual(data.noncoords, ret.noncoords)
self.assertItemsEqual(data, ret)
self.assertItemsEqual(data.coords, ret.coords)
self.assertItemsEqual(data.indexes, list(ret.indexes) + ['dim1'])

ret = data.isel(time=0, dim1=0, dim2=slice(5))
self.assertItemsEqual({'dim2': 5, 'dim3': 10}, ret.dims)
self.assertItemsEqual(data.noncoords, ret.noncoords)
self.assertItemsEqual(data, ret)
self.assertItemsEqual(data.coords, ret.coords)
self.assertItemsEqual(data.indexes,
list(ret.indexes) + ['dim1', 'time'])
Expand Down Expand Up @@ -738,11 +755,12 @@ def test_setitem(self):
def test_delitem(self):
data = create_test_data()
all_items = set(data.variables)
self.assertItemsEqual(data, all_items)
self.assertItemsEqual(data.variables, all_items)
del data['var1']
self.assertItemsEqual(data, all_items - set(['var1']))
self.assertItemsEqual(data.variables, all_items - set(['var1']))
del data['dim1']
self.assertItemsEqual(data, set(['time', 'dim2', 'dim3', 'numbers']))
self.assertItemsEqual(data.variables,
set(['time', 'dim2', 'dim3', 'numbers']))
self.assertNotIn('dim1', data.dims)
self.assertNotIn('dim1', data.coords)

Expand Down Expand Up @@ -840,7 +858,7 @@ def rectify_dim_order(dataset):
# return a new dataset with all variable dimensions transposed into
# the order in which they are found in `data`
return Dataset(dict((k, v.transpose(*data[k].dims))
for k, v in iteritems(dataset.noncoords)),
for k, v in iteritems(dataset)),
dataset.coords, attrs=dataset.attrs)

for dim in ['dim1', 'dim2', 'dim3']:
Expand Down Expand Up @@ -985,7 +1003,7 @@ def test_reduce(self):

actual = data.max()
expected = Dataset(dict((k, v.max())
for k, v in iteritems(data.noncoords)))
for k, v in iteritems(data)))
self.assertDatasetEqual(expected, actual)

self.assertDatasetEqual(data.min(dim=['dim1']),
Expand All @@ -1011,7 +1029,7 @@ def test_reduce_non_numeric(self):
data2 = create_test_data(seed=44)
add_vars = {'var4': ['dim1', 'dim2']}
for v, dims in sorted(add_vars.items()):
size = tuple(_dims[d] for d in dims)
size = tuple(data1.dims[d] for d in dims)
data = np.random.random_integers(0, 100, size=size).astype(np.str_)
data1[v] = (dims, data, {'foo': 'variable'})

Expand Down