Skip to content

Detailed report for testing.assert_equal and testing.assert_identical #1507

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Jan 18, 2019
5 changes: 5 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,11 @@ Enhancements
- Upsampling an array via interpolation with resample is now dask-compatible,
as long as the array is not chunked along the resampling dimension.
By `Spencer Clark <https://github.com/spencerkclark>`_.
- :py:func:`xarray.testing.assert_equal` and
:py:func:`xarray.testing.assert_identical` now provide a more detailed
report showing what exactly differs between the two objects (dimensions /
coordinates / variables / attributes) (:issue:`1507`).
By `Benoit Bovy <https://github.com/benbovy>`_.

Bug fixes
~~~~~~~~~
Expand Down
146 changes: 140 additions & 6 deletions xarray/core/formatting.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import numpy as np
import pandas as pd

from .duck_array_ops import array_equiv
from .options import OPTIONS
from .pycompat import (
PY2, bytes_type, dask_array_type, unicode_type, zip_longest)
Expand Down Expand Up @@ -411,6 +412,15 @@ def short_dask_repr(array, show_dtype=True):
return 'dask.array<shape=%s, chunksize=%s>' % (array.shape, chunksize)


def short_data_repr(array):
    """Return a compact text representation of the data held by ``array``.

    The underlying ``_data`` is looked up on ``array.variable`` when that
    attribute exists, otherwise on ``array`` itself. Dask-backed data is
    delegated to ``short_dask_repr``. Otherwise, when ``array._in_memory``
    is truthy or ``array.size`` is below 1e5, the values are rendered with
    ``short_array_repr``. Anything else is reduced to a one-line placeholder
    giving only the size and dtype.
    """
    holder = getattr(array, 'variable', array)
    if isinstance(holder._data, dask_array_type):
        return short_dask_repr(array)

    if array._in_memory or array.size < 1e5:
        return short_array_repr(array.values)

    # Large and not flagged as in memory: avoid touching ``array.values``.
    return u'[%s values with dtype=%s]' % (array.size, array.dtype)


def array_repr(arr):
# used for DataArray, Variable and IndexVariable
if hasattr(arr, 'name') and arr.name is not None:
Expand All @@ -421,12 +431,7 @@ def array_repr(arr):
summary = [u'<xarray.%s %s(%s)>'
% (type(arr).__name__, name_str, dim_summary(arr))]

if isinstance(getattr(arr, 'variable', arr)._data, dask_array_type):
summary.append(short_dask_repr(arr))
elif arr._in_memory or arr.size < 1e5:
summary.append(short_array_repr(arr.values))
else:
summary.append(u'[%s values with dtype=%s]' % (arr.size, arr.dtype))
summary.append(short_data_repr(arr))

if hasattr(arr, 'coords'):
if arr.coords:
Expand Down Expand Up @@ -463,3 +468,132 @@ def dataset_repr(ds):
summary.append(attrs_repr(ds.attrs))

return u'\n'.join(summary)


def diff_dim_summary(a, b):
    """Describe a mismatch between the ``dims`` of ``a`` and ``b``.

    Returns an empty string when ``a.dims == b.dims``; otherwise a two-line
    message built from ``dim_summary`` of each object.
    """
    if a.dims == b.dims:
        return ""

    left, right = dim_summary(a), dim_summary(b)
    return "Differing dimensions:\n ({}) != ({})".format(left, right)


def _diff_mapping_repr(a_mapping, b_mapping, compat,
title, summarizer, col_width=None):

def extra_items_repr(extra_keys, mapping, ab_side):
extra_repr = [summarizer(k, mapping[k], col_width) for k in extra_keys]
if extra_repr:
header = "{} only on the {} object:".format(title, ab_side)
return [header] + extra_repr
else:
return []

a_keys = set(a_mapping)
b_keys = set(b_mapping)

summary = []

diff_items = []

for k in a_keys & b_keys:
try:
# compare xarray variable
compatible = getattr(a_mapping[k], compat)(b_mapping[k])
is_variable = True
except AttributeError:
# compare attribute value
compatible = a_mapping[k] == b_mapping[k]
is_variable = False

if not compatible:
temp = [summarizer(k, vars[k], col_width)
for vars in (a_mapping, b_mapping)]

if compat == 'identical' and is_variable:
attrs_summary = []

for m in (a_mapping, b_mapping):
attr_s = "\n".join([summarize_attr(ak, av)
for ak, av in m[k].attrs.items()])
attrs_summary.append(attr_s)

temp = ["\n".join([var_s, attr_s]) if attr_s else var_s
for var_s, attr_s in zip(temp, attrs_summary)]

diff_items += [ab_side + s[1:]
for ab_side, s in zip(('L', 'R'), temp)]

if diff_items:
summary += ["Differing {}:".format(title.lower())] + diff_items

summary += extra_items_repr(a_keys - b_keys, a_mapping, "left")
summary += extra_items_repr(b_keys - a_keys, b_mapping, "right")

return "\n".join(summary)


# Specialisations of ``_diff_mapping_repr``: each one fixes the section
# title and the per-item summarizer, leaving the two mappings, ``compat``
# and ``col_width`` to be supplied by the caller.
diff_coords_repr = functools.partial(
    _diff_mapping_repr, title="Coordinates", summarizer=summarize_coord)


diff_data_vars_repr = functools.partial(
    _diff_mapping_repr, title="Data variables", summarizer=summarize_datavar)


diff_attrs_repr = functools.partial(
    _diff_mapping_repr, title="Attributes", summarizer=summarize_attr)


def _compat_to_str(compat):
if compat == "equals":
return "equal"
else:
return compat


def diff_array_repr(a, b, compat):
    """Build the report describing how two array-like objects differ.

    Used for DataArray, Variable and IndexVariable. The report starts with
    a header naming the type of ``a`` and the kind of comparison, then the
    dimension summary. If ``a.data`` and ``b.data`` are not equivalent it
    adds both value reprs; if ``a`` has ``coords`` it adds the coordinate
    differences; if ``compat`` is ``'identical'`` it adds the attribute
    differences. All sections are joined with newlines.
    """
    header = "Left and right {} objects are not {}".format(
        type(a).__name__, _compat_to_str(compat))
    sections = [header, diff_dim_summary(a, b)]

    if not array_equiv(a.data, b.data):
        indented = [wrap_indent(short_array_repr(obj), start=' ')
                    for obj in (a, b)]
        sections.append("Differing values:")
        for side, data_repr in zip(('L', 'R'), indented):
            sections.append(side + "\n" + data_repr)

    if hasattr(a, 'coords'):
        coord_names = set(a.coords) | set(b.coords)
        sections.append(diff_coords_repr(
            a.coords, b.coords, compat,
            col_width=_calculate_col_width(coord_names)))

    if compat == 'identical':
        sections.append(diff_attrs_repr(a.attrs, b.attrs, compat))

    return "\n".join(sections)


def diff_dataset_repr(a, b, compat):
    """Build the report describing how two Dataset-like objects differ.

    The report is a header naming the type of ``a`` and the kind of
    comparison, followed by the dimension summary, the coordinate
    differences and the data-variable differences. When ``compat`` is
    ``'identical'`` the attribute differences are added too. A single
    column width, computed from the variables of both objects, is shared
    by the coordinate and data-variable sections.
    """
    header = "Left and right {} objects are not {}".format(
        type(a).__name__, _compat_to_str(compat))

    col_items = _get_col_items(a.variables) + _get_col_items(b.variables)
    col_width = _calculate_col_width(set(col_items))

    sections = [
        header,
        diff_dim_summary(a, b),
        diff_coords_repr(a.coords, b.coords, compat, col_width=col_width),
        diff_data_vars_repr(a.data_vars, b.data_vars, compat,
                            col_width=col_width),
    ]

    if compat == 'identical':
        sections.append(diff_attrs_repr(a.attrs, b.attrs, compat))

    return "\n".join(sections)
15 changes: 10 additions & 5 deletions xarray/testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import numpy as np

from xarray.core import duck_array_ops
from xarray.core import formatting


def _decode_string_data(data):
Expand Down Expand Up @@ -49,8 +50,10 @@ def assert_equal(a, b):
import xarray as xr
__tracebackhide__ = True # noqa: F841
assert type(a) == type(b) # noqa
if isinstance(a, (xr.Variable, xr.DataArray, xr.Dataset)):
assert a.equals(b), '{}\n{}'.format(a, b)
if isinstance(a, (xr.Variable, xr.DataArray)):
assert a.equals(b), formatting.diff_array_repr(a, b, 'equals')
elif isinstance(a, xr.Dataset):
assert a.equals(b), formatting.diff_dataset_repr(a, b, 'equals')
else:
raise TypeError('{} not supported by assertion comparison'
.format(type(a)))
Expand All @@ -76,11 +79,13 @@ def assert_identical(a, b):
import xarray as xr
__tracebackhide__ = True # noqa: F841
assert type(a) == type(b) # noqa
if isinstance(a, xr.DataArray):
if isinstance(a, xr.Variable):
assert a.identical(b), formatting.diff_array_repr(a, b, 'identical')
elif isinstance(a, xr.DataArray):
assert a.name == b.name
assert_identical(a._to_temp_dataset(), b._to_temp_dataset())
assert a.identical(b), formatting.diff_array_repr(a, b, 'identical')
elif isinstance(a, (xr.Dataset, xr.Variable)):
assert a.identical(b), '{}\n{}'.format(a, b)
assert a.identical(b), formatting.diff_dataset_repr(a, b, 'identical')
else:
raise TypeError('{} not supported by assertion comparison'
.format(type(a)))
Expand Down
117 changes: 117 additions & 0 deletions xarray/tests/test_formatting.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import, division, print_function

from textwrap import dedent

import numpy as np
import pandas as pd

import xarray as xr
from xarray.core import formatting
from xarray.core.pycompat import PY3

Expand Down Expand Up @@ -190,6 +193,120 @@ def test_attribute_repr(self):
assert u'\n' not in newlines
assert u'\t' not in tabs

def test_diff_array_repr(self):
# Checks the text returned by formatting.diff_array_repr, first for two
# DataArrays compared with 'identical', then for two Variables compared
# with 'equals'.
# NOTE(review): indentation and runs of spaces inside the expected text look
# collapsed in this view -- confirm the literals against the real file.

# Left DataArray: 2-D, with coordinates on 'x' and 'y' and two attributes.
da_a = xr.DataArray(
np.array([[1, 2, 3], [4, 5, 6]], dtype='int64'),
dims=('x', 'y'),
coords={'x': np.array(['a', 'b'], dtype='U1'),
'y': np.array([1, 2, 3], dtype='int64')},
attrs={'units': 'm', 'description': 'desc'})

# Right DataArray: 1-D, with a different 'x' coordinate, an extra 'label'
# coordinate and a different 'units' attribute.
da_b = xr.DataArray(
np.array([1, 2], dtype='int64'),
dims='x',
coords={'x': np.array(['a', 'c'], dtype='U1'),
'label': ('x', np.array([1, 2], dtype='int64'))},
attrs={'units': 'kg'})

expected = dedent("""\
Left and right DataArray objects are not identical
Differing dimensions:
(x: 2, y: 3) != (x: 2)
Differing values:
L
array([[1, 2, 3],
[4, 5, 6]], dtype=int64)
R
array([1, 2], dtype=int64)
Differing coordinates:
L * x (x) <U1 'a' 'b'
R * x (x) <U1 'a' 'c'
Coordinates only on the left object:
* y (y) int64 1 2 3
Coordinates only on the right object:
label (x) int64 1 2
Differing attributes:
L units: m
R units: kg
Attributes only on the left object:
description: desc""")

actual = formatting.diff_array_repr(da_a, da_b, 'identical')
try:
assert actual == expected
except AssertionError:
# depending on platform, dtype may not be shown in numpy array repr
assert actual == expected.replace(", dtype=int64", "")

# Second case: plain Variables compared with 'equals'; the expected report
# has only the dimension and value sections.
va = xr.Variable('x', np.array([1, 2, 3], dtype='int64'),
{'title': 'test Variable'})
vb = xr.Variable(('x', 'y'),
np.array([[1, 2, 3], [4, 5, 6]], dtype='int64'))

expected = dedent("""\
Left and right Variable objects are not equal
Differing dimensions:
(x: 3) != (x: 2, y: 3)
Differing values:
L
array([1, 2, 3], dtype=int64)
R
array([[1, 2, 3],
[4, 5, 6]], dtype=int64)""")

actual = formatting.diff_array_repr(va, vb, 'equals')
try:
assert actual == expected
except AssertionError:
# same fallback as above: accept a repr that omits the dtype
assert actual == expected.replace(", dtype=int64", "")

def test_diff_dataset_repr(self):
# Checks the text returned by formatting.diff_dataset_repr for two Datasets
# compared with 'identical'.
# NOTE(review): indentation and runs of spaces inside the expected text look
# collapsed in this view -- confirm the literal against the real file.

# Left Dataset: two data variables, coordinates on 'x' and 'y', two
# attributes.
ds_a = xr.Dataset(
data_vars={
'var1': (('x', 'y'),
np.array([[1, 2, 3], [4, 5, 6]], dtype='int64')),
'var2': ('x', np.array([3, 4], dtype='int64'))
},
coords={'x': np.array(['a', 'b'], dtype='U1'),
'y': np.array([1, 2, 3], dtype='int64')},
attrs={'units': 'm', 'description': 'desc'}
)

# Right Dataset: 1-D 'var1' only, an 'x' coordinate carrying a 'source'
# attribute, an extra 'label' coordinate and a different 'units' attribute.
ds_b = xr.Dataset(
data_vars={'var1': ('x', np.array([1, 2], dtype='int64'))},
coords={
'x': ('x', np.array(['a', 'c'], dtype='U1'), {'source': 0}),
'label': ('x', np.array([1, 2], dtype='int64'))
},
attrs={'units': 'kg'}
)

expected = dedent("""\
Left and right Dataset objects are not identical
Differing dimensions:
(x: 2, y: 3) != (x: 2)
Differing coordinates:
L * x (x) <U1 'a' 'b'
R * x (x) <U1 'a' 'c'
source: 0
Coordinates only on the left object:
* y (y) int64 1 2 3
Coordinates only on the right object:
label (x) int64 1 2
Differing data variables:
L var1 (x, y) int64 1 2 3 4 5 6
R var1 (x) int64 1 2
Data variables only on the left object:
var2 (x) int64 3 4
Differing attributes:
L units: m
R units: kg
Attributes only on the left object:
description: desc""")

actual = formatting.diff_dataset_repr(ds_a, ds_b, 'identical')
assert actual == expected


def test_set_numpy_options():
original_options = np.get_printoptions()
Expand Down