Skip to content

Commit 0afbd45

Browse files
mgunyho and dcherian authored
Consistently report all dimensions in error messages if invalid dimensions are given (#8079)
* Show dims and coords in idxmin/idxmax error message if an invalid dim is given
* Show data dims in error messages of Dataset and update tests
  Remove _assert_empty, not used anymore
* Update test for dataarray
* Show data dims in error messages of weighted and update test
* Show dimensions in error message of group_indexers_by_index
* List coordinates in concat error message, update test
* List coordinates in coords __delitem__ error message, update tests
* Show list of names in error message of PandasMultiIndex.sel, update test
* Show list of dimensions in error messages of Rolling and Coarsen, update tests
* Show dims in Variable.concat error message as tuple for consistency
* Change 'dataset' to 'data' in error messages
* Update whats-new

---------

Co-authored-by: Deepak Cherian <[email protected]>
1 parent aea9af0 commit 0afbd45

18 files changed

+177
-69
lines changed

doc/whats-new.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,8 @@ Documentation
6767
Internal Changes
6868
~~~~~~~~~~~~~~~~
6969

70+
- Many error messages related to invalid dimensions or coordinates now always show the list of valid dims/coords (:pull:`8079`).
71+
By `András Gunyhó <https://github.com/mgunyho>`_.
7072

7173
.. _whats-new.2023.08.0:
7274

xarray/core/computation.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2046,9 +2046,13 @@ def _calc_idxminmax(
20462046
raise ValueError("Must supply 'dim' argument for multidimensional arrays")
20472047

20482048
if dim not in array.dims:
2049-
raise KeyError(f'Dimension "{dim}" not in dimension')
2049+
raise KeyError(
2050+
f"Dimension {dim!r} not found in array dimensions {array.dims!r}"
2051+
)
20502052
if dim not in array.coords:
2051-
raise KeyError(f'Dimension "{dim}" does not have coordinates')
2053+
raise KeyError(
2054+
f"Dimension {dim!r} is not one of the coordinates {tuple(array.coords.keys())}"
2055+
)
20522056

20532057
# These are dtypes with NaN values argmin and argmax can handle
20542058
na_dtypes = "cfO"

xarray/core/concat.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -391,17 +391,20 @@ def process_subset_opt(opt, subset):
391391
else:
392392
raise ValueError(f"unexpected value for {subset}: {opt}")
393393
else:
394-
invalid_vars = [k for k in opt if k not in getattr(datasets[0], subset)]
394+
valid_vars = tuple(getattr(datasets[0], subset))
395+
invalid_vars = [k for k in opt if k not in valid_vars]
395396
if invalid_vars:
396397
if subset == "coords":
397398
raise ValueError(
398-
"some variables in coords are not coordinates on "
399-
f"the first dataset: {invalid_vars}"
399+
f"the variables {invalid_vars} in coords are not "
400+
f"found in the coordinates of the first dataset {valid_vars}"
400401
)
401402
else:
403+
# note: data_vars are not listed in the error message here,
404+
# because there may be lots of them
402405
raise ValueError(
403-
"some variables in data_vars are not data variables "
404-
f"on the first dataset: {invalid_vars}"
406+
f"the variables {invalid_vars} in data_vars are not "
407+
f"found in the data variables of the first dataset"
405408
)
406409
concat_over.update(opt)
407410

xarray/core/coordinates.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -769,7 +769,9 @@ def __delitem__(self, key: Hashable) -> None:
769769
if key in self:
770770
del self._data[key]
771771
else:
772-
raise KeyError(f"{key!r} is not a coordinate variable.")
772+
raise KeyError(
773+
f"{key!r} is not in coordinate variables {tuple(self.keys())}"
774+
)
773775

774776
def _ipython_key_completions_(self):
775777
"""Provide method for the key-autocompletions in IPython."""
@@ -855,7 +857,9 @@ def to_dataset(self) -> Dataset:
855857

856858
def __delitem__(self, key: Hashable) -> None:
857859
if key not in self:
858-
raise KeyError(f"{key!r} is not a coordinate variable.")
860+
raise KeyError(
861+
f"{key!r} is not in coordinate variables {tuple(self.keys())}"
862+
)
859863
assert_no_index_corrupted(self._data.xindexes, {key})
860864

861865
del self._data._coords[key]

xarray/core/dataset.py

Lines changed: 42 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -213,11 +213,6 @@ def _get_virtual_variable(
213213
return ref_name, var_name, virtual_var
214214

215215

216-
def _assert_empty(args: tuple, msg: str = "%s") -> None:
217-
if args:
218-
raise ValueError(msg % args)
219-
220-
221216
def _get_chunk(var: Variable, chunks, chunkmanager: ChunkManagerEntrypoint):
222217
"""
223218
Return map from each dim to chunk sizes, accounting for backend's preferred chunks.
@@ -2640,7 +2635,7 @@ def chunk(
26402635
bad_dims = chunks.keys() - self.dims.keys()
26412636
if bad_dims:
26422637
raise ValueError(
2643-
f"some chunks keys are not dimensions on this object: {bad_dims}"
2638+
f"chunks keys {tuple(bad_dims)} not found in data dimensions {tuple(self.dims)}"
26442639
)
26452640

26462641
chunkmanager = guess_chunkmanager(chunked_array_type)
@@ -4243,8 +4238,8 @@ def rename_dims(
42434238
for k, v in dims_dict.items():
42444239
if k not in self.dims:
42454240
raise ValueError(
4246-
f"cannot rename {k!r} because it is not a "
4247-
"dimension in this dataset"
4241+
f"cannot rename {k!r} because it is not found "
4242+
f"in the dimensions of this dataset {tuple(self.dims)}"
42484243
)
42494244
if v in self.dims or v in self:
42504245
raise ValueError(
@@ -4366,7 +4361,7 @@ def swap_dims(
43664361
if k not in self.dims:
43674362
raise ValueError(
43684363
f"cannot swap from dimension {k!r} because it is "
4369-
"not an existing dimension"
4364+
f"not one of the dimensions of this dataset {tuple(self.dims)}"
43704365
)
43714366
if v in self.variables and self.variables[v].dims != (k,):
43724367
raise ValueError(
@@ -5448,10 +5443,10 @@ def unstack(
54485443
else:
54495444
dims = list(dim)
54505445

5451-
missing_dims = [d for d in dims if d not in self.dims]
5446+
missing_dims = set(dims) - set(self.dims)
54525447
if missing_dims:
54535448
raise ValueError(
5454-
f"Dataset does not contain the dimensions: {missing_dims}"
5449+
f"Dimensions {tuple(missing_dims)} not found in data dimensions {tuple(self.dims)}"
54555450
)
54565451

54575452
# each specified dimension must have exactly one multi-index
@@ -5836,7 +5831,10 @@ def drop_indexes(
58365831
if errors == "raise":
58375832
invalid_coords = coord_names - self._coord_names
58385833
if invalid_coords:
5839-
raise ValueError(f"those coordinates don't exist: {invalid_coords}")
5834+
raise ValueError(
5835+
f"The coordinates {tuple(invalid_coords)} are not found in the "
5836+
f"dataset coordinates {tuple(self.coords.keys())}"
5837+
)
58405838

58415839
unindexed_coords = set(coord_names) - set(self._indexes)
58425840
if unindexed_coords:
@@ -6084,7 +6082,7 @@ def drop_dims(
60846082
missing_dims = drop_dims - set(self.dims)
60856083
if missing_dims:
60866084
raise ValueError(
6087-
f"Dataset does not contain the dimensions: {missing_dims}"
6085+
f"Dimensions {tuple(missing_dims)} not found in data dimensions {tuple(self.dims)}"
60886086
)
60896087

60906088
drop_vars = {k for k, v in self._variables.items() if set(v.dims) & drop_dims}
@@ -6244,7 +6242,9 @@ def dropna(
62446242
# depending on the order of the supplied axes.
62456243

62466244
if dim not in self.dims:
6247-
raise ValueError(f"{dim} must be a single dataset dimension")
6245+
raise ValueError(
6246+
f"Dimension {dim!r} not found in data dimensions {tuple(self.dims)}"
6247+
)
62486248

62496249
if subset is None:
62506250
subset = iter(self.data_vars)
@@ -6725,10 +6725,10 @@ def reduce(
67256725
else:
67266726
dims = set(dim)
67276727

6728-
missing_dimensions = [d for d in dims if d not in self.dims]
6728+
missing_dimensions = tuple(d for d in dims if d not in self.dims)
67296729
if missing_dimensions:
67306730
raise ValueError(
6731-
f"Dataset does not contain the dimensions: {missing_dimensions}"
6731+
f"Dimensions {missing_dimensions} not found in data dimensions {tuple(self.dims)}"
67326732
)
67336733

67346734
if keep_attrs is None:
@@ -7710,9 +7710,11 @@ def shift(
77107710
foo (x) object nan nan 'a' 'b' 'c'
77117711
"""
77127712
shifts = either_dict_or_kwargs(shifts, shifts_kwargs, "shift")
7713-
invalid = [k for k in shifts if k not in self.dims]
7713+
invalid = tuple(k for k in shifts if k not in self.dims)
77147714
if invalid:
7715-
raise ValueError(f"dimensions {invalid!r} do not exist")
7715+
raise ValueError(
7716+
f"Dimensions {invalid} not found in data dimensions {tuple(self.dims)}"
7717+
)
77167718

77177719
variables = {}
77187720
for name, var in self.variables.items():
@@ -7789,7 +7791,9 @@ def roll(
77897791
shifts = either_dict_or_kwargs(shifts, shifts_kwargs, "roll")
77907792
invalid = [k for k in shifts if k not in self.dims]
77917793
if invalid:
7792-
raise ValueError(f"dimensions {invalid!r} do not exist")
7794+
raise ValueError(
7795+
f"Dimensions {invalid} not found in data dimensions {tuple(self.dims)}"
7796+
)
77937797

77947798
unrolled_vars: tuple[Hashable, ...]
77957799

@@ -8038,10 +8042,11 @@ def quantile(
80388042
else:
80398043
dims = set(dim)
80408044

8041-
_assert_empty(
8042-
tuple(d for d in dims if d not in self.dims),
8043-
"Dataset does not contain the dimensions: %s",
8044-
)
8045+
invalid_dims = set(dims) - set(self.dims)
8046+
if invalid_dims:
8047+
raise ValueError(
8048+
f"Dimensions {tuple(invalid_dims)} not found in data dimensions {tuple(self.dims)}"
8049+
)
80458050

80468051
q = np.asarray(q, dtype=np.float64)
80478052

@@ -8117,7 +8122,9 @@ def rank(
81178122
)
81188123

81198124
if dim not in self.dims:
8120-
raise ValueError(f"Dataset does not contain the dimension: {dim}")
8125+
raise ValueError(
8126+
f"Dimension {dim!r} not found in data dimensions {tuple(self.dims)}"
8127+
)
81218128

81228129
variables = {}
81238130
for name, var in self.variables.items():
@@ -8167,7 +8174,10 @@ def differentiate(
81678174
from xarray.core.variable import Variable
81688175

81698176
if coord not in self.variables and coord not in self.dims:
8170-
raise ValueError(f"Coordinate {coord} does not exist.")
8177+
variables_and_dims = tuple(set(self.variables.keys()).union(self.dims))
8178+
raise ValueError(
8179+
f"Coordinate {coord!r} not found in variables or dimensions {variables_and_dims}."
8180+
)
81718181

81728182
coord_var = self[coord].variable
81738183
if coord_var.ndim != 1:
@@ -8269,7 +8279,10 @@ def _integrate_one(self, coord, datetime_unit=None, cumulative=False):
82698279
from xarray.core.variable import Variable
82708280

82718281
if coord not in self.variables and coord not in self.dims:
8272-
raise ValueError(f"Coordinate {coord} does not exist.")
8282+
variables_and_dims = tuple(set(self.variables.keys()).union(self.dims))
8283+
raise ValueError(
8284+
f"Coordinate {coord!r} not found in variables or dimensions {variables_and_dims}."
8285+
)
82738286

82748287
coord_var = self[coord].variable
82758288
if coord_var.ndim != 1:
@@ -9771,7 +9784,9 @@ def drop_duplicates(
97719784

97729785
missing_dims = set(dims) - set(self.dims)
97739786
if missing_dims:
9774-
raise ValueError(f"'{missing_dims}' not found in dimensions")
9787+
raise ValueError(
9788+
f"Dimensions {tuple(missing_dims)} not found in data dimensions {tuple(self.dims)}"
9789+
)
97759790

97769791
indexes = {dim: ~self.get_index(dim).duplicated(keep=keep) for dim in dims}
97779792
return self.isel(indexes)

xarray/core/indexes.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1203,12 +1203,12 @@ def sel(self, labels, method=None, tolerance=None) -> IndexSelResult:
12031203
coord_name, label = next(iter(labels.items()))
12041204

12051205
if is_dict_like(label):
1206-
invalid_levels = [
1206+
invalid_levels = tuple(
12071207
name for name in label if name not in self.index.names
1208-
]
1208+
)
12091209
if invalid_levels:
12101210
raise ValueError(
1211-
f"invalid multi-index level names {invalid_levels}"
1211+
f"multi-index level names {invalid_levels} not found in indexes {tuple(self.index.names)}"
12121212
)
12131213
return self.sel(label)
12141214

xarray/core/indexing.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,10 @@ def group_indexers_by_index(
143143
elif key in obj.coords:
144144
raise KeyError(f"no index found for coordinate {key!r}")
145145
elif key not in obj.dims:
146-
raise KeyError(f"{key!r} is not a valid dimension or coordinate")
146+
raise KeyError(
147+
f"{key!r} is not a valid dimension or coordinate for "
148+
f"{obj.__class__.__name__} with dimensions {obj.dims!r}"
149+
)
147150
elif len(options):
148151
raise ValueError(
149152
f"cannot supply selection options {options!r} for dimension {key!r}"

xarray/core/rolling.py

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,14 @@ def __init__(
102102
self.center = self._mapping_to_list(center, default=False)
103103
self.obj: T_Xarray = obj
104104

105+
missing_dims = tuple(dim for dim in self.dim if dim not in self.obj.dims)
106+
if missing_dims:
107+
# NOTE: we raise KeyError here but ValueError in Coarsen.
108+
raise KeyError(
109+
f"Window dimensions {missing_dims} not found in {self.obj.__class__.__name__} "
110+
f"dimensions {tuple(self.obj.dims)}"
111+
)
112+
105113
# attributes
106114
if min_periods is not None and min_periods <= 0:
107115
raise ValueError("min_periods must be greater than zero or None")
@@ -624,8 +632,7 @@ def __init__(
624632
xarray.DataArray.groupby
625633
"""
626634
super().__init__(obj, windows, min_periods, center)
627-
if any(d not in self.obj.dims for d in self.dim):
628-
raise KeyError(self.dim)
635+
629636
# Keep each Rolling object as a dictionary
630637
self.rollings = {}
631638
for key, da in self.obj.data_vars.items():
@@ -839,10 +846,11 @@ def __init__(
839846
self.side = side
840847
self.boundary = boundary
841848

842-
absent_dims = [dim for dim in windows.keys() if dim not in self.obj.dims]
843-
if absent_dims:
849+
missing_dims = tuple(dim for dim in windows.keys() if dim not in self.obj.dims)
850+
if missing_dims:
844851
raise ValueError(
845-
f"Dimensions {absent_dims!r} not found in {self.obj.__class__.__name__}."
852+
f"Window dimensions {missing_dims} not found in {self.obj.__class__.__name__} "
853+
f"dimensions {tuple(self.obj.dims)}"
846854
)
847855
if not utils.is_dict_like(coord_func):
848856
coord_func = {d: coord_func for d in self.obj.dims} # type: ignore[misc]

xarray/core/variable.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2117,7 +2117,7 @@ def concat(
21172117
for var in variables:
21182118
if var.dims != first_var_dims:
21192119
raise ValueError(
2120-
f"Variable has dimensions {list(var.dims)} but first Variable has dimensions {list(first_var_dims)}"
2120+
f"Variable has dimensions {tuple(var.dims)} but first Variable has dimensions {tuple(first_var_dims)}"
21212121
)
21222122

21232123
return cls(dims, data, attrs, encoding, fastpath=True)

xarray/core/weighted.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -198,10 +198,11 @@ def _check_dim(self, dim: Dims):
198198
dims = [dim] if dim else []
199199
else:
200200
dims = list(dim)
201-
missing_dims = set(dims) - set(self.obj.dims) - set(self.weights.dims)
201+
all_dims = set(self.obj.dims).union(set(self.weights.dims))
202+
missing_dims = set(dims) - all_dims
202203
if missing_dims:
203204
raise ValueError(
204-
f"{self.__class__.__name__} does not contain the dimensions: {missing_dims}"
205+
f"Dimensions {tuple(missing_dims)} not found in {self.__class__.__name__} dimensions {tuple(all_dims)}"
205206
)
206207

207208
@staticmethod

xarray/tests/test_coarsen.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,10 @@
1717

1818

1919
def test_coarsen_absent_dims_error(ds: Dataset) -> None:
20-
with pytest.raises(ValueError, match=r"not found in Dataset."):
20+
with pytest.raises(
21+
ValueError,
22+
match=r"Window dimensions \('foo',\) not found in Dataset dimensions",
23+
):
2124
ds.coarsen(foo=2)
2225

2326

xarray/tests/test_concat.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -614,9 +614,12 @@ def test_concat_errors(self):
614614
with pytest.raises(ValueError, match=r"must supply at least one"):
615615
concat([], "dim1")
616616

617-
with pytest.raises(ValueError, match=r"are not coordinates"):
617+
with pytest.raises(ValueError, match=r"are not found in the coordinates"):
618618
concat([data, data], "new_dim", coords=["not_found"])
619619

620+
with pytest.raises(ValueError, match=r"are not found in the data variables"):
621+
concat([data, data], "new_dim", data_vars=["not_found"])
622+
620623
with pytest.raises(ValueError, match=r"global attributes not"):
621624
# call deepcopy seperately to get unique attrs
622625
data0 = deepcopy(split_data[0])

xarray/tests/test_coordinates.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,11 @@ def test_delitem(self) -> None:
103103
del coords["x"]
104104
assert "x" not in coords
105105

106+
with pytest.raises(
107+
KeyError, match="'nonexistent' is not in coordinate variables"
108+
):
109+
del coords["nonexistent"]
110+
106111
def test_update(self) -> None:
107112
coords = Coordinates(coords={"x": [0, 1, 2]})
108113

0 commit comments

Comments
 (0)