From e01bca0d1748cfd3545b01e2457c8d221773567a Mon Sep 17 00:00:00 2001 From: mgunyho <20118130+mgunyho@users.noreply.github.com> Date: Thu, 17 Aug 2023 17:02:31 +0300 Subject: [PATCH 01/12] Show dims and coords in idxmin/idxmax error message if an invalid dim is given --- xarray/core/computation.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/xarray/core/computation.py b/xarray/core/computation.py index 685307fc8c3..87da75fd616 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -2046,9 +2046,13 @@ def _calc_idxminmax( raise ValueError("Must supply 'dim' argument for multidimensional arrays") if dim not in array.dims: - raise KeyError(f'Dimension "{dim}" not in dimension') + raise KeyError( + f"Dimension {dim!r} not found in array dimensions {array.dims!r}" + ) if dim not in array.coords: - raise KeyError(f'Dimension "{dim}" does not have coordinates') + raise KeyError( + f"Dimension {dim!r} is not one of the coordinates {tuple(array.coords.keys())}" + ) # These are dtypes with NaN values argmin and argmax can handle na_dtypes = "cfO" From 0ffc8bc4adece1116cfc67ae96f0f84404a8e2b3 Mon Sep 17 00:00:00 2001 From: mgunyho <20118130+mgunyho@users.noreply.github.com> Date: Thu, 17 Aug 2023 17:04:57 +0300 Subject: [PATCH 02/12] Show data dims in error messages of Dataset and update tests Remove _assert_empty, not used anymore --- xarray/core/dataset.py | 69 ++++++++++++++++++++++-------------- xarray/tests/test_dataset.py | 48 ++++++++++++++++++++----- 2 files changed, 82 insertions(+), 35 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index f1a0cb9dc34..22de2892d50 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -212,11 +212,6 @@ def _get_virtual_variable( return ref_name, var_name, virtual_var -def _assert_empty(args: tuple, msg: str = "%s") -> None: - if args: - raise ValueError(msg % args) - - def _get_chunk(var: Variable, chunks, chunkmanager: ChunkManagerEntrypoint): 
""" Return map from each dim to chunk sizes, accounting for backend's preferred chunks. @@ -2634,7 +2629,7 @@ def chunk( bad_dims = chunks.keys() - self.dims.keys() if bad_dims: raise ValueError( - f"some chunks keys are not dimensions on this object: {bad_dims}" + f"chunks keys {tuple(bad_dims)} not found in dataset dimensions {tuple(self.dims)}" ) chunkmanager = guess_chunkmanager(chunked_array_type) @@ -4237,8 +4232,8 @@ def rename_dims( for k, v in dims_dict.items(): if k not in self.dims: raise ValueError( - f"cannot rename {k!r} because it is not a " - "dimension in this dataset" + f"cannot rename {k!r} because it is not found " + f"in the dimensions of this dataset {tuple(self.dims)}" ) if v in self.dims or v in self: raise ValueError( @@ -4360,7 +4355,7 @@ def swap_dims( if k not in self.dims: raise ValueError( f"cannot swap from dimension {k!r} because it is " - "not an existing dimension" + f"not one of the dimensions of this dataset {tuple(self.dims)}" ) if v in self.variables and self.variables[v].dims != (k,): raise ValueError( @@ -5444,10 +5439,10 @@ def unstack( else: dims = list(dim) - missing_dims = [d for d in dims if d not in self.dims] + missing_dims = set(dims) - set(self.dims) if missing_dims: raise ValueError( - f"Dataset does not contain the dimensions: {missing_dims}" + f"Dimensions {tuple(missing_dims)} not found in dataset dimensions {tuple(self.dims)}" ) # each specified dimension must have exactly one multi-index @@ -5832,7 +5827,10 @@ def drop_indexes( if errors == "raise": invalid_coords = coord_names - self._coord_names if invalid_coords: - raise ValueError(f"those coordinates don't exist: {invalid_coords}") + raise ValueError( + f"The coordinates {tuple(invalid_coords)} are not found in the " + f"dataset coordinates {tuple(self.coords.keys())}" + ) unindexed_coords = set(coord_names) - set(self._indexes) if unindexed_coords: @@ -6080,7 +6078,7 @@ def drop_dims( missing_dims = drop_dims - set(self.dims) if missing_dims: raise 
ValueError( - f"Dataset does not contain the dimensions: {missing_dims}" + f"Dimensions {tuple(missing_dims)} not found in dataset dimensions {tuple(self.dims)}" ) drop_vars = {k for k, v in self._variables.items() if set(v.dims) & drop_dims} @@ -6240,7 +6238,9 @@ def dropna( # depending on the order of the supplied axes. if dim not in self.dims: - raise ValueError(f"{dim} must be a single dataset dimension") + raise ValueError( + f"Dimension {dim!r} not found in dataset dimensions {tuple(self.dims)}" + ) if subset is None: subset = iter(self.data_vars) @@ -6721,10 +6721,10 @@ def reduce( else: dims = set(dim) - missing_dimensions = [d for d in dims if d not in self.dims] + missing_dimensions = tuple(d for d in dims if d not in self.dims) if missing_dimensions: raise ValueError( - f"Dataset does not contain the dimensions: {missing_dimensions}" + f"Dimensions {missing_dimensions} not found in dataset dimensions {tuple(self.dims)}" ) if keep_attrs is None: @@ -7707,9 +7707,11 @@ def shift( foo (x) object nan nan 'a' 'b' 'c' """ shifts = either_dict_or_kwargs(shifts, shifts_kwargs, "shift") - invalid = [k for k in shifts if k not in self.dims] + invalid = tuple(k for k in shifts if k not in self.dims) if invalid: - raise ValueError(f"dimensions {invalid!r} do not exist") + raise ValueError( + f"Dimensions {invalid} not found in dataset dimensions {tuple(self.dims)}" + ) variables = {} for name, var in self.variables.items(): @@ -7786,7 +7788,9 @@ def roll( shifts = either_dict_or_kwargs(shifts, shifts_kwargs, "roll") invalid = [k for k in shifts if k not in self.dims] if invalid: - raise ValueError(f"dimensions {invalid!r} do not exist") + raise ValueError( + f"Dimensions {invalid} not found in dataset dimensions {tuple(self.dims)}" + ) unrolled_vars: tuple[Hashable, ...] 
@@ -8035,10 +8039,11 @@ def quantile( else: dims = set(dim) - _assert_empty( - tuple(d for d in dims if d not in self.dims), - "Dataset does not contain the dimensions: %s", - ) + invalid_dims = set(dims) - set(self.dims) + if invalid_dims: + raise ValueError( + f"Dimensions {tuple(invalid_dims)} not found in dataset dimensions {tuple(self.dims)}" + ) q = np.asarray(q, dtype=np.float64) @@ -8114,7 +8119,9 @@ def rank( ) if dim not in self.dims: - raise ValueError(f"Dataset does not contain the dimension: {dim}") + raise ValueError( + f"Dimension {dim!r} not found in dataset dimensions {tuple(self.dims)}" + ) variables = {} for name, var in self.variables.items(): @@ -8164,7 +8171,10 @@ def differentiate( from xarray.core.variable import Variable if coord not in self.variables and coord not in self.dims: - raise ValueError(f"Coordinate {coord} does not exist.") + variables_and_dims = tuple(set(self.variables.keys()).union(self.dims)) + raise ValueError( + f"Coordinate {coord!r} not found in variables or dimensions {variables_and_dims}." + ) coord_var = self[coord].variable if coord_var.ndim != 1: @@ -8266,7 +8276,10 @@ def _integrate_one(self, coord, datetime_unit=None, cumulative=False): from xarray.core.variable import Variable if coord not in self.variables and coord not in self.dims: - raise ValueError(f"Coordinate {coord} does not exist.") + variables_and_dims = tuple(set(self.variables.keys()).union(self.dims)) + raise ValueError( + f"Coordinate {coord!r} not found in variables or dimensions {variables_and_dims}." 
+ ) coord_var = self[coord].variable if coord_var.ndim != 1: @@ -9768,7 +9781,9 @@ def drop_duplicates( missing_dims = set(dims) - set(self.dims) if missing_dims: - raise ValueError(f"'{missing_dims}' not found in dimensions") + raise ValueError( + f"Dimensions {tuple(missing_dims)} not found in dataset dimensions {tuple(self.dims)}" + ) indexes = {dim: ~self.get_index(dim).duplicated(keep=keep) for dim in dims} return self.isel(indexes) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index e119cfe9bc6..b457b52d733 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -1158,7 +1158,12 @@ def get_dask_names(ds): for k, v in new_dask_names.items(): assert v == orig_dask_names[k] - with pytest.raises(ValueError, match=r"some chunks"): + with pytest.raises( + ValueError, + match=re.escape( + "chunks keys ('foo',) not found in dataset dimensions ('dim2', 'dim3', 'time', 'dim1')" + ), + ): data.chunk({"foo": 10}) @requires_dask @@ -2780,7 +2785,10 @@ def test_drop_indexes(self) -> None: assert type(actual.x.variable) is Variable assert type(actual.y.variable) is Variable - with pytest.raises(ValueError, match="those coordinates don't exist"): + with pytest.raises( + ValueError, + match=r"The coordinates \('not_a_coord',\) are not found in the dataset coordinates", + ): ds.drop_indexes("not_a_coord") with pytest.raises(ValueError, match="those coordinates do not have an index"): @@ -3672,7 +3680,12 @@ def test_unstack(self) -> None: def test_unstack_errors(self) -> None: ds = Dataset({"x": [1, 2, 3]}) - with pytest.raises(ValueError, match=r"does not contain the dimensions"): + with pytest.raises( + ValueError, + match=re.escape( + "Dimensions ('foo',) not found in dataset dimensions ('x',)" + ), + ): ds.unstack("foo") with pytest.raises(ValueError, match=r".*do not have exactly one multi-index"): ds.unstack("x") @@ -4962,7 +4975,10 @@ def test_dropna(self) -> None: expected = ds.isel(a=[1, 3]) assert_identical(actual, 
ds) - with pytest.raises(ValueError, match=r"a single dataset dimension"): + with pytest.raises( + ValueError, + match=r"'foo' not found in dataset dimensions \('a', 'b'\)", + ): ds.dropna("foo") with pytest.raises(ValueError, match=r"invalid how"): ds.dropna("a", how="somehow") # type: ignore @@ -5280,7 +5296,10 @@ def test_mean_uint_dtype(self) -> None: def test_reduce_bad_dim(self) -> None: data = create_test_data() - with pytest.raises(ValueError, match=r"Dataset does not contain"): + with pytest.raises( + ValueError, + match=r"Dimensions \('bad_dim',\) not found in dataset dimensions", + ): data.mean(dim="bad_dim") def test_reduce_cumsum(self) -> None: @@ -5306,7 +5325,10 @@ def test_reduce_cumsum(self) -> None: @pytest.mark.parametrize("func", ["cumsum", "cumprod"]) def test_reduce_cumsum_test_dims(self, reduct, expected, func) -> None: data = create_test_data() - with pytest.raises(ValueError, match=r"Dataset does not contain"): + with pytest.raises( + ValueError, + match=r"Dimensions \('bad_dim',\) not found in dataset dimensions", + ): getattr(data, func)(dim="bad_dim") # ensure dimensions are correct @@ -5554,7 +5576,12 @@ def test_rank(self) -> None: assert list(z.coords) == list(ds.coords) assert list(x.coords) == list(y.coords) # invalid dim - with pytest.raises(ValueError, match=r"does not contain"): + with pytest.raises( + ValueError, + match=re.escape( + "Dimension 'invalid_dim' not found in dataset dimensions ('dim3', 'dim1')" + ), + ): x.rank("invalid_dim") def test_rank_use_bottleneck(self) -> None: @@ -7087,7 +7114,12 @@ def test_drop_duplicates_1d(self, keep) -> None: result = ds.drop_duplicates("time", keep=keep) assert_equal(expected, result) - with pytest.raises(ValueError, match="['space'] not found"): + with pytest.raises( + ValueError, + match=re.escape( + "Dimensions ('space',) not found in dataset dimensions ('time',)" + ), + ): ds.drop_duplicates("space", keep=keep) From dc6edcbb5665ad554e6e7bd0a44a5a864a0dd536 Mon Sep 17 00:00:00 2001 
From: mgunyho <20118130+mgunyho@users.noreply.github.com> Date: Fri, 18 Aug 2023 21:35:39 +0300 Subject: [PATCH 03/12] Update test for dataarray --- xarray/tests/test_dataarray.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index b4efe4ab2a7..28c47767619 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -1,6 +1,7 @@ from __future__ import annotations import pickle +import re import sys import warnings from collections.abc import Hashable @@ -4886,8 +4887,10 @@ def test_idxmin( else: ar0 = ar0_raw - # dim doesn't exist - with pytest.raises(KeyError): + with pytest.raises( + KeyError, + match=r"'spam' not found in array dimensions", + ): ar0.idxmin(dim="spam") # Scalar Dataarray @@ -4999,8 +5002,10 @@ def test_idxmax( else: ar0 = ar0_raw - # dim doesn't exist - with pytest.raises(KeyError): + with pytest.raises( + KeyError, + match=r"'spam' not found in array dimensions", + ): ar0.idxmax(dim="spam") # Scalar Dataarray @@ -6954,7 +6959,12 @@ def test_drop_duplicates_1d(self, keep) -> None: result = da.drop_duplicates("time", keep=keep) assert_equal(expected, result) - with pytest.raises(ValueError, match="['space'] not found"): + with pytest.raises( + ValueError, + match=re.escape( + "Dimensions ('space',) not found in dataset dimensions ('time',)" + ), + ): da.drop_duplicates("space", keep=keep) def test_drop_duplicates_2d(self) -> None: From d84ab2a3e5d86045399696fab4bd77bed110d47a Mon Sep 17 00:00:00 2001 From: mgunyho <20118130+mgunyho@users.noreply.github.com> Date: Sat, 19 Aug 2023 14:35:54 +0300 Subject: [PATCH 04/12] Show data dims in error messages of weighted and update test --- xarray/core/weighted.py | 5 +++-- xarray/tests/test_weighted.py | 13 ++++++++----- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/xarray/core/weighted.py b/xarray/core/weighted.py index e21091fad6b..82ffe684ec7 100644 --- 
a/xarray/core/weighted.py +++ b/xarray/core/weighted.py @@ -198,10 +198,11 @@ def _check_dim(self, dim: Dims): dims = [dim] if dim else [] else: dims = list(dim) - missing_dims = set(dims) - set(self.obj.dims) - set(self.weights.dims) + all_dims = set(self.obj.dims).union(set(self.weights.dims)) + missing_dims = set(dims) - all_dims if missing_dims: raise ValueError( - f"{self.__class__.__name__} does not contain the dimensions: {missing_dims}" + f"Dimensions {tuple(missing_dims)} not found in {self.__class__.__name__} dimensions {tuple(all_dims)}" ) @staticmethod diff --git a/xarray/tests/test_weighted.py b/xarray/tests/test_weighted.py index e2530d41fbe..628d6310945 100644 --- a/xarray/tests/test_weighted.py +++ b/xarray/tests/test_weighted.py @@ -782,9 +782,12 @@ def test_weighted_bad_dim(operation, as_dataset): if operation == "quantile": kwargs["q"] = 0.5 - error_msg = ( - f"{data.__class__.__name__}Weighted" - " does not contain the dimensions: {'bad_dim'}" - ) - with pytest.raises(ValueError, match=error_msg): + with pytest.raises( + ValueError, + match=( + f"Dimensions \\('bad_dim',\\) not found in {data.__class__.__name__}Weighted " + # the order of (dim_0, dim_1) varies + "dimensions \\(('dim_0', 'dim_1'|'dim_1', 'dim_0')\\)" + ), + ): getattr(data.weighted(weights), operation)(**kwargs) From 1388bf05ee58c4ab4e5e673de6715c4133a18871 Mon Sep 17 00:00:00 2001 From: mgunyho <20118130+mgunyho@users.noreply.github.com> Date: Thu, 17 Aug 2023 18:29:43 +0300 Subject: [PATCH 05/12] Show dimensions in error message of group_indexers_by_index --- xarray/core/indexing.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index acab9ccc60b..0bb7004fca4 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -143,7 +143,10 @@ def group_indexers_by_index( elif key in obj.coords: raise KeyError(f"no index found for coordinate {key!r}") elif key not in obj.dims: - raise KeyError(f"{key!r} 
is not a valid dimension or coordinate") + raise KeyError( + f"{key!r} is not a valid dimension or coordinate for " + f"{obj.__class__.__name__} with dimensions {obj.dims!r}" + ) elif len(options): raise ValueError( f"cannot supply selection options {options!r} for dimension {key!r}" From 99ca40bddbe1614166426d2e2a3b5fb6f08ae737 Mon Sep 17 00:00:00 2001 From: mgunyho <20118130+mgunyho@users.noreply.github.com> Date: Sat, 19 Aug 2023 12:32:11 +0300 Subject: [PATCH 06/12] List coordinates in concat error message, update test --- xarray/core/concat.py | 13 ++++++++----- xarray/tests/test_concat.py | 5 ++++- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/xarray/core/concat.py b/xarray/core/concat.py index d7aad8c7188..a76bb6b0033 100644 --- a/xarray/core/concat.py +++ b/xarray/core/concat.py @@ -391,17 +391,20 @@ def process_subset_opt(opt, subset): else: raise ValueError(f"unexpected value for {subset}: {opt}") else: - invalid_vars = [k for k in opt if k not in getattr(datasets[0], subset)] + valid_vars = tuple(getattr(datasets[0], subset)) + invalid_vars = [k for k in opt if k not in valid_vars] if invalid_vars: if subset == "coords": raise ValueError( - "some variables in coords are not coordinates on " - f"the first dataset: {invalid_vars}" + f"the variables {invalid_vars} in coords are not " + f"found in the coordinates of the first dataset {valid_vars}" ) else: + # note: data_vars are not listed in the error message here, + # because there may be lots of them raise ValueError( - "some variables in data_vars are not data variables " - f"on the first dataset: {invalid_vars}" + f"the variables {invalid_vars} in data_vars are not " + f"found in the data variables of the first dataset" ) concat_over.update(opt) diff --git a/xarray/tests/test_concat.py b/xarray/tests/test_concat.py index 030f653e031..543b6d33cb9 100644 --- a/xarray/tests/test_concat.py +++ b/xarray/tests/test_concat.py @@ -614,9 +614,12 @@ def test_concat_errors(self): with 
pytest.raises(ValueError, match=r"must supply at least one"): concat([], "dim1") - with pytest.raises(ValueError, match=r"are not coordinates"): + with pytest.raises(ValueError, match=r"are not found in the coordinates"): concat([data, data], "new_dim", coords=["not_found"]) + with pytest.raises(ValueError, match=r"are not found in the data variables"): + concat([data, data], "new_dim", data_vars=["not_found"]) + with pytest.raises(ValueError, match=r"global attributes not"): # call deepcopy seperately to get unique attrs data0 = deepcopy(split_data[0]) From 595c735db58eda3ad5738f8947dfb0f9952b795e Mon Sep 17 00:00:00 2001 From: mgunyho <20118130+mgunyho@users.noreply.github.com> Date: Sat, 19 Aug 2023 12:53:55 +0300 Subject: [PATCH 07/12] List coordinates in coords __delitem__ error message, update tests --- xarray/core/coordinates.py | 8 ++++++-- xarray/tests/test_coordinates.py | 5 +++++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/xarray/core/coordinates.py b/xarray/core/coordinates.py index bebf9362532..77fd18ab1d8 100644 --- a/xarray/core/coordinates.py +++ b/xarray/core/coordinates.py @@ -769,7 +769,9 @@ def __delitem__(self, key: Hashable) -> None: if key in self: del self._data[key] else: - raise KeyError(f"{key!r} is not a coordinate variable.") + raise KeyError( + f"{key!r} is not in coordinate variables {tuple(self.keys())}" + ) def _ipython_key_completions_(self): """Provide method for the key-autocompletions in IPython.""" @@ -855,7 +857,9 @@ def to_dataset(self) -> Dataset: def __delitem__(self, key: Hashable) -> None: if key not in self: - raise KeyError(f"{key!r} is not a coordinate variable.") + raise KeyError( + f"{key!r} is not in coordinate variables {tuple(self.keys())}" + ) assert_no_index_corrupted(self._data.xindexes, {key}) del self._data._coords[key] diff --git a/xarray/tests/test_coordinates.py b/xarray/tests/test_coordinates.py index 27abc6c0ae2..ef73371dfe4 100644 --- a/xarray/tests/test_coordinates.py +++ 
b/xarray/tests/test_coordinates.py @@ -103,6 +103,11 @@ def test_delitem(self) -> None: del coords["x"] assert "x" not in coords + with pytest.raises( + KeyError, match="'nonexistent' is not in coordinate variables" + ): + del coords["nonexistent"] + def test_update(self) -> None: coords = Coordinates(coords={"x": [0, 1, 2]}) From a498ef58728d02066fc129431182ffcec596ca4d Mon Sep 17 00:00:00 2001 From: mgunyho <20118130+mgunyho@users.noreply.github.com> Date: Sat, 19 Aug 2023 13:31:23 +0300 Subject: [PATCH 08/12] Show list of names in error message of PandasMultiIndex.sel, update test --- xarray/core/indexes.py | 6 +++--- xarray/tests/test_indexes.py | 5 ++++- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/xarray/core/indexes.py b/xarray/core/indexes.py index b5e396963a1..dffc012c582 100644 --- a/xarray/core/indexes.py +++ b/xarray/core/indexes.py @@ -1203,12 +1203,12 @@ def sel(self, labels, method=None, tolerance=None) -> IndexSelResult: coord_name, label = next(iter(labels.items())) if is_dict_like(label): - invalid_levels = [ + invalid_levels = tuple( name for name in label if name not in self.index.names - ] + ) if invalid_levels: raise ValueError( - f"invalid multi-index level names {invalid_levels}" + f"multi-index level names {invalid_levels} not found in indexes {tuple(self.index.names)}" ) return self.sel(label) diff --git a/xarray/tests/test_indexes.py b/xarray/tests/test_indexes.py index 05d748541ed..866c2ef7e85 100644 --- a/xarray/tests/test_indexes.py +++ b/xarray/tests/test_indexes.py @@ -487,7 +487,10 @@ def test_sel(self) -> None: index.sel({"x": 0}) with pytest.raises(ValueError, match=r"cannot provide labels for both.*"): index.sel({"one": 0, "x": "a"}) - with pytest.raises(ValueError, match=r"invalid multi-index level names"): + with pytest.raises( + ValueError, + match=r"multi-index level names \('three',\) not found in indexes", + ): index.sel({"x": {"three": 0}}) with pytest.raises(IndexError): index.sel({"x": (slice(None), 1, 
"no_level")}) From 67addd7044a9da79bb8c156c0f1550bd67d6b9ef Mon Sep 17 00:00:00 2001 From: mgunyho <20118130+mgunyho@users.noreply.github.com> Date: Sat, 19 Aug 2023 14:02:40 +0300 Subject: [PATCH 09/12] Show list of dimensions in error messages of Rolling and Coarsen, update tests --- xarray/core/rolling.py | 18 +++++++++++++----- xarray/tests/test_coarsen.py | 5 ++++- xarray/tests/test_rolling.py | 11 +++++++++++ 3 files changed, 28 insertions(+), 6 deletions(-) diff --git a/xarray/core/rolling.py b/xarray/core/rolling.py index 916fabe42ac..dcd01a0e0f1 100644 --- a/xarray/core/rolling.py +++ b/xarray/core/rolling.py @@ -102,6 +102,14 @@ def __init__( self.center = self._mapping_to_list(center, default=False) self.obj: T_Xarray = obj + missing_dims = tuple(dim for dim in self.dim if dim not in self.obj.dims) + if missing_dims: + # NOTE: we raise KeyError here but ValueError in Coarsen. + raise KeyError( + f"Window dimensions {missing_dims} not found in {self.obj.__class__.__name__} " + f"dimensions {tuple(self.obj.dims)}" + ) + # attributes if min_periods is not None and min_periods <= 0: raise ValueError("min_periods must be greater than zero or None") @@ -624,8 +632,7 @@ def __init__( xarray.DataArray.groupby """ super().__init__(obj, windows, min_periods, center) - if any(d not in self.obj.dims for d in self.dim): - raise KeyError(self.dim) + # Keep each Rolling object as a dictionary self.rollings = {} for key, da in self.obj.data_vars.items(): @@ -839,10 +846,11 @@ def __init__( self.side = side self.boundary = boundary - absent_dims = [dim for dim in windows.keys() if dim not in self.obj.dims] - if absent_dims: + missing_dims = tuple(dim for dim in windows.keys() if dim not in self.obj.dims) + if missing_dims: raise ValueError( - f"Dimensions {absent_dims!r} not found in {self.obj.__class__.__name__}." 
+ f"Window dimensions {missing_dims} not found in {self.obj.__class__.__name__} " + f"dimensions {tuple(self.obj.dims)}" ) if not utils.is_dict_like(coord_func): coord_func = {d: coord_func for d in self.obj.dims} # type: ignore[misc] diff --git a/xarray/tests/test_coarsen.py b/xarray/tests/test_coarsen.py index d58361afdd3..e345ae691ec 100644 --- a/xarray/tests/test_coarsen.py +++ b/xarray/tests/test_coarsen.py @@ -17,7 +17,10 @@ def test_coarsen_absent_dims_error(ds: Dataset) -> None: - with pytest.raises(ValueError, match=r"not found in Dataset."): + with pytest.raises( + ValueError, + match=r"Window dimensions \('foo',\) not found in Dataset dimensions", + ): ds.coarsen(foo=2) diff --git a/xarray/tests/test_rolling.py b/xarray/tests/test_rolling.py index 73aebc1b1f0..0e3c0874a0a 100644 --- a/xarray/tests/test_rolling.py +++ b/xarray/tests/test_rolling.py @@ -77,6 +77,12 @@ def test_rolling_properties(self, da) -> None: with pytest.raises(ValueError, match="min_periods must be greater than zero"): da.rolling(time=2, min_periods=0) + with pytest.raises( + KeyError, + match=r"\('foo',\) not found in DataArray dimensions", + ): + da.rolling(foo=2) + @pytest.mark.parametrize("name", ("sum", "mean", "std", "min", "max", "median")) @pytest.mark.parametrize("center", (True, False, None)) @pytest.mark.parametrize("min_periods", (1, None)) @@ -540,6 +546,11 @@ def test_rolling_properties(self, ds) -> None: ds.rolling(time=2, min_periods=0) with pytest.raises(KeyError, match="time2"): ds.rolling(time2=2) + with pytest.raises( + KeyError, + match=r"\('foo',\) not found in Dataset dimensions", + ): + ds.rolling(foo=2) @pytest.mark.parametrize( "name", ("sum", "mean", "std", "var", "min", "max", "median") From 7fa50262ddb113166b3f72de5492d60ce49bf09a Mon Sep 17 00:00:00 2001 From: mgunyho <20118130+mgunyho@users.noreply.github.com> Date: Sat, 19 Aug 2023 15:52:26 +0300 Subject: [PATCH 10/12] Show dims in Variable.concat error message as tuple for consistency --- 
xarray/core/variable.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index c89545c43ae..b9ecbb3ee06 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -2119,7 +2119,7 @@ def concat( for var in variables: if var.dims != first_var_dims: raise ValueError( - f"Variable has dimensions {list(var.dims)} but first Variable has dimensions {list(first_var_dims)}" + f"Variable has dimensions {tuple(var.dims)} but first Variable has dimensions {tuple(first_var_dims)}" ) return cls(dims, data, attrs, encoding, fastpath=True) From fdcad9a7af22fbed8e484deb2eeed2f8f94a41a7 Mon Sep 17 00:00:00 2001 From: mgunyho <20118130+mgunyho@users.noreply.github.com> Date: Sat, 26 Aug 2023 21:27:20 +0300 Subject: [PATCH 11/12] Change 'dataset' to 'data' in error messages --- xarray/core/dataset.py | 20 ++++++++++---------- xarray/tests/test_dataarray.py | 2 +- xarray/tests/test_dataset.py | 16 +++++++--------- 3 files changed, 18 insertions(+), 20 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 22de2892d50..e71e7f4ec21 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -2629,7 +2629,7 @@ def chunk( bad_dims = chunks.keys() - self.dims.keys() if bad_dims: raise ValueError( - f"chunks keys {tuple(bad_dims)} not found in dataset dimensions {tuple(self.dims)}" + f"chunks keys {tuple(bad_dims)} not found in data dimensions {tuple(self.dims)}" ) chunkmanager = guess_chunkmanager(chunked_array_type) @@ -5442,7 +5442,7 @@ def unstack( missing_dims = set(dims) - set(self.dims) if missing_dims: raise ValueError( - f"Dimensions {tuple(missing_dims)} not found in dataset dimensions {tuple(self.dims)}" + f"Dimensions {tuple(missing_dims)} not found in data dimensions {tuple(self.dims)}" ) # each specified dimension must have exactly one multi-index @@ -6078,7 +6078,7 @@ def drop_dims( missing_dims = drop_dims - set(self.dims) if missing_dims: raise ValueError( - 
f"Dimensions {tuple(missing_dims)} not found in dataset dimensions {tuple(self.dims)}" + f"Dimensions {tuple(missing_dims)} not found in data dimensions {tuple(self.dims)}" ) drop_vars = {k for k, v in self._variables.items() if set(v.dims) & drop_dims} @@ -6239,7 +6239,7 @@ def dropna( if dim not in self.dims: raise ValueError( - f"Dimension {dim!r} not found in dataset dimensions {tuple(self.dims)}" + f"Dimension {dim!r} not found in data dimensions {tuple(self.dims)}" ) if subset is None: @@ -6724,7 +6724,7 @@ def reduce( missing_dimensions = tuple(d for d in dims if d not in self.dims) if missing_dimensions: raise ValueError( - f"Dimensions {missing_dimensions} not found in dataset dimensions {tuple(self.dims)}" + f"Dimensions {missing_dimensions} not found in data dimensions {tuple(self.dims)}" ) if keep_attrs is None: @@ -7710,7 +7710,7 @@ def shift( invalid = tuple(k for k in shifts if k not in self.dims) if invalid: raise ValueError( - f"Dimensions {invalid} not found in dataset dimensions {tuple(self.dims)}" + f"Dimensions {invalid} not found in data dimensions {tuple(self.dims)}" ) variables = {} @@ -7789,7 +7789,7 @@ def roll( invalid = [k for k in shifts if k not in self.dims] if invalid: raise ValueError( - f"Dimensions {invalid} not found in dataset dimensions {tuple(self.dims)}" + f"Dimensions {invalid} not found in data dimensions {tuple(self.dims)}" ) unrolled_vars: tuple[Hashable, ...] 
@@ -8042,7 +8042,7 @@ def quantile( invalid_dims = set(dims) - set(self.dims) if invalid_dims: raise ValueError( - f"Dimensions {tuple(invalid_dims)} not found in dataset dimensions {tuple(self.dims)}" + f"Dimensions {tuple(invalid_dims)} not found in data dimensions {tuple(self.dims)}" ) q = np.asarray(q, dtype=np.float64) @@ -8120,7 +8120,7 @@ def rank( if dim not in self.dims: raise ValueError( - f"Dimension {dim!r} not found in dataset dimensions {tuple(self.dims)}" + f"Dimension {dim!r} not found in data dimensions {tuple(self.dims)}" ) variables = {} @@ -9782,7 +9782,7 @@ def drop_duplicates( missing_dims = set(dims) - set(self.dims) if missing_dims: raise ValueError( - f"Dimensions {tuple(missing_dims)} not found in dataset dimensions {tuple(self.dims)}" + f"Dimensions {tuple(missing_dims)} not found in data dimensions {tuple(self.dims)}" ) indexes = {dim: ~self.get_index(dim).duplicated(keep=keep) for dim in dims} diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 28c47767619..2a28939df41 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -6962,7 +6962,7 @@ def test_drop_duplicates_1d(self, keep) -> None: with pytest.raises( ValueError, match=re.escape( - "Dimensions ('space',) not found in dataset dimensions ('time',)" + "Dimensions ('space',) not found in data dimensions ('time',)" ), ): da.drop_duplicates("space", keep=keep) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index b457b52d733..cfece5865be 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -1161,7 +1161,7 @@ def get_dask_names(ds): with pytest.raises( ValueError, match=re.escape( - "chunks keys ('foo',) not found in dataset dimensions ('dim2', 'dim3', 'time', 'dim1')" + "chunks keys ('foo',) not found in data dimensions ('dim2', 'dim3', 'time', 'dim1')" ), ): data.chunk({"foo": 10}) @@ -3682,9 +3682,7 @@ def test_unstack_errors(self) -> None: ds = Dataset({"x": [1, 2, 3]}) 
with pytest.raises( ValueError, - match=re.escape( - "Dimensions ('foo',) not found in dataset dimensions ('x',)" - ), + match=re.escape("Dimensions ('foo',) not found in data dimensions ('x',)"), ): ds.unstack("foo") with pytest.raises(ValueError, match=r".*do not have exactly one multi-index"): @@ -4977,7 +4975,7 @@ def test_dropna(self) -> None: with pytest.raises( ValueError, - match=r"'foo' not found in dataset dimensions \('a', 'b'\)", + match=r"'foo' not found in data dimensions \('a', 'b'\)", ): ds.dropna("foo") with pytest.raises(ValueError, match=r"invalid how"): @@ -5298,7 +5296,7 @@ def test_reduce_bad_dim(self) -> None: data = create_test_data() with pytest.raises( ValueError, - match=r"Dimensions \('bad_dim',\) not found in dataset dimensions", + match=r"Dimensions \('bad_dim',\) not found in data dimensions", ): data.mean(dim="bad_dim") @@ -5327,7 +5325,7 @@ def test_reduce_cumsum_test_dims(self, reduct, expected, func) -> None: data = create_test_data() with pytest.raises( ValueError, - match=r"Dimensions \('bad_dim',\) not found in dataset dimensions", + match=r"Dimensions \('bad_dim',\) not found in data dimensions", ): getattr(data, func)(dim="bad_dim") @@ -5579,7 +5577,7 @@ def test_rank(self) -> None: with pytest.raises( ValueError, match=re.escape( - "Dimension 'invalid_dim' not found in dataset dimensions ('dim3', 'dim1')" + "Dimension 'invalid_dim' not found in data dimensions ('dim3', 'dim1')" ), ): x.rank("invalid_dim") @@ -7117,7 +7115,7 @@ def test_drop_duplicates_1d(self, keep) -> None: with pytest.raises( ValueError, match=re.escape( - "Dimensions ('space',) not found in dataset dimensions ('time',)" + "Dimensions ('space',) not found in data dimensions ('time',)" ), ): ds.drop_duplicates("space", keep=keep) From ee1ced1c040545f7b2b4c0db595bc261042fc9e8 Mon Sep 17 00:00:00 2001 From: mgunyho <20118130+mgunyho@users.noreply.github.com> Date: Sat, 19 Aug 2023 15:59:13 +0300 Subject: [PATCH 12/12] Update whats-new --- doc/whats-new.rst | 
2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 157795f08d1..80bf9f09bdb 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -65,6 +65,8 @@ Documentation Internal Changes ~~~~~~~~~~~~~~~~ +- Many error messages related to invalid dimensions or coordinates now always show the list of valid dims/coords (:pull:`8079`). + By `András Gunyhó <https://github.com/mgunyho>`_. .. _whats-new.2023.08.0: