Skip to content

Commit d46c5b6

Browse files
Warn on repeated dimension names during construction (#8491)
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent 8ea565d commit d46c5b6

File tree

6 files changed

+37
-6
lines changed

6 files changed

+37
-6
lines changed

doc/whats-new.rst

+6
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,12 @@ New Features
3434
Breaking changes
3535
~~~~~~~~~~~~~~~~
3636

37+
- Explicitly warn when creating xarray objects with repeated dimension names.
38+
Such objects will also now raise when :py:meth:`DataArray.get_axis_num` is called,
39+
which means many functions will raise.
40+
This latter change is technically a breaking change, but whilst allowed,
41+
this behaviour was never actually supported! (:issue:`3731`, :pull:`8491`)
42+
By `Tom Nicholas <https://github.com/TomNicholas>`_.
3743

3844
Deprecations
3945
~~~~~~~~~~~~

xarray/core/common.py

+2
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
emit_user_level_warning,
2222
is_scalar,
2323
)
24+
from xarray.namedarray.core import _raise_if_any_duplicate_dimensions
2425

2526
try:
2627
import cftime
@@ -217,6 +218,7 @@ def get_axis_num(self, dim: Hashable | Iterable[Hashable]) -> int | tuple[int, .
217218
return self._get_axis_num(dim)
218219

219220
def _get_axis_num(self: Any, dim: Hashable) -> int:
221+
_raise_if_any_duplicate_dimensions(self.dims)
220222
try:
221223
return self.dims.index(dim)
222224
except ValueError:

xarray/core/variable.py

+3-6
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@
4646
is_duck_array,
4747
maybe_coerce_to_str,
4848
)
49-
from xarray.namedarray.core import NamedArray
49+
from xarray.namedarray.core import NamedArray, _raise_if_any_duplicate_dimensions
5050

5151
NON_NUMPY_SUPPORTED_ARRAY_TYPES = (
5252
indexing.ExplicitlyIndexed,
@@ -2876,11 +2876,8 @@ def _unified_dims(variables):
28762876
all_dims = {}
28772877
for var in variables:
28782878
var_dims = var.dims
2879-
if len(set(var_dims)) < len(var_dims):
2880-
raise ValueError(
2881-
"broadcasting cannot handle duplicate "
2882-
f"dimensions: {list(var_dims)!r}"
2883-
)
2879+
_raise_if_any_duplicate_dimensions(var_dims, err_context="Broadcasting")
2880+
28842881
for d, s in zip(var_dims, var.shape):
28852882
if d not in all_dims:
28862883
all_dims[d] = s

xarray/namedarray/core.py

+20
Original file line numberDiff line numberDiff line change
@@ -481,6 +481,15 @@ def _parse_dimensions(self, dims: _DimsLike) -> _Dims:
481481
f"dimensions {dims} must have the same length as the "
482482
f"number of data dimensions, ndim={self.ndim}"
483483
)
484+
if len(set(dims)) < len(dims):
485+
repeated_dims = set([d for d in dims if dims.count(d) > 1])
486+
warnings.warn(
487+
f"Duplicate dimension names present: dimensions {repeated_dims} appear more than once in dims={dims}. "
488+
"We do not yet support duplicate dimension names, but we do allow initial construction of the object. "
489+
"We recommend you rename the dims immediately to become distinct, as most xarray functionality is likely to fail silently if you do not. "
490+
"To rename the dimensions you will need to set the ``.dims`` attribute of each variable, ``e.g. var.dims=('x0', 'x1')``.",
491+
UserWarning,
492+
)
484493
return dims
485494

486495
@property
@@ -651,6 +660,7 @@ def get_axis_num(self, dim: Hashable | Iterable[Hashable]) -> int | tuple[int, .
651660
return self._get_axis_num(dim)
652661

653662
def _get_axis_num(self: Any, dim: Hashable) -> int:
663+
_raise_if_any_duplicate_dimensions(self.dims)
654664
try:
655665
return self.dims.index(dim) # type: ignore[no-any-return]
656666
except ValueError:
@@ -846,3 +856,13 @@ def _to_dense(self) -> NamedArray[Any, _DType_co]:
846856

847857

848858
_NamedArray = NamedArray[Any, np.dtype[_ScalarType_co]]
859+
860+
861+
def _raise_if_any_duplicate_dimensions(
862+
dims: _Dims, err_context: str = "This function"
863+
) -> None:
864+
if len(set(dims)) < len(dims):
865+
repeated_dims = set([d for d in dims if dims.count(d) > 1])
866+
raise ValueError(
867+
f"{err_context} cannot handle duplicate dimensions, but dimensions {repeated_dims} appear more than once on this object's dims: {dims}"
868+
)

xarray/tests/test_backends.py

+2
Original file line numberDiff line numberDiff line change
@@ -3464,6 +3464,7 @@ class TestH5NetCDFDataRos3Driver(TestCommon):
34643464
"https://www.unidata.ucar.edu/software/netcdf/examples/OMI-Aura_L2-example.nc"
34653465
)
34663466

3467+
@pytest.mark.filterwarnings("ignore:Duplicate dimension names")
34673468
def test_get_variable_list(self) -> None:
34683469
with open_dataset(
34693470
self.test_remote_dataset,
@@ -3472,6 +3473,7 @@ def test_get_variable_list(self) -> None:
34723473
) as actual:
34733474
assert "Temperature" in list(actual)
34743475

3476+
@pytest.mark.filterwarnings("ignore:Duplicate dimension names")
34753477
def test_get_variable_list_empty_driver_kwds(self) -> None:
34763478
driver_kwds = {
34773479
"secret_id": b"",

xarray/tests/test_namedarray.py

+4
Original file line numberDiff line numberDiff line change
@@ -475,3 +475,7 @@ def _new(
475475
var_float2: Variable[Any, np.dtype[np.float32]]
476476
var_float2 = var_float._replace(("x",), np_val2)
477477
assert var_float2.dtype == dtype_float
478+
479+
def test_warn_on_repeated_dimension_names(self) -> None:
    """Building a NamedArray with dims ``("x", "x")`` must emit a UserWarning."""
    duplicated_dims = ("x", "x")
    square = np.arange(4).reshape(2, 2)
    with pytest.warns(UserWarning, match="Duplicate dimension names"):
        NamedArray(duplicated_dims, square)

0 commit comments

Comments
 (0)