Skip to content

Improve alignment checks #10251

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Apr 28, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1644,6 +1644,7 @@ Exceptions
.. autosummary::
:toctree: generated/

AlignmentError
MergeError
SerializationWarning

Expand Down
5 changes: 4 additions & 1 deletion doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,10 @@ New Features
`Miguel Jimenez-Urias <https://github.com/Mikejmnez>`_.
- Improved support for pandas Extension Arrays. (:issue:`9661`, :pull:`9671`)
By `Ilan Gold <https://github.com/ilan-gold>`_.

- Improved checks and errors raised when trying to align objects with conflicting indexes.
It is now possible to align objects each with multiple indexes sharing common dimension(s).
(:issue:`7695`, :pull:`10251`)
By `Benoit Bovy <https://github.com/benbovy>`_.

Breaking changes
~~~~~~~~~~~~~~~~
Expand Down
3 changes: 2 additions & 1 deletion xarray/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@
)
from xarray.core.variable import IndexVariable, Variable, as_variable
from xarray.namedarray.core import NamedArray
from xarray.structure.alignment import align, broadcast
from xarray.structure.alignment import AlignmentError, align, broadcast
from xarray.structure.chunks import unify_chunks
from xarray.structure.combine import combine_by_coords, combine_nested
from xarray.structure.concat import concat
Expand Down Expand Up @@ -128,6 +128,7 @@
"NamedArray",
"Variable",
# Exceptions
"AlignmentError",
"InvalidTreeError",
"MergeError",
"NotFoundInTreeError",
Expand Down
122 changes: 53 additions & 69 deletions xarray/structure/alignment.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,10 @@
)


class AlignmentError(ValueError):
    """Error class for alignment failures due to incompatible arguments.

    This exception derives from :py:class:`ValueError`, so handlers written
    as ``except ValueError`` also catch it.
    """


def reindex_variables(
variables: Mapping[Any, Variable],
dim_pos_indexers: Mapping[Any, Any],
Expand Down Expand Up @@ -196,7 +200,7 @@ def _normalize_indexes(
for k, idx in indexes.items():
if not isinstance(idx, Index):
if getattr(idx, "dims", (k,)) != (k,):
raise ValueError(
raise AlignmentError(
f"Indexer has dimensions {idx.dims} that are different "
f"from that to be indexed along '{k}'"
)
Expand Down Expand Up @@ -227,7 +231,7 @@ def _normalize_indexes(
elif exclude_dims:
excl_dims_str = ", ".join(str(d) for d in exclude_dims)
incl_dims_str = ", ".join(str(d) for d in all_dims - exclude_dims)
raise ValueError(
raise AlignmentError(
f"cannot exclude dimension(s) {excl_dims_str} from alignment because "
"these are used by an index together with non-excluded dimensions "
f"{incl_dims_str}"
Expand Down Expand Up @@ -268,7 +272,7 @@ def find_matching_indexes(self) -> None:
for dim_sizes in all_indexes_dim_sizes.values():
for dim, sizes in dim_sizes.items():
if len(sizes) > 1:
raise ValueError(
raise AlignmentError(
"cannot align objects with join='override' with matching indexes "
f"along dimension {dim!r} that don't have the same size"
)
Expand All @@ -283,47 +287,6 @@ def find_matching_unindexed_dims(self) -> None:

self.unindexed_dim_sizes = unindexed_dim_sizes

def assert_no_index_conflict(self) -> None:
    """Check for uniqueness of both coordinate and dimension names across all sets
    of matching indexes.

    We need to make sure that all indexes used for re-indexing or alignment
    are fully compatible and do not conflict with each other.

    Note: perhaps we could choose less restrictive constraints and instead
    check for conflicts among the dimension (position) indexers returned by
    `Index.reindex_like()` for each matching pair of object index / aligned
    index?
    (ref: https://github.com/pydata/xarray/issues/1603#issuecomment-442965602)

    Raises
    ------
    ValueError
        If a coordinate name or a dimension name is found in more than one
        of the keys of ``self.all_indexes`` and ``self.indexes``.

    """
    # Union of the keys of both mappings: a key present in both counts once.
    matching_keys = set(self.all_indexes) | set(self.indexes)

    coord_count: dict[Hashable, int] = defaultdict(int)
    dim_count: dict[Hashable, int] = defaultdict(int)
    # Each key unpacks as a pair; only its first element, an iterable of
    # (coordinate name, dims) pairs, is used here.
    for coord_names_dims, _ in matching_keys:
        dims_set: set[Hashable] = set()
        for name, dims in coord_names_dims:
            coord_count[name] += 1
            dims_set.update(dims)
        # Dimensions are de-duplicated per key first, so a dimension shared by
        # several coordinates of the same key is counted only once for that key.
        for dim in dims_set:
            dim_count[dim] += 1

    # Coordinates are checked before dimensions; the first kind with any
    # duplicate raises.
    for count, msg in [(coord_count, "coordinates"), (dim_count, "dimensions")]:
        dup = {k: v for k, v in count.items() if v > 1}
        if dup:
            items_msg = ", ".join(
                f"{k!r} ({v} conflicting indexes)" for k, v in dup.items()
            )
            raise ValueError(
                "cannot re-index or align objects with conflicting indexes found for "
                f"the following {msg}: {items_msg}\n"
                "Conflicting indexes may occur when\n"
                "- they relate to different sets of coordinate and/or dimension names\n"
                "- they don't have the same type\n"
                "- they may be used to reindex data along common dimensions"
            )

def _need_reindex(self, dim, cmp_indexes) -> bool:
"""Whether or not we need to reindex variables for a set of
matching indexes.
Expand Down Expand Up @@ -383,11 +346,33 @@ def _get_index_joiner(self, index_cls) -> Callable:
def align_indexes(self) -> None:
"""Compute all aligned indexes and their corresponding coordinate variables."""

aligned_indexes = {}
aligned_index_vars = {}
reindex = {}
new_indexes = {}
new_index_vars = {}
aligned_indexes: dict[MatchingIndexKey, Index] = {}
aligned_index_vars: dict[MatchingIndexKey, dict[Hashable, Variable]] = {}
reindex: dict[MatchingIndexKey, bool] = {}
new_indexes: dict[Hashable, Index] = {}
new_index_vars: dict[Hashable, Variable] = {}

def update_dicts(
    key: MatchingIndexKey,
    idx: Index,
    idx_vars: dict[Hashable, Variable],
    need_reindex: bool,
):
    """Record one aligned index and register its coordinates by name.

    Stores ``need_reindex``, ``idx`` and ``idx_vars`` under ``key`` in the
    ``reindex``, ``aligned_indexes`` and ``aligned_index_vars`` dicts, then
    maps every coordinate name of ``idx_vars`` to ``idx`` in ``new_indexes``
    and to its variable in ``new_index_vars``. All five dicts are the ones
    created just before this helper in ``align_indexes``.

    Raises
    ------
    AlignmentError
        If a coordinate name of ``idx_vars`` is already registered in
        ``new_indexes``.
    """
    reindex[key] = need_reindex
    aligned_indexes[key] = idx
    aligned_index_vars[key] = idx_vars

    for name, var in idx_vars.items():
        # A name already present means an earlier call registered an index
        # for the same coordinate.
        if name in new_indexes:
            other_idx = new_indexes[name]
            other_var = new_index_vars[name]
            # In the message, "first" is the index passed to this call and
            # "second" is the one registered earlier.
            raise AlignmentError(
                f"cannot align objects on coordinate {name!r} because of conflicting indexes\n"
                f"first index: {idx!r}\nsecond index: {other_idx!r}\n"
                f"first variable: {var!r}\nsecond variable: {other_var!r}\n"
            )
        new_indexes[name] = idx
        new_index_vars[name] = var

for key, matching_indexes in self.all_indexes.items():
matching_index_vars = self.all_index_vars[key]
Expand Down Expand Up @@ -419,7 +404,7 @@ def align_indexes(self) -> None:
need_reindex = False
if need_reindex:
if self.join == "exact":
raise ValueError(
raise AlignmentError(
"cannot align objects with join='exact' where "
"index/labels/sizes are not equal along "
"these coordinates (dimensions): "
Expand All @@ -437,25 +422,14 @@ def align_indexes(self) -> None:
joined_index = matching_indexes[0]
joined_index_vars = matching_index_vars[0]

reindex[key] = need_reindex
aligned_indexes[key] = joined_index
aligned_index_vars[key] = joined_index_vars

for name, var in joined_index_vars.items():
new_indexes[name] = joined_index
new_index_vars[name] = var
update_dicts(key, joined_index, joined_index_vars, need_reindex)

# Explicitly provided indexes that are not found in objects to align
# may relate to unindexed dimensions so we add them too
for key, idx in self.indexes.items():
if key not in aligned_indexes:
index_vars = self.index_vars[key]
reindex[key] = False
aligned_indexes[key] = idx
aligned_index_vars[key] = index_vars
for name, var in index_vars.items():
new_indexes[name] = idx
new_index_vars[name] = var
update_dicts(key, idx, index_vars, False)

self.aligned_indexes = aligned_indexes
self.aligned_index_vars = aligned_index_vars
Expand All @@ -474,7 +448,7 @@ def assert_unindexed_dim_sizes_equal(self) -> None:
else:
add_err_msg = ""
if len(sizes) > 1:
raise ValueError(
raise AlignmentError(
f"cannot reindex or align along dimension {dim!r} "
f"because of conflicting dimension sizes: {sizes!r}" + add_err_msg
)
Expand Down Expand Up @@ -502,14 +476,25 @@ def _get_dim_pos_indexers(
self,
matching_indexes: dict[MatchingIndexKey, Index],
) -> dict[Hashable, Any]:
dim_pos_indexers = {}
dim_pos_indexers: dict[Hashable, Any] = {}
dim_index: dict[Hashable, Index] = {}

for key, aligned_idx in self.aligned_indexes.items():
obj_idx = matching_indexes.get(key)
if obj_idx is not None:
if self.reindex[key]:
indexers = obj_idx.reindex_like(aligned_idx, **self.reindex_kwargs)
dim_pos_indexers.update(indexers)
for dim, idxer in indexers.items():
if dim in dim_pos_indexers and not np.array_equal(
idxer, dim_pos_indexers[dim]
):
raise AlignmentError(
f"cannot reindex or align along dimension {dim!r} because "
"of conflicting re-indexers returned by multiple indexes\n"
f"first index: {obj_idx!r}\nsecond index: {dim_index[dim]!r}\n"
)
dim_pos_indexers[dim] = idxer
dim_index[dim] = obj_idx

return dim_pos_indexers

Expand Down Expand Up @@ -571,7 +556,6 @@ def align(self) -> None:

self.find_matching_indexes()
self.find_matching_unindexed_dims()
self.assert_no_index_conflict()
self.align_indexes()
self.assert_unindexed_dim_sizes_equal()

Expand Down Expand Up @@ -735,7 +719,7 @@ def align(

Raises
------
ValueError
AlignmentError
If any dimensions without labels on the arguments have different sizes,
or a different size than the size of the aligned dimension labels.

Expand Down Expand Up @@ -853,7 +837,7 @@ def align(
>>> a, b = xr.align(x, y, join="exact")
Traceback (most recent call last):
...
ValueError: cannot align objects with join='exact' ...
xarray.structure.alignment.AlignmentError: cannot align objects with join='exact' ...
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It will be cleaner to later define Xarray exceptions in their own module, e.g. xarray.exceptions.AlignmentError (and likewise for MergeError, etc.).


>>> a, b = xr.align(x, y, join="override")
>>> a
Expand Down
2 changes: 1 addition & 1 deletion xarray/structure/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -942,7 +942,7 @@ def merge(
>>> xr.merge([x, y, z], join="exact")
Traceback (most recent call last):
...
ValueError: cannot align objects with join='exact' where ...
xarray.structure.alignment.AlignmentError: cannot align objects with join='exact' where ...

Raises
------
Expand Down
23 changes: 23 additions & 0 deletions xarray/tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@

import xarray as xr
from xarray import (
AlignmentError,
DataArray,
Dataset,
IndexVariable,
Expand Down Expand Up @@ -2543,6 +2544,28 @@ def test_align_indexes(self) -> None:

assert_identical(expected_x2, x2)

def test_align_multiple_indexes_common_dim(self) -> None:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nice test!

a = Dataset(coords={"x": [1, 2], "xb": ("x", [3, 4])}).set_xindex("xb")
b = Dataset(coords={"x": [1], "xb": ("x", [3])}).set_xindex("xb")

(a2, b2) = align(a, b, join="inner")
assert_identical(a2, b, check_default_indexes=False)
assert_identical(b2, b, check_default_indexes=False)

c = Dataset(coords={"x": [1, 3], "xb": ("x", [2, 4])}).set_xindex("xb")

with pytest.raises(AlignmentError, match=".*conflicting re-indexers"):
align(a, c)

def test_align_conflicting_indexes(self) -> None:
    """Check that ``align`` raises ``AlignmentError`` when coordinate "xb" is
    indexed with ``CustomIndex`` in one dataset but not in the other."""

    # Subclass that overrides nothing.
    class CustomIndex(PandasIndex): ...

    # Both datasets index coordinate "xb" along dimension "x": `a` with
    # whatever index set_xindex builds when no class is given, `b` with
    # CustomIndex.
    a = Dataset(coords={"xb": ("x", [3, 4])}).set_xindex("xb")
    b = Dataset(coords={"xb": ("x", [3])}).set_xindex("xb", CustomIndex)

    with pytest.raises(AlignmentError, match="cannot align.*conflicting indexes"):
        align(a, b)

def test_align_non_unique(self) -> None:
x = Dataset({"foo": ("x", [3, 4, 5]), "x": [0, 0, 1]})
x1, x2 = align(x, x)
Expand Down
Loading