From 83f31976087d69990a20abef59a8f11ffd44bace Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Fri, 25 Apr 2025 14:52:23 +0200 Subject: [PATCH 1/6] refactor alignment index conflict checks - Improved error messages (more context) - Simplified logic - Removed overly restrictive checks that caused alignment to fail when multiple indexes are set along common dimensions --- xarray/structure/alignment.py | 102 ++++++++++++++-------------------- 1 file changed, 41 insertions(+), 61 deletions(-) diff --git a/xarray/structure/alignment.py b/xarray/structure/alignment.py index a3c26a0d023..4bb66f2a7fd 100644 --- a/xarray/structure/alignment.py +++ b/xarray/structure/alignment.py @@ -283,47 +283,6 @@ def find_matching_unindexed_dims(self) -> None: self.unindexed_dim_sizes = unindexed_dim_sizes - def assert_no_index_conflict(self) -> None: - """Check for uniqueness of both coordinate and dimension names across all sets - of matching indexes. - - We need to make sure that all indexes used for re-indexing or alignment - are fully compatible and do not conflict each other. - - Note: perhaps we could choose less restrictive constraints and instead - check for conflicts among the dimension (position) indexers returned by - `Index.reindex_like()` for each matching pair of object index / aligned - index? - (ref: https://github.com/pydata/xarray/issues/1603#issuecomment-442965602) - - """ - matching_keys = set(self.all_indexes) | set(self.indexes) - - coord_count: dict[Hashable, int] = defaultdict(int) - dim_count: dict[Hashable, int] = defaultdict(int) - for coord_names_dims, _ in matching_keys: - dims_set: set[Hashable] = set() - for name, dims in coord_names_dims: - coord_count[name] += 1 - dims_set.update(dims) - for dim in dims_set: - dim_count[dim] += 1 - - for count, msg in [(coord_count, "coordinates"), (dim_count, "dimensions")]: - dup = {k: v for k, v in count.items() if v > 1} - if dup: - items_msg = ", ".join( - f"{k!r} ({v} conflicting indexes)" for k, v in dup.items() - ) - raise ValueError( - "cannot re-index or align objects with conflicting indexes found for " - f"the following {msg}: {items_msg}\n" - "Conflicting indexes may occur when\n" - "- they relate to different sets of coordinate and/or dimension names\n" - "- they don't have the same type\n" - "- they may be used to reindex data along common dimensions" - ) - def _need_reindex(self, dim, cmp_indexes) -> bool: """Whether or not we need to reindex variables for a set of matching indexes. @@ -383,11 +342,33 @@ def _get_index_joiner(self, index_cls) -> Callable: def align_indexes(self) -> None: """Compute all aligned indexes and their corresponding coordinate variables.""" - aligned_indexes = {} - aligned_index_vars = {} - reindex = {} - new_indexes = {} - new_index_vars = {} + aligned_indexes: dict[MatchingIndexKey, Index] = {} + aligned_index_vars: dict[MatchingIndexKey, dict[Hashable, Variable]] = {} + reindex: dict[MatchingIndexKey, bool] = {} + new_indexes: dict[Hashable, Index] = {} + new_index_vars: dict[Hashable, Variable] = {} + + def update_dicts( + key: MatchingIndexKey, + idx: Index, + idx_vars: dict[Hashable, Variable], + need_reindex: bool, + ): + reindex[key] = need_reindex + aligned_indexes[key] = idx + aligned_index_vars[key] = idx_vars + + for name, var in idx_vars.items(): + if name in new_indexes: + other_idx = new_indexes[name] + other_var = new_index_vars[name] + raise ValueError( + "cannot align objects on coordinate {name!r} because of conflicting indexes\n" + f"first index: {idx!r}\nsecond index: {other_idx!r}\n" + f"first variable: {var!r}\nsecond variable: {other_var!r}\n" + ) + new_indexes[name] = idx + new_index_vars[name] = var for key, matching_indexes in self.all_indexes.items(): matching_index_vars = self.all_index_vars[key] @@ -437,25 +418,14 @@ def align_indexes(self) -> None: joined_index = matching_indexes[0] joined_index_vars = matching_index_vars[0] - reindex[key] = need_reindex - aligned_indexes[key] = joined_index - aligned_index_vars[key] = joined_index_vars - - for name, var in joined_index_vars.items(): - new_indexes[name] = joined_index - new_index_vars[name] = var + update_dicts(key, joined_index, joined_index_vars, need_reindex) # Explicitly provided indexes that are not found in objects to align # may relate to unindexed dimensions so we add them too for key, idx in self.indexes.items(): if key not in aligned_indexes: index_vars = self.index_vars[key] - reindex[key] = False - aligned_indexes[key] = idx - aligned_index_vars[key] = index_vars - for name, var in index_vars.items(): - new_indexes[name] = idx - new_index_vars[name] = var + update_dicts(key, idx, index_vars, False) self.aligned_indexes = aligned_indexes self.aligned_index_vars = aligned_index_vars @@ -503,13 +473,24 @@ def _get_dim_pos_indexers( matching_indexes: dict[MatchingIndexKey, Index], ) -> dict[Hashable, Any]: dim_pos_indexers = {} + dim_index = {} for key, aligned_idx in self.aligned_indexes.items(): obj_idx = matching_indexes.get(key) if obj_idx is not None: if self.reindex[key]: indexers = obj_idx.reindex_like(aligned_idx, **self.reindex_kwargs) - dim_pos_indexers.update(indexers) + for dim, idxer in indexers.items(): + if dim in dim_pos_indexers and not np.array_equal( + idxer, dim_pos_indexers[dim] + ): + raise ValueError( + "cannot align or reindex object along dimension {dim!r} because " + "of conflicting re-indexers computed from distinct indexes\n" + f"first index: {obj_idx!r}\nsecond index: {dim_index[dim]!r}\n" + ) + dim_pos_indexers[dim] = idxer + dim_index[dim] = obj_idx return dim_pos_indexers @@ -571,7 +552,6 @@ def align(self) -> None: self.find_matching_indexes() self.find_matching_unindexed_dims() - self.assert_no_index_conflict() self.align_indexes() self.assert_unindexed_dim_sizes_equal() From 12adc01d712fbf49f432ec74cdb06caac9da07e6 Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Fri, 25 Apr 2025 15:07:38 +0200 Subject: [PATCH 2/6] add AlignmentError exception class --- doc/api.rst | 1 + xarray/__init__.py | 3 ++- xarray/structure/alignment.py | 20 ++++++++++++-------- 3 files changed, 15 insertions(+), 9 deletions(-) diff --git a/doc/api.rst b/doc/api.rst index 966d7b82ddc..75232d284a4 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -1644,6 +1644,7 @@ Exceptions .. autosummary:: :toctree: generated/ + AlignmentError MergeError SerializationWarning diff --git a/xarray/__init__.py b/xarray/__init__.py index 07e6fe5b207..b08729f7478 100644 --- a/xarray/__init__.py +++ b/xarray/__init__.py @@ -50,7 +50,7 @@ ) from xarray.core.variable import IndexVariable, Variable, as_variable from xarray.namedarray.core import NamedArray -from xarray.structure.alignment import align, broadcast +from xarray.structure.alignment import AlignmentError, align, broadcast from xarray.structure.chunks import unify_chunks from xarray.structure.combine import combine_by_coords, combine_nested from xarray.structure.concat import concat @@ -128,6 +128,7 @@ "NamedArray", "Variable", # Exceptions + "AlignmentError", "InvalidTreeError", "MergeError", "NotFoundInTreeError", diff --git a/xarray/structure/alignment.py b/xarray/structure/alignment.py index 4bb66f2a7fd..f231dfe28b8 100644 --- a/xarray/structure/alignment.py +++ b/xarray/structure/alignment.py @@ -35,6 +35,10 @@ ) +class AlignmentError(ValueError): + """Error class for alignment failures due to incompatible arguments.""" + + def reindex_variables( variables: Mapping[Any, Variable], dim_pos_indexers: Mapping[Any, Any], @@ -196,7 +200,7 @@ def _normalize_indexes( for k, idx in indexes.items(): if not isinstance(idx, Index): if getattr(idx, "dims", (k,)) != (k,): - raise ValueError( + raise AlignmentError( f"Indexer has dimensions {idx.dims} that are different " f"from that to be indexed along '{k}'" ) @@ -227,7 +231,7 @@ def _normalize_indexes( elif exclude_dims: excl_dims_str = ", ".join(str(d) for d in exclude_dims) incl_dims_str = ", ".join(str(d) for d in all_dims - exclude_dims) - raise ValueError( + raise AlignmentError( f"cannot exclude dimension(s) {excl_dims_str} from alignment because " "these are used by an index together with non-excluded dimensions " f"{incl_dims_str}" @@ -268,7 +272,7 @@ def find_matching_indexes(self) -> None: for dim_sizes in all_indexes_dim_sizes.values(): for dim, sizes in dim_sizes.items(): if len(sizes) > 1: - raise ValueError( + raise AlignmentError( "cannot align objects with join='override' with matching indexes " f"along dimension {dim!r} that don't have the same size" ) @@ -362,7 +366,7 @@ def update_dicts( if name in new_indexes: other_idx = new_indexes[name] other_var = new_index_vars[name] - raise ValueError( + raise AlignmentError( "cannot align objects on coordinate {name!r} because of conflicting indexes\n" f"first index: {idx!r}\nsecond index: {other_idx!r}\n" f"first variable: {var!r}\nsecond variable: {other_var!r}\n" @@ -400,7 +404,7 @@ def update_dicts( need_reindex = False if need_reindex: if self.join == "exact": - raise ValueError( + raise AlignmentError( "cannot align objects with join='exact' where " "index/labels/sizes are not equal along " "these coordinates (dimensions): " @@ -444,7 +448,7 @@ def assert_unindexed_dim_sizes_equal(self) -> None: else: add_err_msg = "" if len(sizes) > 1: - raise ValueError( + raise AlignmentError( f"cannot reindex or align along dimension {dim!r} " f"because of conflicting dimension sizes: {sizes!r}" + add_err_msg ) @@ -484,7 +488,7 @@ def _get_dim_pos_indexers( if dim in dim_pos_indexers and not np.array_equal( idxer, dim_pos_indexers[dim] ): - raise ValueError( + raise AlignmentError( "cannot align or reindex object along dimension {dim!r} because " "of conflicting re-indexers computed from distinct indexes\n" f"first index: {obj_idx!r}\nsecond index: {dim_index[dim]!r}\n" @@ -715,7 +719,7 @@ def align( Raises ------ - ValueError + AlignmentError If any dimensions without labels on the arguments have different sizes, or a different size than the size of the aligned dimension labels. From 835ae4b833a191de7657d0c2a6b14213a58c6fb5 Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Fri, 25 Apr 2025 16:17:53 +0200 Subject: [PATCH 3/6] add tests --- xarray/structure/alignment.py | 6 +++--- xarray/tests/test_dataset.py | 23 +++++++++++++++++++++++ 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/xarray/structure/alignment.py b/xarray/structure/alignment.py index f231dfe28b8..00ea2cb6654 100644 --- a/xarray/structure/alignment.py +++ b/xarray/structure/alignment.py @@ -367,7 +367,7 @@ def update_dicts( other_idx = new_indexes[name] other_var = new_index_vars[name] raise AlignmentError( - "cannot align objects on coordinate {name!r} because of conflicting indexes\n" + f"cannot align objects on coordinate {name!r} because of conflicting indexes\n" f"first index: {idx!r}\nsecond index: {other_idx!r}\n" f"first variable: {var!r}\nsecond variable: {other_var!r}\n" ) @@ -489,8 +489,8 @@ def _get_dim_pos_indexers( idxer, dim_pos_indexers[dim] ): raise AlignmentError( - "cannot align or reindex object along dimension {dim!r} because " - "of conflicting re-indexers computed from distinct indexes\n" + f"cannot reindex or align along dimension {dim!r} because " + "of conflicting re-indexers returned by multiple indexes\n" f"first index: {obj_idx!r}\nsecond index: {dim_index[dim]!r}\n" ) dim_pos_indexers[dim] = idxer diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index c1310bc7e1d..165b1f1aa1a 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -23,6 +23,7 @@ import xarray as xr from xarray import ( + AlignmentError, DataArray, Dataset, IndexVariable, @@ -2543,6 +2544,28 @@ def test_align_indexes(self) -> None: assert_identical(expected_x2, x2) + def test_align_multiple_indexes_common_dim(self) -> None: + a = Dataset(coords={"x": [1, 2], "xb": ("x", [3, 4])}).set_xindex("xb") + b = Dataset(coords={"x": [1], "xb": ("x", [3])}).set_xindex("xb") + + (a2, b2) = align(a, b, join="inner") + assert_identical(a2, b, check_default_indexes=False) + assert_identical(b2, b, check_default_indexes=False) + + c = Dataset(coords={"x": [1, 3], "xb": ("x", [2, 4])}).set_xindex("xb") + + with pytest.raises(AlignmentError, match=".*conflicting re-indexers"): + align(a, c) + + def test_align_conflicting_indexes(self) -> None: + class CustomIndex(PandasIndex): ... + + a = Dataset(coords={"xb": ("x", [3, 4])}).set_xindex("xb") + b = Dataset(coords={"xb": ("x", [3])}).set_xindex("xb", CustomIndex) + + with pytest.raises(AlignmentError, match="cannot align.*conflicting indexes"): + align(a, b) + def test_align_non_unique(self) -> None: x = Dataset({"foo": ("x", [3, 4, 5]), "x": [0, 0, 1]}) x1, x2 = align(x, x) From fbf0af63e22551ebd766cb6600a1defc96b94264 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Sat, 26 Apr 2025 11:58:24 -0600 Subject: [PATCH 4/6] Fix CI --- xarray/structure/alignment.py | 6 +++--- xarray/structure/merge.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/xarray/structure/alignment.py b/xarray/structure/alignment.py index 00ea2cb6654..cd98ae5d267 100644 --- a/xarray/structure/alignment.py +++ b/xarray/structure/alignment.py @@ -476,8 +476,8 @@ def _get_dim_pos_indexers( self, matching_indexes: dict[MatchingIndexKey, Index], ) -> dict[Hashable, Any]: - dim_pos_indexers = {} - dim_index = {} + dim_pos_indexers: dict[Hashable, Any] = {} + dim_index: dict[Hashable, Index] = {} for key, aligned_idx in self.aligned_indexes.items(): obj_idx = matching_indexes.get(key) @@ -837,7 +837,7 @@ def align( >>> a, b = xr.align(x, y, join="exact") Traceback (most recent call last): ... - ValueError: cannot align objects with join='exact' ... + AlignmentError: cannot align objects with join='exact' ... >>> a, b = xr.align(x, y, join="override") >>> a diff --git a/xarray/structure/merge.py b/xarray/structure/merge.py index 8f9835aaaa1..e1722d7e32e 100644 --- a/xarray/structure/merge.py +++ b/xarray/structure/merge.py @@ -942,7 +942,7 @@ def merge( >>> xr.merge([x, y, z], join="exact") Traceback (most recent call last): ... - ValueError: cannot align objects with join='exact' where ... + AlignmentError: cannot align objects with join='exact' where ... Raises ------ From e6424c9b5c2e24833f000964418463718f4616f1 Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Sat, 26 Apr 2025 22:03:28 +0200 Subject: [PATCH 5/6] update whats new --- doc/whats-new.rst | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 20bbdc7ec69..4427e10d994 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -31,7 +31,10 @@ New Features `Miguel Jimenez-Urias `_. - Improved support pandas Extension Arrays. (:issue:`9661`, :pull:`9671`) By `Ilan Gold `_. - +- Improved checks and errors raised when trying to align objects with conflicting indexes. + It is now possible to align objects each with multiple indexes sharing common dimension(s). + (:issue:`7695`, :pull:`10251`) + By `Benoit Bovy `_. Breaking changes ~~~~~~~~~~~~~~~~ From 837c9700abd9741c93bbb750e11d238de902e4e1 Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Sat, 26 Apr 2025 22:19:05 +0200 Subject: [PATCH 6/6] fix doctests (custom exception class path) --- xarray/structure/alignment.py | 2 +- xarray/structure/merge.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/structure/alignment.py b/xarray/structure/alignment.py index cd98ae5d267..ea90519143c 100644 --- a/xarray/structure/alignment.py +++ b/xarray/structure/alignment.py @@ -837,7 +837,7 @@ def align( >>> a, b = xr.align(x, y, join="exact") Traceback (most recent call last): ... - AlignmentError: cannot align objects with join='exact' ... + xarray.structure.alignment.AlignmentError: cannot align objects with join='exact' ... >>> a, b = xr.align(x, y, join="override") >>> a diff --git a/xarray/structure/merge.py b/xarray/structure/merge.py index e1722d7e32e..7d773ce0b4b 100644 --- a/xarray/structure/merge.py +++ b/xarray/structure/merge.py @@ -942,7 +942,7 @@ def merge( >>> xr.merge([x, y, z], join="exact") Traceback (most recent call last): ... - AlignmentError: cannot align objects with join='exact' where ... + xarray.structure.alignment.AlignmentError: cannot align objects with join='exact' where ... Raises ------