From 00baaddefae0a189874ca64d9f4be4d2d83cc744 Mon Sep 17 00:00:00 2001 From: Benoit Bovy Date: Wed, 7 Sep 2022 15:59:00 +0200 Subject: [PATCH] review PandasMultiIndex.sel internals Review only the case where labels are provided for index levels. Allow providing array-like objects as labels. Handle slices in a cleaner way. pandas MultiIndex methods are used like this: - use ``pandas.MultiIndex.get_loc`` when every level is provided with a scalar label (no slice, no array) - use ``pandas.MultiIndex.get_loc_level`` when only a subset of levels are provided with scalar labels - use ``pandas.MultiIndex.get_locs`` for all other cases. --- xarray/core/indexes.py | 38 ++++++++++++++++++++++++-------------- 1 file changed, 24 insertions(+), 14 deletions(-) diff --git a/xarray/core/indexes.py b/xarray/core/indexes.py index 8ff0d40ff07..e0c9a4f2448 100644 --- a/xarray/core/indexes.py +++ b/xarray/core/indexes.py @@ -783,24 +783,28 @@ def sel(self, labels, method=None, tolerance=None) -> IndexSelResult: # label(s) given for multi-index level(s) if all([lbl in self.index.names for lbl in labels]): label_values = {} + has_array = False for k, v in labels.items(): - label_array = normalize_label(v, dtype=self.level_coords_dtype[k]) - try: - label_values[k] = as_scalar(label_array) - except ValueError: - # label should be an item not an array-like - raise ValueError( - "Vectorized selection is not " - f"available along coordinate {k!r} (multi-index level)" - ) - - has_slice = any([isinstance(v, slice) for v in label_values.values()]) - - if len(label_values) == self.index.nlevels and not has_slice: + if isinstance(v, slice): + label_values[k] = v + else: + label_array = normalize_label(v, dtype=self.level_coords_dtype[k]) + try: + label_values[k] = as_scalar(label_array) + except ValueError: + label_values[k] = label_array + has_array = True + + all_levels = len(label_values) == self.index.nlevels + is_slice = [isinstance(v, slice) for v in label_values.values()] + + if 
all_levels and not any(is_slice) and not has_array: + # only one item is selected (or KeyError raised by pandas) indexer = self.index.get_loc( tuple(label_values[k] for k in self.index.names) ) - else: + elif not any(is_slice) and not has_array: + # select only one level or only one item indexer, new_index = self.index.get_loc_level( tuple(label_values.values()), level=tuple(label_values.keys()) ) @@ -808,6 +812,12 @@ def sel(self, labels, method=None, tolerance=None) -> IndexSelResult: # GH2619. Raise a KeyError if nothing is chosen if indexer.dtype.kind == "b" and indexer.sum() == 0: raise KeyError(f"{labels} not found") + else: + # all other cases + seq = [label_values.get(k, slice(None)) for k in self.index.names] + indexer = self.index.get_locs(seq) + if not len(indexer): + raise KeyError(f"{labels} not found") # assume one label value given for the multi-index "array" (dimension) else: