Skip to content

Commit c9d3084

Browse files
andersy005 and dcherian authored
Expand use of .oindex and .vindex (#8790)
* refactor __getitem__() by removing vectorized and orthogonal indexing logic from it * implement explicit routing of vectorized and outer indexers * Add VectorizedIndexer and OuterIndexer to ScipyArrayWrapper's __getitem__ method * Refactor indexing in LazilyIndexedArray and LazilyVectorizedIndexedArray * Add vindex and oindex methods to StackedBytesArray * handle explicitlyindexed arrays * Refactor LazilyIndexedArray and LazilyVectorizedIndexedArray classes * Remove TODO comments in indexing.py * use indexing.explicit_indexing_adapter() in scipy backend * update docstring * fix docstring * Add _oindex_get and _vindex_get methods to NativeEndiannessArray and BoolTypeArray * Update indexing support in ScipyArrayWrapper * Update xarray/tests/test_indexing.py Co-authored-by: Deepak Cherian <[email protected]> * Fix assert statement in test_decompose_indexers * add comments to clarifying why the else branch is needed * Add _oindex_get and _vindex_get methods to _ElementwiseFunctionArray * update whats-new * Refactor apply_indexer function in indexing.py and variable.py for code reuse * cleanup --------- Co-authored-by: Deepak Cherian <[email protected]> Co-authored-by: Deepak Cherian <[email protected]>
1 parent 3dcfa31 commit c9d3084

File tree

8 files changed

+137
-55
lines changed

8 files changed

+137
-55
lines changed

doc/whats-new.rst

+3
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,9 @@ New Features
3232
- Add the ``.vindex`` property to Explicitly Indexed Arrays for vectorized indexing functionality. (:issue:`8238`, :pull:`8780`)
3333
By `Anderson Banihirwe <https://github.com/andersy005>`_.
3434

35+
- Expand use of ``.oindex`` and ``.vindex`` properties. (:pull:`8790`)
36+
By `Anderson Banihirwe <https://github.com/andersy005>`_ and `Deepak Cherian <https://github.com/dcherian>`_.
37+
3538
Breaking changes
3639
~~~~~~~~~~~~~~~~
3740

xarray/backends/scipy_.py

+9-2
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
is_valid_nc3_name,
2424
)
2525
from xarray.backends.store import StoreBackendEntrypoint
26-
from xarray.core.indexing import NumpyIndexingAdapter
26+
from xarray.core import indexing
2727
from xarray.core.utils import (
2828
Frozen,
2929
FrozenDict,
@@ -63,8 +63,15 @@ def get_variable(self, needs_lock=True):
6363
ds = self.datastore._manager.acquire(needs_lock)
6464
return ds.variables[self.variable_name]
6565

66+
def _getitem(self, key):
    """Return ``data[key]`` for this wrapper's underlying variable.

    The lookup runs while ``self.datastore.lock`` is held; the variable is
    therefore fetched with ``needs_lock=False``.
    """
    with self.datastore.lock:
        variable = self.get_variable(needs_lock=False)
        return variable.data[key]
70+
6671
def __getitem__(self, key):
67-
data = NumpyIndexingAdapter(self.get_variable().data)[key]
72+
data = indexing.explicit_indexing_adapter(
73+
key, self.shape, indexing.IndexingSupport.OUTER_1VECTOR, self._getitem
74+
)
6875
# Copy data if the source file is mmapped. This makes things consistent
6976
# with the netCDF4 library by ensuring we can safely read arrays even
7077
# after closing associated files.

xarray/coding/strings.py

+6
Original file line numberDiff line numberDiff line change
@@ -238,6 +238,12 @@ def shape(self) -> tuple[int, ...]:
238238
def __repr__(self):
239239
return f"{type(self).__name__}({self.array!r})"
240240

241+
def _vindex_get(self, key):
    """Index the wrapped array via ``vindex`` and convert the result with
    ``_numpy_char_to_bytes``."""
    selected = self.array.vindex[key]
    return _numpy_char_to_bytes(selected)
243+
244+
def _oindex_get(self, key):
    """Index the wrapped array via ``oindex`` and convert the result with
    ``_numpy_char_to_bytes``."""
    selected = self.array.oindex[key]
    return _numpy_char_to_bytes(selected)
246+
241247
def __getitem__(self, key):
242248
# require slicing the last dimension completely
243249
key = type(key)(indexing.expanded_indexer(key.tuple, self.array.ndim))

xarray/coding/variables.py

+18
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,12 @@ def __init__(self, array, func: Callable, dtype: np.typing.DTypeLike):
6868
def dtype(self) -> np.dtype:
6969
return np.dtype(self._dtype)
7070

71+
def _oindex_get(self, key):
    """Return a new wrapper of the same type around ``self.array.oindex[key]``.

    ``self.func`` and ``self.dtype`` are passed through unchanged.
    """
    wrapper_cls = type(self)
    selected = self.array.oindex[key]
    return wrapper_cls(selected, self.func, self.dtype)
73+
74+
def _vindex_get(self, key):
    """Return a new wrapper of the same type around ``self.array.vindex[key]``.

    ``self.func`` and ``self.dtype`` are passed through unchanged.
    """
    wrapper_cls = type(self)
    selected = self.array.vindex[key]
    return wrapper_cls(selected, self.func, self.dtype)
76+
7177
def __getitem__(self, key):
7278
return type(self)(self.array[key], self.func, self.dtype)
7379

@@ -109,6 +115,12 @@ def __init__(self, array) -> None:
109115
def dtype(self) -> np.dtype:
110116
return np.dtype(self.array.dtype.kind + str(self.array.dtype.itemsize))
111117

118+
def _oindex_get(self, key):
    """Outer-index the wrapped array and return the result as an
    ``np.ndarray`` of ``self.dtype``."""
    selected = self.array.oindex[key]
    return np.asarray(selected, dtype=self.dtype)
120+
121+
def _vindex_get(self, key):
    """Vectorized-index the wrapped array and return the result as an
    ``np.ndarray`` of ``self.dtype``."""
    selected = self.array.vindex[key]
    return np.asarray(selected, dtype=self.dtype)
123+
112124
def __getitem__(self, key) -> np.ndarray:
113125
return np.asarray(self.array[key], dtype=self.dtype)
114126

@@ -141,6 +153,12 @@ def __init__(self, array) -> None:
141153
def dtype(self) -> np.dtype:
142154
return np.dtype("bool")
143155

156+
def _oindex_get(self, key):
    """Outer-index the wrapped array and return the result as an
    ``np.ndarray`` of ``self.dtype``."""
    selected = self.array.oindex[key]
    return np.asarray(selected, dtype=self.dtype)
158+
159+
def _vindex_get(self, key):
    """Vectorized-index the wrapped array and return the result as an
    ``np.ndarray`` of ``self.dtype``."""
    selected = self.array.vindex[key]
    return np.asarray(selected, dtype=self.dtype)
161+
144162
def __getitem__(self, key) -> np.ndarray:
145163
return np.asarray(self.array[key], dtype=self.dtype)
146164

xarray/core/indexing.py

+56-32
Original file line numberDiff line numberDiff line change
@@ -491,6 +491,13 @@ def _oindex_get(self, key):
491491
def _vindex_get(self, key):
492492
raise NotImplementedError("This method should be overridden")
493493

494+
def _check_and_raise_if_non_basic_indexer(self, key):
    """Reject vectorized and outer indexers.

    Raises
    ------
    TypeError
        If ``key`` is a ``VectorizedIndexer`` or an ``OuterIndexer``; the
        message points callers at the ``.vindex`` / ``.oindex`` properties.
        Any other key passes through silently.
    """
    if not isinstance(key, (VectorizedIndexer, OuterIndexer)):
        return
    raise TypeError(
        "Vectorized indexing with vectorized or outer indexers is not supported. "
        "Please use .vindex and .oindex properties to index the array."
    )
500+
494501
@property
495502
def oindex(self):
496503
return IndexCallable(self._oindex_get)
@@ -517,7 +524,10 @@ def get_duck_array(self):
517524

518525
def __getitem__(self, key):
519526
key = expanded_indexer(key, self.ndim)
520-
result = self.array[self.indexer_cls(key)]
527+
indexer = self.indexer_cls(key)
528+
529+
result = apply_indexer(self.array, indexer)
530+
521531
if isinstance(result, ExplicitlyIndexed):
522532
return type(self)(result, self.indexer_cls)
523533
else:
@@ -577,7 +587,13 @@ def shape(self) -> tuple[int, ...]:
577587
return tuple(shape)
578588

579589
def get_duck_array(self):
580-
array = self.array[self.key]
590+
if isinstance(self.array, ExplicitlyIndexedNDArrayMixin):
591+
array = apply_indexer(self.array, self.key)
592+
else:
593+
# If the array is not an ExplicitlyIndexedNDArrayMixin,
594+
# it may wrap a BackendArray so use its __getitem__
595+
array = self.array[self.key]
596+
581597
# self.array[self.key] is now a numpy array when
582598
# self.array is a BackendArray subclass
583599
# and self.key is BasicIndexer((slice(None, None, None),))
@@ -594,12 +610,10 @@ def _oindex_get(self, indexer):
594610

595611
def _vindex_get(self, indexer):
596612
array = LazilyVectorizedIndexedArray(self.array, self.key)
597-
return array[indexer]
613+
return array.vindex[indexer]
598614

599615
def __getitem__(self, indexer):
600-
if isinstance(indexer, VectorizedIndexer):
601-
array = LazilyVectorizedIndexedArray(self.array, self.key)
602-
return array[indexer]
616+
self._check_and_raise_if_non_basic_indexer(indexer)
603617
return type(self)(self.array, self._updated_key(indexer))
604618

605619
def __setitem__(self, key, value):
@@ -643,7 +657,13 @@ def shape(self) -> tuple[int, ...]:
643657
return np.broadcast(*self.key.tuple).shape
644658

645659
def get_duck_array(self):
646-
array = self.array[self.key]
660+
661+
if isinstance(self.array, ExplicitlyIndexedNDArrayMixin):
662+
array = apply_indexer(self.array, self.key)
663+
else:
664+
# If the array is not an ExplicitlyIndexedNDArrayMixin,
665+
# it may wrap a BackendArray so use its __getitem__
666+
array = self.array[self.key]
647667
# self.array[self.key] is now a numpy array when
648668
# self.array is a BackendArray subclass
649669
# and self.key is BasicIndexer((slice(None, None, None),))
@@ -662,6 +682,7 @@ def _vindex_get(self, indexer):
662682
return type(self)(self.array, self._updated_key(indexer))
663683

664684
def __getitem__(self, indexer):
685+
self._check_and_raise_if_non_basic_indexer(indexer)
665686
# If the indexed array becomes a scalar, return LazilyIndexedArray
666687
if all(isinstance(ind, integer_types) for ind in indexer.tuple):
667688
key = BasicIndexer(tuple(k[indexer.tuple] for k in self.key.tuple))
@@ -706,12 +727,13 @@ def get_duck_array(self):
706727
return self.array.get_duck_array()
707728

708729
def _oindex_get(self, key):
709-
return type(self)(_wrap_numpy_scalars(self.array[key]))
730+
return type(self)(_wrap_numpy_scalars(self.array.oindex[key]))
710731

711732
def _vindex_get(self, key):
712-
return type(self)(_wrap_numpy_scalars(self.array[key]))
733+
return type(self)(_wrap_numpy_scalars(self.array.vindex[key]))
713734

714735
def __getitem__(self, key):
736+
self._check_and_raise_if_non_basic_indexer(key)
715737
return type(self)(_wrap_numpy_scalars(self.array[key]))
716738

717739
def transpose(self, order):
@@ -745,12 +767,13 @@ def get_duck_array(self):
745767
return self.array.get_duck_array()
746768

747769
def _oindex_get(self, key):
748-
return type(self)(_wrap_numpy_scalars(self.array[key]))
770+
return type(self)(_wrap_numpy_scalars(self.array.oindex[key]))
749771

750772
def _vindex_get(self, key):
751-
return type(self)(_wrap_numpy_scalars(self.array[key]))
773+
return type(self)(_wrap_numpy_scalars(self.array.vindex[key]))
752774

753775
def __getitem__(self, key):
776+
self._check_and_raise_if_non_basic_indexer(key)
754777
return type(self)(_wrap_numpy_scalars(self.array[key]))
755778

756779
def transpose(self, order):
@@ -912,10 +935,21 @@ def explicit_indexing_adapter(
912935
result = raw_indexing_method(raw_key.tuple)
913936
if numpy_indices.tuple:
914937
# index the loaded np.ndarray
915-
result = NumpyIndexingAdapter(result)[numpy_indices]
938+
indexable = NumpyIndexingAdapter(result)
939+
result = apply_indexer(indexable, numpy_indices)
916940
return result
917941

918942

943+
def apply_indexer(indexable, indexer):
    """Apply an indexer to an indexable object.

    Dispatch is by indexer type: a ``VectorizedIndexer`` is applied through
    ``indexable.vindex``, an ``OuterIndexer`` through ``indexable.oindex``,
    and any other indexer through plain ``indexable[indexer]``.
    """
    if isinstance(indexer, VectorizedIndexer):
        return indexable.vindex[indexer]
    if isinstance(indexer, OuterIndexer):
        return indexable.oindex[indexer]
    return indexable[indexer]
951+
952+
919953
def decompose_indexer(
920954
indexer: ExplicitIndexer, shape: tuple[int, ...], indexing_support: IndexingSupport
921955
) -> tuple[ExplicitIndexer, ExplicitIndexer]:
@@ -987,10 +1021,10 @@ def _decompose_vectorized_indexer(
9871021
>>> array = np.arange(36).reshape(6, 6)
9881022
>>> backend_indexer = OuterIndexer((np.array([0, 1, 3]), np.array([2, 3])))
9891023
>>> # load subslice of the array
990-
... array = NumpyIndexingAdapter(array)[backend_indexer]
1024+
... array = NumpyIndexingAdapter(array).oindex[backend_indexer]
9911025
>>> np_indexer = VectorizedIndexer((np.array([0, 2, 1]), np.array([0, 1, 0])))
9921026
>>> # vectorized indexing for on-memory np.ndarray.
993-
... NumpyIndexingAdapter(array)[np_indexer]
1027+
... NumpyIndexingAdapter(array).vindex[np_indexer]
9941028
array([ 2, 21, 8])
9951029
"""
9961030
assert isinstance(indexer, VectorizedIndexer)
@@ -1072,7 +1106,7 @@ def _decompose_outer_indexer(
10721106
... array = NumpyIndexingAdapter(array)[backend_indexer]
10731107
>>> np_indexer = OuterIndexer((np.array([0, 2, 1]), np.array([0, 1, 0])))
10741108
>>> # outer indexing for on-memory np.ndarray.
1075-
... NumpyIndexingAdapter(array)[np_indexer]
1109+
... NumpyIndexingAdapter(array).oindex[np_indexer]
10761110
array([[ 2, 3, 2],
10771111
[14, 15, 14],
10781112
[ 8, 9, 8]])
@@ -1395,6 +1429,7 @@ def _vindex_get(self, key):
13951429
return array[key.tuple]
13961430

13971431
def __getitem__(self, key):
1432+
self._check_and_raise_if_non_basic_indexer(key)
13981433
array, key = self._indexing_array_and_key(key)
13991434
return array[key]
14001435

@@ -1450,15 +1485,8 @@ def _vindex_get(self, key):
14501485
raise TypeError("Vectorized indexing is not supported")
14511486

14521487
def __getitem__(self, key):
1453-
if isinstance(key, BasicIndexer):
1454-
return self.array[key.tuple]
1455-
elif isinstance(key, OuterIndexer):
1456-
return self.oindex[key]
1457-
else:
1458-
if isinstance(key, VectorizedIndexer):
1459-
raise TypeError("Vectorized indexing is not supported")
1460-
else:
1461-
raise TypeError(f"Unrecognized indexer: {key}")
1488+
self._check_and_raise_if_non_basic_indexer(key)
1489+
return self.array[key.tuple]
14621490

14631491
def __setitem__(self, key, value):
14641492
if isinstance(key, (BasicIndexer, OuterIndexer)):
@@ -1499,13 +1527,8 @@ def _vindex_get(self, key):
14991527
return self.array.vindex[key.tuple]
15001528

15011529
def __getitem__(self, key):
1502-
if isinstance(key, BasicIndexer):
1503-
return self.array[key.tuple]
1504-
elif isinstance(key, VectorizedIndexer):
1505-
return self.vindex[key]
1506-
else:
1507-
assert isinstance(key, OuterIndexer)
1508-
return self.oindex[key]
1530+
self._check_and_raise_if_non_basic_indexer(key)
1531+
return self.array[key.tuple]
15091532

15101533
def __setitem__(self, key, value):
15111534
if isinstance(key, BasicIndexer):
@@ -1603,7 +1626,8 @@ def __getitem__(
16031626
(key,) = key
16041627

16051628
if getattr(key, "ndim", 0) > 1: # Return np-array if multidimensional
1606-
return NumpyIndexingAdapter(np.asarray(self))[indexer]
1629+
indexable = NumpyIndexingAdapter(np.asarray(self))
1630+
return apply_indexer(indexable, indexer)
16071631

16081632
result = self.array[key]
16091633

xarray/core/variable.py

+4-13
Original file line numberDiff line numberDiff line change
@@ -761,12 +761,8 @@ def __getitem__(self, key) -> Self:
761761
dims, indexer, new_order = self._broadcast_indexes(key)
762762
indexable = as_indexable(self._data)
763763

764-
if isinstance(indexer, OuterIndexer):
765-
data = indexable.oindex[indexer]
766-
elif isinstance(indexer, VectorizedIndexer):
767-
data = indexable.vindex[indexer]
768-
else:
769-
data = indexable[indexer]
764+
data = indexing.apply_indexer(indexable, indexer)
765+
770766
if new_order:
771767
data = np.moveaxis(data, range(len(new_order)), new_order)
772768
return self._finalize_indexing_result(dims, data)
@@ -791,6 +787,7 @@ def _getitem_with_mask(self, key, fill_value=dtypes.NA):
791787
dims, indexer, new_order = self._broadcast_indexes(key)
792788

793789
if self.size:
790+
794791
if is_duck_dask_array(self._data):
795792
# dask's indexing is faster this way; also vindex does not
796793
# support negative indices yet:
@@ -800,14 +797,8 @@ def _getitem_with_mask(self, key, fill_value=dtypes.NA):
800797
actual_indexer = indexer
801798

802799
indexable = as_indexable(self._data)
800+
data = indexing.apply_indexer(indexable, actual_indexer)
803801

804-
if isinstance(indexer, OuterIndexer):
805-
data = indexable.oindex[indexer]
806-
807-
elif isinstance(indexer, VectorizedIndexer):
808-
data = indexable.vindex[indexer]
809-
else:
810-
data = indexable[actual_indexer]
811802
mask = indexing.create_mask(indexer, self.shape, data)
812803
# we need to invert the mask in order to pass data first. This helps
813804
# pint to choose the correct unit

xarray/tests/test_coding_strings.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -181,7 +181,7 @@ def test_StackedBytesArray_vectorized_indexing() -> None:
181181

182182
V = IndexerMaker(indexing.VectorizedIndexer)
183183
indexer = V[np.array([[0, 1], [1, 0]])]
184-
actual = stacked[indexer]
184+
actual = stacked.vindex[indexer]
185185
assert_array_equal(actual, expected)
186186

187187

0 commit comments

Comments
 (0)