diff --git a/xarray/backends/common.py b/xarray/backends/common.py index 58a98598a5b..4775eedde32 100644 --- a/xarray/backends/common.py +++ b/xarray/backends/common.py @@ -31,6 +31,7 @@ if TYPE_CHECKING: from xarray.core.dataset import Dataset from xarray.core.types import NestedSequence + from xarray.namedarray._typing import _OuterIndexerKey, _VectorizedIndexerKey T_Name = Union[Hashable, None] @@ -268,11 +269,35 @@ def robust_getitem(array, key, catch=Exception, max_retries=6, initial_delay=500 class BackendArray(NdimSizeLenMixin, indexing.ExplicitlyIndexed): - __slots__ = () - def get_duck_array(self, dtype: np.typing.DTypeLike = None): key = indexing.BasicIndexer((slice(None),) * self.ndim) - return self[key] # type: ignore[index] + return self[key] # type: ignore [index] + + +class NewBackendArray(NdimSizeLenMixin, indexing.ExplicitlyIndexed): + __slots__ = ("indexing_support",) + + def get_duck_array(self, dtype: np.typing.DTypeLike = None): + key = (slice(None),) * self.ndim + return self[key] # type: ignore [index] + + def _oindex_get(self, key: _OuterIndexerKey) -> Any: + raise NotImplementedError( + f"{self.__class__.__name__}._oindex_get method should be overridden" + ) + + def _vindex_get(self, key: _VectorizedIndexerKey) -> Any: + raise NotImplementedError( + f"{self.__class__.__name__}._vindex_get method should be overridden" + ) + + @property + def oindex(self) -> indexing.IndexCallable: + return indexing.IndexCallable(self._oindex_get) + + @property + def vindex(self) -> indexing.IndexCallable: + return indexing.IndexCallable(self._vindex_get) class AbstractDataStore: diff --git a/xarray/backends/h5netcdf_.py b/xarray/backends/h5netcdf_.py index 717ee48db3b..e827ecbae6d 100644 --- a/xarray/backends/h5netcdf_.py +++ b/xarray/backends/h5netcdf_.py @@ -44,16 +44,33 @@ from xarray.core.dataset import Dataset from xarray.core.datatree import DataTree from xarray.core.types import ReadBuffer + from xarray.namedarray._typing import ( + _BasicIndexerKey, 
+ _OuterIndexerKey, + _VectorizedIndexerKey, + ) class H5NetCDFArrayWrapper(BaseNetCDF4Array): + indexing_support = indexing.IndexingSupport.OUTER_1VECTOR + def get_array(self, needs_lock=True): ds = self.datastore._acquire(needs_lock) return ds.variables[self.variable_name] - def __getitem__(self, key): - return indexing.explicit_indexing_adapter( - key, self.shape, indexing.IndexingSupport.OUTER_1VECTOR, self._getitem + def _oindex_get(self, key: _OuterIndexerKey) -> Any: + return indexing.outer_indexing_adapter( + key, self.shape, self.indexing_support, self._getitem + ) + + def _vindex_get(self, key: _VectorizedIndexerKey) -> Any: + return indexing.vectorized_indexing_adapter( + key, self.shape, self.indexing_support, self._getitem + ) + + def __getitem__(self, key: _BasicIndexerKey) -> Any: + return indexing.basic_indexing_adapter( + key, self.shape, self.indexing_support, self._getitem ) def _getitem(self, key): diff --git a/xarray/backends/netCDF4_.py b/xarray/backends/netCDF4_.py index a23d247b6c3..8130c264021 100644 --- a/xarray/backends/netCDF4_.py +++ b/xarray/backends/netCDF4_.py @@ -12,8 +12,8 @@ from xarray import coding from xarray.backends.common import ( BACKEND_ENTRYPOINTS, - BackendArray, BackendEntrypoint, + NewBackendArray, WritableCFDataStore, _normalize_path, datatree_from_dict_with_io_cleanup, @@ -48,6 +48,11 @@ from xarray.core.dataset import Dataset from xarray.core.datatree import DataTree from xarray.core.types import ReadBuffer + from xarray.namedarray._typing import ( + _BasicIndexerKey, + _OuterIndexerKey, + _VectorizedIndexerKey, + ) # This lookup table maps from dtype.byteorder to a readable endian # string used by netCDF4. 
@@ -56,7 +61,7 @@ NETCDF4_PYTHON_LOCK = combine_locks([NETCDFC_LOCK, HDF5_LOCK]) -class BaseNetCDF4Array(BackendArray): +class BaseNetCDF4Array(NewBackendArray): __slots__ = ("datastore", "dtype", "shape", "variable_name") def __init__(self, variable_name, datastore): @@ -88,7 +93,7 @@ def get_array(self, needs_lock=True): class NetCDF4ArrayWrapper(BaseNetCDF4Array): - __slots__ = () + indexing_support = indexing.IndexingSupport.OUTER def get_array(self, needs_lock=True): ds = self.datastore._acquire(needs_lock) @@ -99,9 +104,19 @@ def get_array(self, needs_lock=True): variable.set_auto_chartostring(False) return variable - def __getitem__(self, key): - return indexing.explicit_indexing_adapter( - key, self.shape, indexing.IndexingSupport.OUTER, self._getitem + def _oindex_get(self, key: _OuterIndexerKey): + return indexing.outer_indexing_adapter( + key, self.shape, self.indexing_support, self._getitem + ) + + def _vindex_get(self, key: _VectorizedIndexerKey): + return indexing.vectorized_indexing_adapter( + key, self.shape, self.indexing_support, self._getitem + ) + + def __getitem__(self, key: _BasicIndexerKey): + return indexing.basic_indexing_adapter( + key, self.shape, self.indexing_support, self._getitem ) def _getitem(self, key): diff --git a/xarray/backends/pydap_.py b/xarray/backends/pydap_.py index 74ddbc8443b..2ec260a3d11 100644 --- a/xarray/backends/pydap_.py +++ b/xarray/backends/pydap_.py @@ -8,8 +8,8 @@ from xarray.backends.common import ( BACKEND_ENTRYPOINTS, AbstractDataStore, - BackendArray, BackendEntrypoint, + NewBackendArray, robust_getitem, ) from xarray.backends.store import StoreBackendEntrypoint @@ -29,10 +29,17 @@ from xarray.core.dataset import Dataset from xarray.core.types import ReadBuffer + from xarray.namedarray._typing import ( + _BasicIndexerKey, + _OuterIndexerKey, + _VectorizedIndexerKey, + ) -class PydapArrayWrapper(BackendArray): - def __init__(self, array): +class PydapArrayWrapper(NewBackendArray): + indexing_support = 
indexing.IndexingSupport.BASIC + + def __init__(self, array) -> None: self.array = array @property @@ -43,9 +50,19 @@ def shape(self) -> tuple[int, ...]: def dtype(self): return self.array.dtype - def __getitem__(self, key): - return indexing.explicit_indexing_adapter( - key, self.shape, indexing.IndexingSupport.BASIC, self._getitem + def _oindex_get(self, key: _OuterIndexerKey) -> Any: + return indexing.outer_indexing_adapter( + key, self.shape, self.indexing_support, self._getitem + ) + + def _vindex_get(self, key: _VectorizedIndexerKey) -> Any: + return indexing.vectorized_indexing_adapter( + key, self.shape, self.indexing_support, self._getitem + ) + + def __getitem__(self, key: _BasicIndexerKey) -> Any: + return indexing.basic_indexing_adapter( + key, self.shape, self.indexing_support, self._getitem ) def _getitem(self, key): diff --git a/xarray/backends/scipy_.py b/xarray/backends/scipy_.py index 93d0e40a6e1..1793f619a85 100644 --- a/xarray/backends/scipy_.py +++ b/xarray/backends/scipy_.py @@ -10,8 +10,8 @@ from xarray.backends.common import ( BACKEND_ENTRYPOINTS, - BackendArray, BackendEntrypoint, + NewBackendArray, WritableCFDataStore, _normalize_path, ) @@ -37,6 +37,11 @@ from xarray.backends.common import AbstractDataStore from xarray.core.dataset import Dataset from xarray.core.types import ReadBuffer + from xarray.namedarray._typing import ( + _BasicIndexerKey, + _OuterIndexerKey, + _VectorizedIndexerKey, + ) HAS_NUMPY_2_0 = module_available("numpy", minversion="2.0.0.dev0") @@ -54,7 +59,9 @@ def _decode_attrs(d): return {k: v if k == "_FillValue" else _decode_string(v) for (k, v) in d.items()} -class ScipyArrayWrapper(BackendArray): +class ScipyArrayWrapper(NewBackendArray): + indexing_support = indexing.IndexingSupport.OUTER_1VECTOR + def __init__(self, variable_name, datastore): self.datastore = datastore self.variable_name = variable_name @@ -66,15 +73,7 @@ def get_variable(self, needs_lock=True): ds = self.datastore._manager.acquire(needs_lock) 
return ds.variables[self.variable_name] - def _getitem(self, key): - with self.datastore.lock: - data = self.get_variable(needs_lock=False).data - return data[key] - - def __getitem__(self, key): - data = indexing.explicit_indexing_adapter( - key, self.shape, indexing.IndexingSupport.OUTER_1VECTOR, self._getitem - ) + def _finalize_result(self, data): # Copy data if the source file is mmapped. This makes things consistent # with the netCDF4 library by ensuring we can safely read arrays even # after closing associated files. @@ -87,7 +86,30 @@ def __getitem__(self, key): return np.array(data, dtype=self.dtype, copy=copy) - def __setitem__(self, key, value): + def _getitem(self, key): + with self.datastore.lock: + data = self.get_variable(needs_lock=False).data + return data[key] + + def _vindex_get(self, key: _VectorizedIndexerKey) -> Any: + data = indexing.vectorized_indexing_adapter( + key, self.shape, self.indexing_support, self._getitem + ) + return self._finalize_result(data) + + def _oindex_get(self, key: _OuterIndexerKey) -> Any: + data = indexing.outer_indexing_adapter( + key, self.shape, self.indexing_support, self._getitem + ) + return self._finalize_result(data) + + def __getitem__(self, key: _BasicIndexerKey) -> Any: + data = indexing.basic_indexing_adapter( + key, self.shape, self.indexing_support, self._getitem + ) + return self._finalize_result(data) + + def __setitem__(self, key, value) -> None: with self.datastore.lock: data = self.get_variable(needs_lock=False) try: diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index e83f5556369..e79cb488ac5 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -14,8 +14,8 @@ from xarray.backends.common import ( BACKEND_ENTRYPOINTS, AbstractWritableDataStore, - BackendArray, BackendEntrypoint, + NewBackendArray, _encode_variable_name, _normalize_path, datatree_from_dict_with_io_cleanup, @@ -42,6 +42,11 @@ from xarray.core.dataset import Dataset from xarray.core.datatree import 
DataTree from xarray.core.types import ReadBuffer, ZarrArray, ZarrGroup + from xarray.namedarray._typing import ( + _BasicIndexerKey, + _OuterIndexerKey, + _VectorizedIndexerKey, + ) def _get_mappers(*, storage_options, store, chunk_store): @@ -179,8 +184,8 @@ def encode_zarr_attr_value(value): return encoded -class ZarrArrayWrapper(BackendArray): - __slots__ = ("_array", "dtype", "shape") +class ZarrArrayWrapper(NewBackendArray): + indexing_support = indexing.IndexingSupport.VECTORIZED def __init__(self, zarr_array): # some callers attempt to evaluate an array if an `array` property exists on the object. @@ -203,25 +208,28 @@ def __init__(self, zarr_array): def get_array(self): return self._array - def _oindex(self, key): - return self._array.oindex[key] - - def _vindex(self, key): - return self._array.vindex[key] - - def _getitem(self, key): - return self._array[key] - - def __getitem__(self, key): - array = self._array - if isinstance(key, indexing.BasicIndexer): - method = self._getitem - elif isinstance(key, indexing.VectorizedIndexer): - method = self._vindex - elif isinstance(key, indexing.OuterIndexer): - method = self._oindex - return indexing.explicit_indexing_adapter( - key, array.shape, indexing.IndexingSupport.VECTORIZED, method + def _oindex_get(self, key: _OuterIndexerKey) -> Any: + def raw_indexing_method(key): + return self._array.oindex[key] + + return indexing.outer_indexing_adapter( + key, self._array.shape, self.indexing_support, raw_indexing_method + ) + + def _vindex_get(self, key: _VectorizedIndexerKey) -> Any: + def raw_indexing_method(key): + return self._array.vindex[key] + + return indexing.vectorized_indexing_adapter( + key, self._array.shape, self.indexing_support, raw_indexing_method + ) + + def __getitem__(self, key: _BasicIndexerKey) -> Any: + def raw_indexing_method(key): + return self._array[key] + + return indexing.basic_indexing_adapter( + key, self._array.shape, self.indexing_support, raw_indexing_method ) # if self.ndim == 0: 
diff --git a/xarray/coding/strings.py b/xarray/coding/strings.py index 4ca6a3f0a46..37b9e548e0f 100644 --- a/xarray/coding/strings.py +++ b/xarray/coding/strings.py @@ -3,6 +3,7 @@ from __future__ import annotations from functools import partial +from typing import TYPE_CHECKING import numpy as np @@ -22,6 +23,13 @@ HAS_NUMPY_2_0 = module_available("numpy", minversion="2.0.0.dev0") +if TYPE_CHECKING: + from xarray.namedarray._typing import ( + _BasicIndexerKey, + _OuterIndexerKey, + _VectorizedIndexerKey, + ) + def create_vlen_dtype(element_type): if element_type not in (str, bytes): @@ -220,8 +228,7 @@ class StackedBytesArray(indexing.ExplicitlyIndexedNDArrayMixin): """Wrapper around array-like objects to create a new indexable object where values, when accessed, are automatically stacked along the last dimension. - >>> indexer = indexing.BasicIndexer((slice(None),)) - >>> StackedBytesArray(np.array(["a", "b", "c"], dtype="S1"))[indexer] + >>> StackedBytesArray(np.array(["a", "b", "c"], dtype="S1"))[(slice(None),)] array(b'abc', dtype='|S3') """ @@ -240,7 +247,7 @@ def __init__(self, array): @property def dtype(self): - return np.dtype("S" + str(self.array.shape[-1])) + return np.dtype(f"S{self.array.shape[-1]!s}") @property def shape(self) -> tuple[int, ...]: @@ -249,15 +256,15 @@ def shape(self) -> tuple[int, ...]: def __repr__(self): return f"{type(self).__name__}({self.array!r})" - def _vindex_get(self, key): + def _vindex_get(self, key: _VectorizedIndexerKey): return _numpy_char_to_bytes(self.array.vindex[key]) - def _oindex_get(self, key): + def _oindex_get(self, key: _OuterIndexerKey): return _numpy_char_to_bytes(self.array.oindex[key]) - def __getitem__(self, key): + def __getitem__(self, key: _BasicIndexerKey): # require slicing the last dimension completely - key = type(key)(indexing.expanded_indexer(key.tuple, self.array.ndim)) - if key.tuple[-1] != slice(None): + indexer = indexing.expanded_indexer(key, self.array.ndim) + if indexer[-1] != slice(None): 
raise IndexError("too many indices") - return _numpy_char_to_bytes(self.array[key]) + return _numpy_char_to_bytes(self.array[indexer]) diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py index 83112628dbb..ef794ec763a 100644 --- a/xarray/coding/variables.py +++ b/xarray/coding/variables.py @@ -99,8 +99,7 @@ class NativeEndiannessArray(indexing.ExplicitlyIndexedNDArrayMixin): >>> NativeEndiannessArray(x).dtype dtype('int16') - >>> indexer = indexing.BasicIndexer((slice(None),)) - >>> NativeEndiannessArray(x)[indexer].dtype + >>> NativeEndiannessArray(x)[(slice(None),)].dtype dtype('int16') """ @@ -137,8 +136,7 @@ class BoolTypeArray(indexing.ExplicitlyIndexedNDArrayMixin): >>> BoolTypeArray(x).dtype dtype('bool') - >>> indexer = indexing.BasicIndexer((slice(None),)) - >>> BoolTypeArray(x)[indexer].dtype + >>> BoolTypeArray(x)[(slice(None),)].dtype dtype('bool') """ diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index 51fc4a00421..3fda88956d1 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -38,7 +38,17 @@ from xarray.core.indexes import Index from xarray.core.types import Self from xarray.core.variable import Variable - from xarray.namedarray._typing import _Shape, duckarray + from xarray.namedarray._typing import ( + _BasicIndexerKey, + _Chunks, + _IndexerKey, + _IndexKey, + _IndexKeys, + _OuterIndexerKey, + _Shape, + _VectorizedIndexerKey, + duckarray, + ) from xarray.namedarray.parallelcompat import ChunkManagerEntrypoint @@ -209,7 +219,7 @@ def map_index_queries( return merged -def expanded_indexer(key, ndim): +def expanded_indexer(key: _IndexerKey | _IndexKeys, ndim: int) -> _IndexKeys: """Given a key for indexing an ndarray, return an equivalent key which is a tuple with length equal to the number of dimensions. 
@@ -220,14 +230,14 @@ def expanded_indexer(key, ndim): if not isinstance(key, tuple): # numpy treats non-tuple keys equivalent to tuples of length 1 key = (key,) - new_key = [] + new_key: list[_IndexKey] = [] # handling Ellipsis right is a little tricky, see: # https://numpy.org/doc/stable/reference/arrays.indexing.html#advanced-indexing found_ellipsis = False for k in key: if k is Ellipsis: if not found_ellipsis: - new_key.extend((ndim + 1 - len(key)) * [slice(None)]) + new_key.extend([slice(None)] * (ndim + 1 - len(key))) found_ellipsis = True else: new_key.append(slice(None)) @@ -235,7 +245,7 @@ def expanded_indexer(key, ndim): new_key.append(k) if len(new_key) > ndim: raise IndexError("too many indices") - new_key.extend((ndim - len(new_key)) * [slice(None)]) + new_key.extend([slice(None)] * (ndim - len(new_key))) return tuple(new_key) @@ -298,7 +308,7 @@ def slice_slice(old_slice: slice, applied_slice: slice, size: int) -> slice: return slice(start, stop, step) -def _index_indexer_1d(old_indexer, applied_indexer, size: int): +def _index_indexer_1d(old_indexer: Any, applied_indexer: Any, size: int) -> Any: if isinstance(applied_indexer, slice) and applied_indexer == slice(None): # shortcut for the usual case return old_indexer @@ -327,13 +337,13 @@ class ExplicitIndexer: __slots__ = ("_key",) - def __init__(self, key: tuple[Any, ...]): + def __init__(self, key: _IndexerKey): if type(self) is ExplicitIndexer: raise TypeError("cannot instantiate base ExplicitIndexer objects") self._key = tuple(key) @property - def tuple(self) -> tuple[Any, ...]: + def tuple(self) -> _IndexerKey: return self._key def __repr__(self) -> str: @@ -387,11 +397,11 @@ class BasicIndexer(ExplicitIndexer): __slots__ = () - def __init__(self, key: tuple[int | np.integer | slice, ...]): + def __init__(self, key: _BasicIndexerKey): if not isinstance(key, tuple): raise TypeError(f"key must be a tuple: {key!r}") - new_key = [] + new_key: tuple[int | np.integer | slice, ...] 
= () for k in key: if isinstance(k, integer_types): k = int(k) @@ -401,9 +411,9 @@ def __init__(self, key: tuple[int | np.integer | slice, ...]): raise TypeError( f"unexpected indexer type for {type(self).__name__}: {k!r}" ) - new_key.append(k) + new_key += (k,) - super().__init__(tuple(new_key)) + super().__init__(new_key) class OuterIndexer(ExplicitIndexer): @@ -419,14 +429,14 @@ class OuterIndexer(ExplicitIndexer): def __init__( self, - key: tuple[ - int | np.integer | slice | np.ndarray[Any, np.dtype[np.generic]], ... - ], + key: _OuterIndexerKey, ): if not isinstance(key, tuple): raise TypeError(f"key must be a tuple: {key!r}") - new_key = [] + new_key: tuple[ + int | np.integer | slice | np.ndarray[Any, np.dtype[np.generic]], ... + ] = () for k in key: if isinstance(k, integer_types): k = int(k) @@ -447,9 +457,9 @@ def __init__( raise TypeError( f"unexpected indexer type for {type(self).__name__}: {k!r}" ) - new_key.append(k) + new_key += (k,) - super().__init__(tuple(new_key)) + super().__init__(new_key) class VectorizedIndexer(ExplicitIndexer): @@ -464,11 +474,11 @@ class VectorizedIndexer(ExplicitIndexer): __slots__ = () - def __init__(self, key: tuple[slice | np.ndarray[Any, np.dtype[np.generic]], ...]): + def __init__(self, key: _VectorizedIndexerKey): if not isinstance(key, tuple): raise TypeError(f"key must be a tuple: {key!r}") - new_key = [] + new_key: tuple[slice | np.ndarray[Any, np.dtype[np.integer]], ...] = () ndim = None for k in key: if isinstance(k, slice): @@ -491,9 +501,9 @@ def __init__(self, key: tuple[slice | np.ndarray[Any, np.dtype[np.generic]], ... 
raise TypeError( f"unexpected indexer type for {type(self).__name__}: {k!r}" ) - new_key.append(k) + new_key += (k,) - super().__init__(tuple(new_key)) + super().__init__(new_key) class ExplicitlyIndexed: @@ -517,26 +527,25 @@ def get_duck_array(self): class ExplicitlyIndexedNDArrayMixin(NDArrayMixin, ExplicitlyIndexed): __slots__ = () - def get_duck_array(self): - key = BasicIndexer((slice(None),) * self.ndim) - return self[key] + def get_duck_array(self) -> Any: + return self[(slice(None),) * self.ndim] - def _oindex_get(self, indexer: OuterIndexer): + def _oindex_get(self, indexer: _OuterIndexerKey) -> Any: raise NotImplementedError( f"{self.__class__.__name__}._oindex_get method should be overridden" ) - def _vindex_get(self, indexer: VectorizedIndexer): + def _vindex_get(self, indexer: _VectorizedIndexerKey) -> Any: raise NotImplementedError( f"{self.__class__.__name__}._vindex_get method should be overridden" ) - def _oindex_set(self, indexer: OuterIndexer, value: Any) -> None: + def _oindex_set(self, indexer: _OuterIndexerKey, value: Any) -> None: raise NotImplementedError( f"{self.__class__.__name__}._oindex_set method should be overridden" ) - def _vindex_set(self, indexer: VectorizedIndexer, value: Any) -> None: + def _vindex_set(self, indexer: _VectorizedIndexerKey, value: Any) -> None: raise NotImplementedError( f"{self.__class__.__name__}._vindex_set method should be overridden" ) @@ -574,12 +583,12 @@ def __array__( else: return np.asarray(self.get_duck_array(), dtype=dtype) - def get_duck_array(self): + def get_duck_array(self) -> Any: return self.array.get_duck_array() - def __getitem__(self, key: Any): - key = expanded_indexer(key, self.ndim) - indexer = self.indexer_cls(key) + def __getitem__(self, key) -> Any: + _key = expanded_indexer(key, self.ndim) + indexer = self.indexer_cls(_key) result = apply_indexer(self.array, indexer) @@ -591,6 +600,14 @@ def __getitem__(self, key: Any): return result +BackendArray_fallback_warning_message = ( + "The 
array `{0}` does not support indexing using the .vindex and .oindex properties. " + "The __getitem__ method is being used instead. This fallback behavior will be " + "removed in a future version. Please ensure that the backend array `{0}` implements " + "support for the .vindex and .oindex properties to avoid potential issues." +) + + class LazilyIndexedArray(ExplicitlyIndexedNDArrayMixin): """Wrap an array to make basic and outer indexing lazy.""" @@ -625,33 +642,31 @@ def __init__(self, array: Any, key: ExplicitIndexer | None = None): shape += (k.size,) self._shape = shape - def _updated_key(self, new_key: ExplicitIndexer) -> BasicIndexer | OuterIndexer: - iter_new_key = iter(expanded_indexer(new_key.tuple, self.ndim)) - full_key = [] + def _updated_key( + self, new_key: ExplicitIndexer | _IndexerKey + ) -> BasicIndexer | OuterIndexer: + _new_key_tuple = ( + new_key.tuple if isinstance(new_key, ExplicitIndexer) else new_key + ) + iter_new_key = iter(expanded_indexer(_new_key_tuple, self.ndim)) + full_key: tuple[int | np.integer, ...] 
= () for size, k in zip(self.array.shape, self.key.tuple, strict=True): if isinstance(k, integer_types): - full_key.append(k) + full_key += (k,) else: - full_key.append(_index_indexer_1d(k, next(iter_new_key), size)) - full_key_tuple = tuple(full_key) + full_key += (_index_indexer_1d(k, next(iter_new_key), size),) - if all(isinstance(k, integer_types + (slice,)) for k in full_key_tuple): - return BasicIndexer(full_key_tuple) - return OuterIndexer(full_key_tuple) + if all(isinstance(k, integer_types + (slice,)) for k in full_key): + return BasicIndexer(full_key) + return OuterIndexer(full_key) @property def shape(self) -> _Shape: return self._shape - def get_duck_array(self): - if isinstance(self.array, ExplicitlyIndexedNDArrayMixin): - array = apply_indexer(self.array, self.key) - else: - # If the array is not an ExplicitlyIndexedNDArrayMixin, - # it may wrap a BackendArray so use its __getitem__ - array = self.array[self.key] - - # self.array[self.key] is now a numpy array when + def get_duck_array(self) -> Any: + array = apply_indexer(self.array, self.key) + # array[self.key] is now a numpy array when # self.array is a BackendArray subclass # and self.key is BasicIndexer((slice(None, None, None),)) # so we need the explicit check for ExplicitlyIndexed @@ -659,34 +674,32 @@ def get_duck_array(self): array = array.get_duck_array() return _wrap_numpy_scalars(array) - def transpose(self, order): + def transpose(self, order) -> Any: return LazilyVectorizedIndexedArray(self.array, self.key).transpose(order) - def _oindex_get(self, indexer: OuterIndexer): + def _oindex_get(self, indexer: _OuterIndexerKey) -> LazilyIndexedArray: return type(self)(self.array, self._updated_key(indexer)) - def _vindex_get(self, indexer: VectorizedIndexer): + def _vindex_get(self, indexer: _VectorizedIndexerKey) -> Any: array = LazilyVectorizedIndexedArray(self.array, self.key) return array.vindex[indexer] - def __getitem__(self, indexer: ExplicitIndexer): - 
self._check_and_raise_if_non_basic_indexer(indexer) + def __getitem__(self, indexer: _BasicIndexerKey) -> LazilyIndexedArray: return type(self)(self.array, self._updated_key(indexer)) - def _vindex_set(self, key: VectorizedIndexer, value: Any) -> None: + def _vindex_set(self, key: _VectorizedIndexerKey, value: Any) -> None: raise NotImplementedError( "Lazy item assignment with the vectorized indexer is not yet " "implemented. Load your data first by .load() or compute()." ) - def _oindex_set(self, key: OuterIndexer, value: Any) -> None: - full_key = self._updated_key(key) - self.array.oindex[full_key] = value + def _oindex_set(self, key: _OuterIndexerKey, value: Any) -> None: + full_key = self._updated_key(OuterIndexer(key)) + self.array.oindex[full_key.tuple] = value - def __setitem__(self, key: BasicIndexer, value: Any) -> None: - self._check_and_raise_if_non_basic_indexer(key) - full_key = self._updated_key(key) - self.array[full_key] = value + def __setitem__(self, key: _BasicIndexerKey, value: Any) -> None: + full_key = self._updated_key(BasicIndexer(key)) + self.array[full_key.tuple] = value def __repr__(self) -> str: return f"{type(self).__name__}(array={self.array!r}, key={self.key!r})" @@ -719,14 +732,10 @@ def __init__(self, array: duckarray[Any, Any], key: ExplicitIndexer): def shape(self) -> _Shape: return np.broadcast(*self.key.tuple).shape - def get_duck_array(self): - if isinstance(self.array, ExplicitlyIndexedNDArrayMixin): - array = apply_indexer(self.array, self.key) - else: - # If the array is not an ExplicitlyIndexedNDArrayMixin, - # it may wrap a BackendArray so use its __getitem__ - array = self.array[self.key] - # self.array[self.key] is now a numpy array when + def get_duck_array(self) -> Any: + array = apply_indexer(self.array, self.key) + + # array is now a numpy array when # self.array is a BackendArray subclass # and self.key is BasicIndexer((slice(None, None, None),)) # so we need the explicit check for ExplicitlyIndexed @@ -734,28 
+743,29 @@ def get_duck_array(self): array = array.get_duck_array() return _wrap_numpy_scalars(array) - def _updated_key(self, new_key: ExplicitIndexer): + def _updated_key(self, new_key: ExplicitIndexer) -> VectorizedIndexer: return _combine_indexers(self.key, self.shape, new_key) - def _oindex_get(self, indexer: OuterIndexer): - return type(self)(self.array, self._updated_key(indexer)) + def _oindex_get(self, indexer: _OuterIndexerKey) -> LazilyVectorizedIndexedArray: + return type(self)(self.array, self._updated_key(OuterIndexer(indexer))) - def _vindex_get(self, indexer: VectorizedIndexer): - return type(self)(self.array, self._updated_key(indexer)) + def _vindex_get( + self, indexer: _VectorizedIndexerKey + ) -> LazilyVectorizedIndexedArray: + return type(self)(self.array, self._updated_key(VectorizedIndexer(indexer))) - def __getitem__(self, indexer: ExplicitIndexer): - self._check_and_raise_if_non_basic_indexer(indexer) + def __getitem__(self, indexer: _BasicIndexerKey) -> Any: # If the indexed array becomes a scalar, return LazilyIndexedArray - if all(isinstance(ind, integer_types) for ind in indexer.tuple): - key = BasicIndexer(tuple(k[indexer.tuple] for k in self.key.tuple)) + if all(isinstance(ind, integer_types) for ind in indexer): + key = BasicIndexer(tuple(k[indexer] for k in self.key.tuple)) return LazilyIndexedArray(self.array, key) - return type(self)(self.array, self._updated_key(indexer)) + return type(self)(self.array, self._updated_key(BasicIndexer(indexer))) - def transpose(self, order): + def transpose(self, order) -> LazilyVectorizedIndexedArray: key = VectorizedIndexer(tuple(k.transpose(order) for k in self.key.tuple)) return type(self)(self.array, key) - def __setitem__(self, indexer: ExplicitIndexer, value: Any) -> None: + def __setitem__(self, indexer: _BasicIndexerKey, value: Any) -> None: raise NotImplementedError( "Lazy item assignment with the vectorized indexer is not yet " "implemented. 
Load your data first by .load() or compute()." @@ -780,42 +790,39 @@ def __init__(self, array: duckarray[Any, Any]): self.array = as_indexable(array) self._copied = False - def _ensure_copied(self): + def _ensure_copied(self) -> None: if not self._copied: self.array = as_indexable(np.array(self.array)) self._copied = True - def get_duck_array(self): + def get_duck_array(self) -> Any: return self.array.get_duck_array() - def _oindex_get(self, indexer: OuterIndexer): + def _oindex_get(self, indexer: _OuterIndexerKey) -> CopyOnWriteArray: return type(self)(_wrap_numpy_scalars(self.array.oindex[indexer])) - def _vindex_get(self, indexer: VectorizedIndexer): + def _vindex_get(self, indexer: _VectorizedIndexerKey) -> CopyOnWriteArray: return type(self)(_wrap_numpy_scalars(self.array.vindex[indexer])) - def __getitem__(self, indexer: ExplicitIndexer): - self._check_and_raise_if_non_basic_indexer(indexer) + def __getitem__(self, indexer: _BasicIndexerKey) -> CopyOnWriteArray: return type(self)(_wrap_numpy_scalars(self.array[indexer])) - def transpose(self, order): + def transpose(self, order) -> Any: return self.array.transpose(order) - def _vindex_set(self, indexer: VectorizedIndexer, value: Any) -> None: + def _vindex_set(self, indexer: _VectorizedIndexerKey, value: Any) -> None: self._ensure_copied() self.array.vindex[indexer] = value - def _oindex_set(self, indexer: OuterIndexer, value: Any) -> None: + def _oindex_set(self, indexer: _OuterIndexerKey, value: Any) -> None: self._ensure_copied() self.array.oindex[indexer] = value - def __setitem__(self, indexer: ExplicitIndexer, value: Any) -> None: - self._check_and_raise_if_non_basic_indexer(indexer) + def __setitem__(self, indexer: _BasicIndexerKey, value: Any) -> None: self._ensure_copied() - self.array[indexer] = value - def __deepcopy__(self, memo): + def __deepcopy__(self, memo) -> CopyOnWriteArray: # CopyOnWriteArray is used to wrap backend array objects, which might # point to files on disk, so we can't rely on 
the default deepcopy # implementation. @@ -828,38 +835,41 @@ class MemoryCachedArray(ExplicitlyIndexedNDArrayMixin): def __init__(self, array): self.array = _wrap_numpy_scalars(as_indexable(array)) - def _ensure_cached(self): + def _ensure_cached(self) -> None: self.array = as_indexable(self.array.get_duck_array()) - def get_duck_array(self): + def __array__( + self, dtype: np.typing.DTypeLike = None, /, *, copy: bool | None = None + ) -> np.ndarray: + return np.asarray(self.get_duck_array(), dtype=dtype) + + def get_duck_array(self) -> Any: self._ensure_cached() return self.array.get_duck_array() - def _oindex_get(self, indexer: OuterIndexer): + def _oindex_get(self, indexer: _OuterIndexerKey) -> MemoryCachedArray: return type(self)(_wrap_numpy_scalars(self.array.oindex[indexer])) - def _vindex_get(self, indexer: VectorizedIndexer): + def _vindex_get(self, indexer: _VectorizedIndexerKey) -> MemoryCachedArray: return type(self)(_wrap_numpy_scalars(self.array.vindex[indexer])) - def __getitem__(self, indexer: ExplicitIndexer): - self._check_and_raise_if_non_basic_indexer(indexer) + def __getitem__(self, indexer: _BasicIndexerKey) -> MemoryCachedArray: return type(self)(_wrap_numpy_scalars(self.array[indexer])) - def transpose(self, order): + def transpose(self, order) -> Any: return self.array.transpose(order) - def _vindex_set(self, indexer: VectorizedIndexer, value: Any) -> None: + def _vindex_set(self, indexer: _VectorizedIndexerKey, value: Any) -> None: self.array.vindex[indexer] = value - def _oindex_set(self, indexer: OuterIndexer, value: Any) -> None: + def _oindex_set(self, indexer: _OuterIndexerKey, value: Any) -> None: self.array.oindex[indexer] = value - def __setitem__(self, indexer: ExplicitIndexer, value: Any) -> None: - self._check_and_raise_if_non_basic_indexer(indexer) + def __setitem__(self, indexer: _BasicIndexerKey, value: Any) -> None: self.array[indexer] = value -def as_indexable(array): +def as_indexable(array: Any): """ This function always 
returns a ExplicitlyIndexed subclass, so that the vectorized indexing is always possible with the returned @@ -904,21 +914,23 @@ def _outer_to_vectorized_indexer( n_dim = len([k for k in key if not isinstance(k, integer_types)]) i_dim = 0 - new_key = [] + new_key: tuple[slice | np.ndarray[Any, np.dtype[np.integer]], ...] = () for k, size in zip(key, shape, strict=True): if isinstance(k, integer_types): - new_key.append(np.array(k).reshape((1,) * n_dim)) + new_key += (np.array(k).reshape((1,) * n_dim),) else: # np.ndarray or slice if isinstance(k, slice): k = np.arange(*k.indices(size)) assert k.dtype.kind in {"i", "u"} new_shape = [(1,) * i_dim + (k.size,) + (1,) * (n_dim - i_dim - 1)] - new_key.append(k.reshape(*new_shape)) + new_key += (k.reshape(*new_shape),) i_dim += 1 - return VectorizedIndexer(tuple(new_key)) + return VectorizedIndexer(new_key) -def _outer_to_numpy_indexer(indexer: BasicIndexer | OuterIndexer, shape: _Shape): +def _outer_to_numpy_indexer( + indexer: BasicIndexer | OuterIndexer, shape: _Shape +) -> tuple[Any, ...]: """Convert an OuterIndexer into an indexer for NumPy. Parameters @@ -982,6 +994,118 @@ class IndexingSupport(enum.Enum): VECTORIZED = 3 +def _finish_indexing( + raw_indexing_method: Callable[..., Any], + *, + raw_key, + numpy_indices, +) -> Any: + result = raw_indexing_method(raw_key.tuple) + if numpy_indices.tuple: + result = apply_indexer(as_indexable(result), numpy_indices) + return result + + +def basic_indexing_adapter( + key: _BasicIndexerKey, + shape: _Shape, + indexing_support: IndexingSupport, + raw_indexing_method: Callable[..., Any], +) -> Any: + """Support explicit basic indexing by delegating to a raw indexing method. + + Outer and/or vectorized indexers are supported by indexing a second time + with a NumPy array. + + Parameters + ---------- + key : IndexerKey + Tuple indexer + shape : Tuple[int, ...] + Shape of the indexed array. 
+ indexing_support : IndexingSupport enum + Form of indexing supported by raw_indexing_method. + raw_indexing_method : callable + Function (like ndarray.__getitem__) that when called with indexing key + in the form of a tuple returns an indexed array. + + Returns + ------- + Indexing result, in the form of a duck numpy-array. + """ + raw_key, numpy_indices = _decompose_outer_indexer( + BasicIndexer(key), shape, indexing_support + ) + return _finish_indexing( + raw_indexing_method, raw_key=raw_key, numpy_indices=numpy_indices + ) + + +def outer_indexing_adapter( + key: _OuterIndexerKey, + shape: _Shape, + indexing_support: IndexingSupport, + raw_indexing_method: Callable[..., Any], +) -> Any: + """Support explicit outer indexing by delegating to a raw indexing method. + + Parameters + ---------- + key : IndexerKey + tuple indexer + shape : Tuple[int, ...] + Shape of the indexed array. + indexing_support : IndexingSupport enum + Form of indexing supported by raw_indexing_method. + raw_indexing_method : callable + Function (like ndarray.__getitem__) that when called with indexing key + in the form of a tuple returns an indexed array. + + Returns + ------- + Indexing result, in the form of a duck numpy-array. + """ + raw_key, numpy_indices = _decompose_outer_indexer( + OuterIndexer(key), shape, indexing_support + ) + return _finish_indexing( + raw_indexing_method, raw_key=raw_key, numpy_indices=numpy_indices + ) + + +def vectorized_indexing_adapter( + key: _VectorizedIndexerKey, + shape: _Shape, + indexing_support: IndexingSupport, + raw_indexing_method: Callable[..., Any], +) -> Any: + """Support explicit vectorized indexing by delegating to a raw indexing method. + + Parameters + ---------- + key : IndexerKey + Explicit indexing object. + shape : Tuple[int, ...] + Shape of the indexed array. + indexing_support : IndexingSupport enum + Form of indexing supported by raw_indexing_method. 
+ raw_indexing_method : callable + Function (like ndarray.__getitem__) that when called with indexing key + in the form of a tuple returns an indexed array. + + Returns + ------- + Indexing result, in the form of a duck numpy-array. + """ + raw_key, numpy_indices = _decompose_vectorized_indexer( + VectorizedIndexer(key), shape, indexing_support + ) + return _finish_indexing( + raw_indexing_method, raw_key=raw_key, numpy_indices=numpy_indices + ) + + +# TODO: deprecate and delete this method once it is no longer used externally def explicit_indexing_adapter( key: ExplicitIndexer, shape: _Shape, @@ -1009,35 +1133,60 @@ def explicit_indexing_adapter( ------- Indexing result, in the form of a duck numpy-array. """ + + # If the array is not an ExplicitlyIndexedNDArrayMixin, + # it may wrap a BackendArray subclass that doesn't implement .oindex and .vindex. so use its __getitem__ + # emit_user_level_warning( + # BackendArray_fallback_warning_message.format(""), + # category=PendingDeprecationWarning, + # ) raw_key, numpy_indices = decompose_indexer(key, shape, indexing_support) result = raw_indexing_method(raw_key.tuple) if numpy_indices.tuple: - # index the loaded np.ndarray indexable = NumpyIndexingAdapter(result) result = apply_indexer(indexable, numpy_indices) return result -def apply_indexer(indexable, indexer: ExplicitIndexer): +def apply_indexer( + indexable: ExplicitlyIndexedNDArrayMixin, indexer: ExplicitIndexer +) -> Any: """Apply an indexer to an indexable object.""" + if not hasattr(indexable, "vindex") and not hasattr(indexable, "oindex"): + # This path is used by Lazily*IndexedArray.get_duck_array() + # classname = type(indexable).__name__ + # If the array is not an ExplicitlyIndexedNDArrayMixin, + # it may wrap a BackendArray subclass that doesn't implement .oindex and .vindex. 
so use its __getitem__ + # emit_user_level_warning( + # BackendArray_fallback_warning_message.format(classname), + # category=PendingDeprecationWarning, + # ) + return indexable[indexer] + if isinstance(indexer, VectorizedIndexer): - return indexable.vindex[indexer] + return indexable.vindex[indexer.tuple] elif isinstance(indexer, OuterIndexer): - return indexable.oindex[indexer] + return indexable.oindex[indexer.tuple] + elif isinstance(indexer, BasicIndexer): + return indexable[indexer.tuple] else: - return indexable[indexer] + raise TypeError( + f"Received indexer of type {type(indexer)!r}. " + "Expected BasicIndexer, OuterIndexer, or VectorizedIndexer" + ) def set_with_indexer(indexable, indexer: ExplicitIndexer, value: Any) -> None: """Set values in an indexable object using an indexer.""" if isinstance(indexer, VectorizedIndexer): - indexable.vindex[indexer] = value + indexable.vindex[indexer.tuple] = value elif isinstance(indexer, OuterIndexer): - indexable.oindex[indexer] = value + indexable.oindex[indexer.tuple] = value else: - indexable[indexer] = value + indexable[indexer.tuple] = value +# TODO: delete this method once explicit_indexing_adapter is no longer used externally def decompose_indexer( indexer: ExplicitIndexer, shape: _Shape, indexing_support: IndexingSupport ) -> tuple[ExplicitIndexer, ExplicitIndexer]: @@ -1109,10 +1258,10 @@ def _decompose_vectorized_indexer( >>> array = np.arange(36).reshape(6, 6) >>> backend_indexer = OuterIndexer((np.array([0, 1, 3]), np.array([2, 3]))) >>> # load subslice of the array - ... array = NumpyIndexingAdapter(array).oindex[backend_indexer] + ... array = NumpyIndexingAdapter(array).oindex[backend_indexer.tuple] >>> np_indexer = VectorizedIndexer((np.array([0, 2, 1]), np.array([0, 1, 0]))) >>> # vectorized indexing for on-memory np.ndarray. - ... NumpyIndexingAdapter(array).vindex[np_indexer] + ... 
NumpyIndexingAdapter(array).vindex[np_indexer.tuple] array([ 2, 21, 8]) """ assert isinstance(indexer, VectorizedIndexer) @@ -1120,8 +1269,10 @@ def _decompose_vectorized_indexer( if indexing_support is IndexingSupport.VECTORIZED: return indexer, BasicIndexer(()) - backend_indexer_elems = [] - np_indexer_elems = [] + backend_indexer_elems: tuple[ + int | np.integer | slice | np.ndarray[Any, np.dtype[np.unsignedinteger]], ... + ] = () + np_indexer_elems: tuple[slice | np.ndarray[Any, np.dtype[np.integer]], ...] = () # convert negative indices indexer_elems = [ np.where(k < 0, k + s, k) if isinstance(k, np.ndarray) else k @@ -1134,17 +1285,17 @@ def _decompose_vectorized_indexer( # (but make its step positive) in the backend, # and then use all of it (slice(None)) for the in-memory portion. bk_slice, np_slice = _decompose_slice(k, s) - backend_indexer_elems.append(bk_slice) - np_indexer_elems.append(np_slice) + backend_indexer_elems += (bk_slice,) + np_indexer_elems += (np_slice,) else: # If it is a (multidimensional) np.ndarray, just pickup the used # keys without duplication and store them as a 1d-np.ndarray. oind, vind = np.unique(k, return_inverse=True) - backend_indexer_elems.append(oind) - np_indexer_elems.append(vind.reshape(*k.shape)) + backend_indexer_elems += (oind,) + np_indexer_elems += (vind.reshape(*k.shape),) - backend_indexer = OuterIndexer(tuple(backend_indexer_elems)) - np_indexer = VectorizedIndexer(tuple(np_indexer_elems)) + backend_indexer = OuterIndexer(backend_indexer_elems) + np_indexer = VectorizedIndexer(np_indexer_elems) if indexing_support is IndexingSupport.OUTER: return backend_indexer, np_indexer @@ -1191,42 +1342,42 @@ def _decompose_outer_indexer( >>> array = np.arange(36).reshape(6, 6) >>> backend_indexer = BasicIndexer((slice(0, 3), slice(2, 4))) >>> # load subslice of the array - ... array = NumpyIndexingAdapter(array)[backend_indexer] + ... 
array = NumpyIndexingAdapter(array)[backend_indexer.tuple] >>> np_indexer = OuterIndexer((np.array([0, 2, 1]), np.array([0, 1, 0]))) >>> # outer indexing for on-memory np.ndarray. - ... NumpyIndexingAdapter(array).oindex[np_indexer] + ... NumpyIndexingAdapter(array).oindex[np_indexer.tuple] array([[ 2, 3, 2], [14, 15, 14], [ 8, 9, 8]]) """ - backend_indexer: list[Any] = [] - np_indexer: list[Any] = [] + backend_indexer: tuple[Any, ...] = () + np_indexer: tuple[Any, ...] = () assert isinstance(indexer, OuterIndexer | BasicIndexer) if indexing_support == IndexingSupport.VECTORIZED: - for k, s in zip(indexer.tuple, shape, strict=False): + for k, s in zip(indexer.tuple, shape, strict=True): if isinstance(k, slice): # If it is a slice, then we will slice it as-is # (but make its step positive) in the backend, bk_slice, np_slice = _decompose_slice(k, s) - backend_indexer.append(bk_slice) - np_indexer.append(np_slice) + backend_indexer += (bk_slice,) + np_indexer += (np_slice,) else: - backend_indexer.append(k) + backend_indexer += (k,) if not is_scalar(k): - np_indexer.append(slice(None)) - return type(indexer)(tuple(backend_indexer)), BasicIndexer(tuple(np_indexer)) + np_indexer += (slice(None),) + return type(indexer)(backend_indexer), BasicIndexer(np_indexer) # make indexer positive - pos_indexer: list[np.ndarray | int | np.number] = [] - for k, s in zip(indexer.tuple, shape, strict=False): + pos_indexer: tuple[np.ndarray | int | np.number, ...] 
= () + for k, s in zip(indexer.tuple, shape, strict=True): if isinstance(k, np.ndarray): - pos_indexer.append(np.where(k < 0, k + s, k)) + pos_indexer += (np.where(k < 0, k + s, k),) elif isinstance(k, integer_types) and k < 0: - pos_indexer.append(k + s) + pos_indexer += (k + s,) else: - pos_indexer.append(k) + pos_indexer += (k,) indexer_elems = pos_indexer if indexing_support is IndexingSupport.OUTER_1VECTOR: @@ -1242,63 +1393,63 @@ def _decompose_outer_indexer( ] array_index = np.argmax(np.array(gains)) if len(gains) > 0 else None - for i, (k, s) in enumerate(zip(indexer_elems, shape, strict=False)): + for i, (k, s) in enumerate(zip(indexer_elems, shape, strict=True)): if isinstance(k, np.ndarray) and i != array_index: # np.ndarray key is converted to slice that covers the entire # entries of this key. - backend_indexer.append(slice(np.min(k), np.max(k) + 1)) - np_indexer.append(k - np.min(k)) + backend_indexer += (slice(np.min(k), np.max(k) + 1),) + np_indexer += (k - np.min(k),) elif isinstance(k, np.ndarray): # Remove duplicates and sort them in the increasing order pkey, ekey = np.unique(k, return_inverse=True) - backend_indexer.append(pkey) - np_indexer.append(ekey) + backend_indexer += (pkey,) + np_indexer += (ekey,) elif isinstance(k, integer_types): - backend_indexer.append(k) + backend_indexer += (k,) else: # slice: convert positive step slice for backend bk_slice, np_slice = _decompose_slice(k, s) - backend_indexer.append(bk_slice) - np_indexer.append(np_slice) + backend_indexer += (bk_slice,) + np_indexer += (np_slice,) - return (OuterIndexer(tuple(backend_indexer)), OuterIndexer(tuple(np_indexer))) + return OuterIndexer(backend_indexer), OuterIndexer(np_indexer) if indexing_support == IndexingSupport.OUTER: - for k, s in zip(indexer_elems, shape, strict=False): + for k, s in zip(indexer_elems, shape, strict=True): if isinstance(k, slice): # slice: convert positive step slice for backend bk_slice, np_slice = _decompose_slice(k, s) - 
backend_indexer.append(bk_slice) - np_indexer.append(np_slice) + backend_indexer += (bk_slice,) + np_indexer += (np_slice,) elif isinstance(k, integer_types): - backend_indexer.append(k) + backend_indexer += (k,) elif isinstance(k, np.ndarray) and (np.diff(k) >= 0).all(): - backend_indexer.append(k) - np_indexer.append(slice(None)) + backend_indexer += (k,) + np_indexer += (slice(None),) else: # Remove duplicates and sort them in the increasing order oind, vind = np.unique(k, return_inverse=True) - backend_indexer.append(oind) - np_indexer.append(vind.reshape(*k.shape)) + backend_indexer += (oind,) + np_indexer += (vind.reshape(*k.shape),) - return (OuterIndexer(tuple(backend_indexer)), OuterIndexer(tuple(np_indexer))) + return OuterIndexer(backend_indexer), OuterIndexer(np_indexer) # basic indexer assert indexing_support == IndexingSupport.BASIC - for k, s in zip(indexer_elems, shape, strict=False): + for k, s in zip(indexer_elems, shape, strict=True): if isinstance(k, np.ndarray): # np.ndarray key is converted to slice that covers the entire # entries of this key. 
- backend_indexer.append(slice(np.min(k), np.max(k) + 1)) - np_indexer.append(k - np.min(k)) + backend_indexer += (slice(np.min(k), np.max(k) + 1),) + np_indexer += (k - np.min(k),) elif isinstance(k, integer_types): - backend_indexer.append(k) + backend_indexer += (k,) else: # slice: convert positive step slice for backend bk_slice, np_slice = _decompose_slice(k, s) - backend_indexer.append(bk_slice) - np_indexer.append(np_slice) + backend_indexer += (bk_slice,) + np_indexer += (np_slice,) - return (BasicIndexer(tuple(backend_indexer)), OuterIndexer(tuple(np_indexer))) + return BasicIndexer(backend_indexer), OuterIndexer(np_indexer) def _arrayize_vectorized_indexer( @@ -1312,15 +1463,15 @@ def _arrayize_vectorized_indexer( arrays = [v for v in indexer.tuple if isinstance(v, np.ndarray)] n_dim = arrays[0].ndim if len(arrays) > 0 else 0 i_dim = 0 - new_key = [] + new_key: tuple[slice | np.ndarray[Any, np.dtype[np.integer]], ...] = () for v, size in zip(indexer.tuple, shape, strict=True): if isinstance(v, np.ndarray): - new_key.append(np.reshape(v, v.shape + (1,) * len(slices))) + new_key += (np.reshape(v, v.shape + (1,) * len(slices)),) else: # slice shape = (1,) * (n_dim + i_dim) + (-1,) + (1,) * (len(slices) - i_dim - 1) - new_key.append(np.arange(*v.indices(size)).reshape(shape)) + new_key += (np.arange(*v.indices(size)).reshape(shape),) i_dim += 1 - return VectorizedIndexer(tuple(new_key)) + return VectorizedIndexer(new_key) def _chunked_array_with_chunks_hint( @@ -1330,36 +1481,42 @@ def _chunked_array_with_chunks_hint( if len(chunks) < array.ndim: raise ValueError("not enough chunks in hint") - new_chunks = [] - for chunk, size in zip(chunks, array.shape, strict=False): - new_chunks.append(chunk if size > 1 else (1,)) - return chunkmanager.from_array(array, new_chunks) # type: ignore[arg-type] + + new_chunks: _Chunks = tuple( + chunk if size > 1 else 1 + for chunk, size in zip(chunks, array.shape, strict=False) + ) + + return chunkmanager.from_array(array, 
new_chunks) def _logical_any(args): return functools.reduce(operator.or_, args) -def _masked_result_drop_slice(key, data: duckarray[Any, Any] | None = None): +def _masked_result_drop_slice(key, data: duckarray[Any, Any] | None = None) -> Any: key = (k for k in key if not isinstance(k, slice)) chunks_hint = getattr(data, "chunks", None) - new_keys = [] + new_keys: tuple[Any, ...] = () for k in key: if isinstance(k, np.ndarray): if is_chunked_array(data): # type: ignore[arg-type] chunkmanager = get_chunked_array_type(data) - new_keys.append( - _chunked_array_with_chunks_hint(k, chunks_hint, chunkmanager) + # TODO: the chunks_hint is the chunks for the whole array, + # and has nothing to do with the axes indexed by `k` + # This is why we need to use `strict=False` :/ + new_keys += ( + _chunked_array_with_chunks_hint(k, chunks_hint, chunkmanager), ) elif isinstance(data, array_type("sparse")): import sparse - new_keys.append(sparse.COO.from_numpy(k)) + new_keys += (sparse.COO.from_numpy(k),) else: - new_keys.append(k) + new_keys += (k,) else: - new_keys.append(k) + new_keys += (k,) mask = _logical_any(k == -1 for k in new_keys) return mask @@ -1367,7 +1524,7 @@ def _masked_result_drop_slice(key, data: duckarray[Any, Any] | None = None): def create_mask( indexer: ExplicitIndexer, shape: _Shape, data: duckarray[Any, Any] | None = None -): +) -> duckarray[bool, Any]: """Create a mask for indexing with a fill-value. 
Parameters @@ -1495,29 +1652,31 @@ def __init__(self, array): ) self.array = array - def transpose(self, order): + def transpose(self, order) -> Any: return self.array.transpose(order) - def _oindex_get(self, indexer: OuterIndexer): - key = _outer_to_numpy_indexer(indexer, self.array.shape) + def _oindex_get(self, indexer: _OuterIndexerKey) -> np.ndarray: + key = _outer_to_numpy_indexer(OuterIndexer(indexer), self.array.shape) return self.array[key] - def _vindex_get(self, indexer: VectorizedIndexer): - _assert_not_chunked_indexer(indexer.tuple) + def _vindex_get(self, indexer: _VectorizedIndexerKey) -> np.ndarray: + _assert_not_chunked_indexer(indexer) array = NumpyVIndexAdapter(self.array) - return array[indexer.tuple] - - def __getitem__(self, indexer: ExplicitIndexer): - self._check_and_raise_if_non_basic_indexer(indexer) + return array[indexer] + def __getitem__(self, indexer: _BasicIndexerKey) -> np.ndarray: array = self.array # We want 0d slices rather than scalars. This is achieved by # appending an ellipsis (see # https://numpy.org/doc/stable/reference/arrays.indexing.html#detailed-notes). 
- key = indexer.tuple + (Ellipsis,) + key = ( + indexer.tuple + if isinstance(indexer, ExplicitIndexer) + else indexer + (Ellipsis,) + ) return array[key] - def _safe_setitem(self, array, key: tuple[Any, ...], value: Any) -> None: + def _safe_setitem(self, array, key: _BasicIndexerKey, value: Any) -> None: try: array[key] = value except ValueError as exc: @@ -1530,21 +1689,24 @@ def _safe_setitem(self, array, key: tuple[Any, ...], value: Any) -> None: else: raise exc - def _oindex_set(self, indexer: OuterIndexer, value: Any) -> None: - key = _outer_to_numpy_indexer(indexer, self.array.shape) + def _oindex_set(self, indexer: _OuterIndexerKey, value: Any) -> None: + key = _outer_to_numpy_indexer(OuterIndexer(indexer), self.array.shape) self._safe_setitem(self.array, key, value) - def _vindex_set(self, indexer: VectorizedIndexer, value: Any) -> None: + def _vindex_set(self, indexer: _VectorizedIndexerKey, value: Any) -> None: array = NumpyVIndexAdapter(self.array) - self._safe_setitem(array, indexer.tuple, value) + self._safe_setitem(array, indexer, value) - def __setitem__(self, indexer: ExplicitIndexer, value: Any) -> None: - self._check_and_raise_if_non_basic_indexer(indexer) + def __setitem__(self, indexer: _BasicIndexerKey, value: Any) -> None: array = self.array # We want 0d slices rather than scalars. This is achieved by # appending an ellipsis (see # https://numpy.org/doc/stable/reference/arrays.indexing.html#detailed-notes). 
- key = indexer.tuple + (Ellipsis,) + key = ( + indexer.tuple + if isinstance(indexer, ExplicitIndexer) + else indexer + (Ellipsis,) + ) self._safe_setitem(array, key, value) @@ -1573,30 +1735,28 @@ def __init__(self, array): ) self.array = array - def _oindex_get(self, indexer: OuterIndexer): + def _oindex_get(self, indexer: _OuterIndexerKey) -> Any: # manual orthogonal indexing (implemented like DaskIndexingAdapter) - key = indexer.tuple value = self.array - for axis, subkey in reversed(list(enumerate(key))): + subkey: Any + for axis, subkey in reversed(list(enumerate(indexer))): value = value[(slice(None),) * axis + (subkey, Ellipsis)] return value - def _vindex_get(self, indexer: VectorizedIndexer): + def _vindex_get(self, indexer: _VectorizedIndexerKey) -> Any: raise TypeError("Vectorized indexing is not supported") - def __getitem__(self, indexer: ExplicitIndexer): - self._check_and_raise_if_non_basic_indexer(indexer) - return self.array[indexer.tuple] + def __getitem__(self, indexer: _BasicIndexerKey) -> Any: + return self.array[indexer] - def _oindex_set(self, indexer: OuterIndexer, value: Any) -> None: - self.array[indexer.tuple] = value + def _oindex_set(self, indexer: _OuterIndexerKey, value: Any) -> None: + self.array[indexer] = value - def _vindex_set(self, indexer: VectorizedIndexer, value: Any) -> None: + def _vindex_set(self, indexer: _VectorizedIndexerKey, value: Any) -> None: raise TypeError("Vectorized indexing is not supported") - def __setitem__(self, indexer: ExplicitIndexer, value: Any) -> None: - self._check_and_raise_if_non_basic_indexer(indexer) - self.array[indexer.tuple] = value + def __setitem__(self, indexer: _BasicIndexerKey, value: Any) -> None: + self.array[indexer] = value def transpose(self, order): xp = self.array.__array_namespace__() @@ -1636,34 +1796,34 @@ def __init__(self, array): """ self.array = array - def _oindex_get(self, indexer: OuterIndexer): - key = indexer.tuple + def _oindex_get(self, indexer: _OuterIndexerKey) -> 
Any: try: - return self.array[key] + return self.array[indexer] except NotImplementedError: # manual orthogonal indexing value = self.array - for axis, subkey in reversed(list(enumerate(key))): + subkey: Any + for axis, subkey in reversed(list(enumerate(indexer))): value = value[(slice(None),) * axis + (subkey,)] return value - def _vindex_get(self, indexer: VectorizedIndexer): + def _vindex_get(self, indexer: _VectorizedIndexerKey) -> Any: try: - return self.array.vindex[indexer.tuple] + return self.array.vindex[indexer] except IndexError as e: # TODO: upstream to dask - has_dask = any(is_duck_dask_array(i) for i in indexer.tuple) + has_dask = any(is_duck_dask_array(i) for i in indexer) # this only works for "small" 1d coordinate arrays with one chunk # it is intended for idxmin, idxmax, and allows indexing with # the nD array output of argmin, argmax if ( not has_dask - or len(indexer.tuple) > 1 + or len(indexer) > 1 or math.prod(self.array.numblocks) > 1 or self.array.ndim > 1 ): raise e - (idxr,) = indexer.tuple + (idxr,) = indexer if idxr.ndim == 0: return self.array[idxr.data] else: @@ -1678,26 +1838,24 @@ def _vindex_get(self, indexer: VectorizedIndexer): dtype=self.array.dtype, ) - def __getitem__(self, indexer: ExplicitIndexer): - self._check_and_raise_if_non_basic_indexer(indexer) - return self.array[indexer.tuple] + def __getitem__(self, indexer: _BasicIndexerKey) -> Any: + return self.array[indexer] - def _oindex_set(self, indexer: OuterIndexer, value: Any) -> None: - num_non_slices = sum(0 if isinstance(k, slice) else 1 for k in indexer.tuple) + def _oindex_set(self, indexer: _OuterIndexerKey, value: Any) -> None: + num_non_slices = sum(0 if isinstance(k, slice) else 1 for k in indexer) if num_non_slices > 1: raise NotImplementedError( "xarray can't set arrays with multiple array indices to dask yet." 
) - self.array[indexer.tuple] = value + self.array[indexer] = value - def _vindex_set(self, indexer: VectorizedIndexer, value: Any) -> None: - self.array.vindex[indexer.tuple] = value + def _vindex_set(self, indexer: _VectorizedIndexerKey, value: Any) -> None: + self.array.vindex[indexer] = value - def __setitem__(self, indexer: ExplicitIndexer, value: Any) -> None: - self._check_and_raise_if_non_basic_indexer(indexer) - self.array[indexer.tuple] = value + def __setitem__(self, indexer: _BasicIndexerKey, value: Any) -> None: + self.array[indexer] = value - def transpose(self, order): + def transpose(self, order) -> Any: return self.array.transpose(order) @@ -1746,7 +1904,7 @@ def get_duck_array(self) -> np.ndarray: def shape(self) -> _Shape: return (len(self.array),) - def _convert_scalar(self, item): + def _convert_scalar(self, item) -> Any: if item is pd.NaT: # work around the impossibility of casting NaT with asarray # note: it probably would be better in general to return @@ -1768,13 +1926,14 @@ def _convert_scalar(self, item): # a NumPy array. 
return to_0d_array(item) - def _prepare_key(self, key: Any | tuple[Any, ...]) -> tuple[Any, ...]: - if isinstance(key, tuple) and len(key) == 1: + def _prepare_key(self, key: ExplicitIndexer | _IndexerKey) -> _IndexerKey: + _key = key.tuple if isinstance(key, ExplicitIndexer) else key + if len(_key) == 1: # unpack key so it can index a pandas.Index object (pandas.Index # objects don't like tuples) - (key,) = key + (_key,) = _key - return key + return _key def _handle_result( self, result: Any @@ -1791,7 +1950,7 @@ def _handle_result( return self._convert_scalar(result) def _oindex_get( - self, indexer: OuterIndexer + self, indexer: _OuterIndexerKey ) -> ( PandasIndexingAdapter | NumpyIndexingAdapter @@ -1799,7 +1958,7 @@ def _oindex_get( | np.datetime64 | np.timedelta64 ): - key = self._prepare_key(indexer.tuple) + key = self._prepare_key(indexer) if getattr(key, "ndim", 0) > 1: # Return np-array if multidimensional indexable = NumpyIndexingAdapter(np.asarray(self)) @@ -1810,7 +1969,7 @@ def _oindex_get( return self._handle_result(result) def _vindex_get( - self, indexer: VectorizedIndexer + self, indexer: _VectorizedIndexerKey ) -> ( PandasIndexingAdapter | NumpyIndexingAdapter @@ -1818,8 +1977,8 @@ def _vindex_get( | np.datetime64 | np.timedelta64 ): - _assert_not_chunked_indexer(indexer.tuple) - key = self._prepare_key(indexer.tuple) + _assert_not_chunked_indexer(indexer) + key = self._prepare_key(indexer) if getattr(key, "ndim", 0) > 1: # Return np-array if multidimensional indexable = NumpyIndexingAdapter(np.asarray(self)) @@ -1830,7 +1989,7 @@ def _vindex_get( return self._handle_result(result) def __getitem__( - self, indexer: ExplicitIndexer + self, indexer: _BasicIndexerKey ) -> ( PandasIndexingAdapter | NumpyIndexingAdapter @@ -1838,7 +1997,7 @@ def __getitem__( | np.datetime64 | np.timedelta64 ): - key = self._prepare_key(indexer.tuple) + key = self._prepare_key(indexer) if getattr(key, "ndim", 0) > 1: # Return np-array if multidimensional indexable = 
NumpyIndexingAdapter(np.asarray(self)) @@ -1908,7 +2067,7 @@ def _convert_scalar(self, item): return super()._convert_scalar(item) def _oindex_get( - self, indexer: OuterIndexer + self, indexer: _OuterIndexerKey ) -> ( PandasIndexingAdapter | NumpyIndexingAdapter @@ -1922,7 +2081,7 @@ def _oindex_get( return result def _vindex_get( - self, indexer: VectorizedIndexer + self, indexer: _VectorizedIndexerKey ) -> ( PandasIndexingAdapter | NumpyIndexingAdapter @@ -1935,7 +2094,7 @@ def _vindex_get( result.level = self.level return result - def __getitem__(self, indexer: ExplicitIndexer): + def __getitem__(self, indexer: _BasicIndexerKey): result = super().__getitem__(indexer) if isinstance(result, type(self)): result.level = self.level @@ -1957,7 +2116,7 @@ def _get_array_subset(self) -> np.ndarray: if self.size > threshold: pos = threshold // 2 indices = np.concatenate([np.arange(0, pos), np.arange(-pos, 0)]) - subset = self[OuterIndexer((indices,))] + subset = self[(indices,)] else: subset = self diff --git a/xarray/namedarray/_typing.py b/xarray/namedarray/_typing.py index 95e7d7adfc3..f99360ff078 100644 --- a/xarray/namedarray/_typing.py +++ b/xarray/namedarray/_typing.py @@ -95,6 +95,12 @@ def dtype(self) -> _DType_co: ... _IndexKey = Union[int, slice, EllipsisType] _IndexKeys = tuple[_IndexKey, ...] # tuple[Union[_IndexKey, None], ...] _IndexKeyLike = Union[_IndexKey, _IndexKeys] +_IndexerKey = tuple[Any, ...] +_BasicIndexerKey = tuple[int | np.integer | slice, ...] +_OuterIndexerKey = tuple[ + int | np.integer | slice | np.ndarray[Any, np.dtype[np.integer]], ... +] +_VectorizedIndexerKey = tuple[slice | np.ndarray[Any, np.dtype[np.integer]], ...] 
_AttrsLike = Union[Mapping[Any, Any], None] diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py index 333434b30ea..ed3c5cdb5c0 100644 --- a/xarray/tests/__init__.py +++ b/xarray/tests/__init__.py @@ -237,16 +237,6 @@ def __getitem__(self, key): return key -class IndexerMaker: - def __init__(self, indexer_cls): - self._indexer_cls = indexer_cls - - def __getitem__(self, key): - if not isinstance(key, tuple): - key = (key,) - return self._indexer_cls(key) - - def source_ndarray(array): """Given an ndarray, return the base object which holds its memory, or the object itself. diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 420e30b8526..629d58e3ffd 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -39,6 +39,7 @@ open_mfdataset, save_mfdataset, ) +from xarray.backends.common import BackendArray as LegacyBackendArray from xarray.backends.common import robust_getitem from xarray.backends.h5netcdf_ import H5netcdfBackendEntrypoint from xarray.backends.netcdf3 import _nc3_dtype_coercions @@ -55,6 +56,7 @@ from xarray.coding.variables import SerializationWarning from xarray.conventions import encode_dataset_coordinates from xarray.core import indexing +from xarray.core.indexing import IndexingSupport from xarray.core.options import set_options from xarray.core.utils import module_available from xarray.namedarray.pycompat import array_type @@ -354,7 +356,147 @@ def test_dtype_coercion_error(self) -> None: ds.to_netcdf(path, format=format) -class DatasetIOBase: +class BackendIndexingTestsMixin: + def roundtrip(self, ds: Dataset, open_kwargs=None) -> Dataset: + raise NotImplementedError + + def test_orthogonal_indexing(self) -> None: + in_memory = create_test_data() + with self.roundtrip(in_memory) as on_disk: + indexers = {"dim1": [1, 2, 0], "dim2": [3, 2, 0, 3], "dim3": np.arange(5)} + expected = in_memory.isel(indexers) + actual = on_disk.isel(**indexers) + # make sure the array is not yet loaded 
into memory + assert not actual["var1"].variable._in_memory + assert_identical(expected, actual) + # do it twice, to make sure we're switched from orthogonal -> numpy + # when we cached the values + actual = on_disk.isel(**indexers) + assert_identical(expected, actual) + + def test_vectorized_indexing(self) -> None: + in_memory = create_test_data() + with self.roundtrip(in_memory) as on_disk: + indexers = { + "dim1": DataArray([0, 2, 0], dims="a"), + "dim2": DataArray([0, 2, 3], dims="a"), + } + expected = in_memory.isel(indexers) + actual = on_disk.isel(**indexers) + # make sure the array is not yet loaded into memory + assert not actual["var1"].variable._in_memory + assert_identical(expected, actual.load()) + # do it twice, to make sure we're switched from + # vectorized -> numpy when we cached the values + actual = on_disk.isel(**indexers) + assert_identical(expected, actual) + + def multiple_indexing(indexers): + # make sure a sequence of lazy indexings certainly works. + with self.roundtrip(in_memory) as on_disk: + actual = on_disk["var3"] + expected = in_memory["var3"] + for ind in indexers: + actual = actual.isel(ind) + expected = expected.isel(ind) + # make sure the array is not yet loaded into memory + assert not actual.variable._in_memory + assert_identical(expected, actual.load()) + + # two-staged vectorized-indexing + indexers2 = [ + { + "dim1": DataArray([[0, 7], [2, 6], [3, 5]], dims=["a", "b"]), + "dim3": DataArray([[0, 4], [1, 3], [2, 2]], dims=["a", "b"]), + }, + {"a": DataArray([0, 1], dims=["c"]), "b": DataArray([0, 1], dims=["c"])}, + ] + multiple_indexing(indexers2) + + # vectorized-slice mixed + indexers3 = [ + { + "dim1": DataArray([[0, 7], [2, 6], [3, 5]], dims=["a", "b"]), + "dim3": slice(None, 10), + } + ] + multiple_indexing(indexers3) + + # vectorized-integer mixed + indexers4 = [ + {"dim3": 0}, + {"dim1": DataArray([[0, 7], [2, 6], [3, 5]], dims=["a", "b"])}, + {"a": slice(None, None, 2)}, + ] + multiple_indexing(indexers4) + + # 
vectorized-integer mixed + indexers5 = [ + {"dim3": 0}, + {"dim1": DataArray([[0, 7], [2, 6], [3, 5]], dims=["a", "b"])}, + {"a": 1, "b": 0}, + ] + multiple_indexing(indexers5) + + def test_vectorized_indexing_negative_step(self) -> None: + # use dask explicitly when present + open_kwargs: dict[str, Any] | None + if has_dask: + open_kwargs = {"chunks": {}} + else: + open_kwargs = None + in_memory = create_test_data() + + def multiple_indexing(indexers): + # make sure a sequence of lazy indexings certainly works. + with self.roundtrip(in_memory, open_kwargs=open_kwargs) as on_disk: + actual = on_disk["var3"] + expected = in_memory["var3"] + for ind in indexers: + actual = actual.isel(ind) + expected = expected.isel(ind) + # make sure the array is not yet loaded into memory + assert not actual.variable._in_memory + assert_identical(expected, actual.load()) + + # with negative step slice. + indexers = [ + { + "dim1": DataArray([[0, 7], [2, 6], [3, 5]], dims=["a", "b"]), + "dim3": slice(-1, 1, -1), + } + ] + multiple_indexing(indexers) + + # with negative step slice. + indexers = [ + { + "dim1": DataArray([[0, 7], [2, 6], [3, 5]], dims=["a", "b"]), + "dim3": slice(-1, 1, -2), + } + ] + multiple_indexing(indexers) + + def test_outer_indexing_reversed(self) -> None: + # regression test for GH6560 + ds = xr.Dataset( + {"z": (("t", "p", "y", "x"), np.ones((1, 1, 31, 40)))}, + ) + + with self.roundtrip(ds) as on_disk: + subset = on_disk.isel(t=[0], p=0).z[:, ::10, ::10][:, ::-1, :] + assert subset.sizes == subset.load().sizes + + def test_isel_dataarray(self) -> None: + # Make sure isel works lazily. 
GH:issue:1688 + in_memory = create_test_data() + with self.roundtrip(in_memory) as on_disk: + expected = in_memory.isel(dim2=in_memory["dim2"] < 3) + actual = on_disk.isel(dim2=on_disk["dim2"] < 3) + assert_identical(expected, actual) + + +class DatasetIOBase(BackendIndexingTestsMixin): engine: T_NetcdfEngine | None = None file_format: T_NetcdfTypes | None = None @@ -706,141 +848,6 @@ def test_roundtrip_boolean_dtype(self) -> None: assert_identical(original, actual2) assert actual2["x"].dtype == "bool" - def test_orthogonal_indexing(self) -> None: - in_memory = create_test_data() - with self.roundtrip(in_memory) as on_disk: - indexers = {"dim1": [1, 2, 0], "dim2": [3, 2, 0, 3], "dim3": np.arange(5)} - expected = in_memory.isel(indexers) - actual = on_disk.isel(**indexers) - # make sure the array is not yet loaded into memory - assert not actual["var1"].variable._in_memory - assert_identical(expected, actual) - # do it twice, to make sure we're switched from orthogonal -> numpy - # when we cached the values - actual = on_disk.isel(**indexers) - assert_identical(expected, actual) - - def test_vectorized_indexing(self) -> None: - in_memory = create_test_data() - with self.roundtrip(in_memory) as on_disk: - indexers = { - "dim1": DataArray([0, 2, 0], dims="a"), - "dim2": DataArray([0, 2, 3], dims="a"), - } - expected = in_memory.isel(indexers) - actual = on_disk.isel(**indexers) - # make sure the array is not yet loaded into memory - assert not actual["var1"].variable._in_memory - assert_identical(expected, actual.load()) - # do it twice, to make sure we're switched from - # vectorized -> numpy when we cached the values - actual = on_disk.isel(**indexers) - assert_identical(expected, actual) - - def multiple_indexing(indexers): - # make sure a sequence of lazy indexings certainly works. 
- with self.roundtrip(in_memory) as on_disk: - actual = on_disk["var3"] - expected = in_memory["var3"] - for ind in indexers: - actual = actual.isel(ind) - expected = expected.isel(ind) - # make sure the array is not yet loaded into memory - assert not actual.variable._in_memory - assert_identical(expected, actual.load()) - - # two-staged vectorized-indexing - indexers2 = [ - { - "dim1": DataArray([[0, 7], [2, 6], [3, 5]], dims=["a", "b"]), - "dim3": DataArray([[0, 4], [1, 3], [2, 2]], dims=["a", "b"]), - }, - {"a": DataArray([0, 1], dims=["c"]), "b": DataArray([0, 1], dims=["c"])}, - ] - multiple_indexing(indexers2) - - # vectorized-slice mixed - indexers3 = [ - { - "dim1": DataArray([[0, 7], [2, 6], [3, 5]], dims=["a", "b"]), - "dim3": slice(None, 10), - } - ] - multiple_indexing(indexers3) - - # vectorized-integer mixed - indexers4 = [ - {"dim3": 0}, - {"dim1": DataArray([[0, 7], [2, 6], [3, 5]], dims=["a", "b"])}, - {"a": slice(None, None, 2)}, - ] - multiple_indexing(indexers4) - - # vectorized-integer mixed - indexers5 = [ - {"dim3": 0}, - {"dim1": DataArray([[0, 7], [2, 6], [3, 5]], dims=["a", "b"])}, - {"a": 1, "b": 0}, - ] - multiple_indexing(indexers5) - - def test_vectorized_indexing_negative_step(self) -> None: - # use dask explicitly when present - open_kwargs: dict[str, Any] | None - if has_dask: - open_kwargs = {"chunks": {}} - else: - open_kwargs = None - in_memory = create_test_data() - - def multiple_indexing(indexers): - # make sure a sequence of lazy indexings certainly works. - with self.roundtrip(in_memory, open_kwargs=open_kwargs) as on_disk: - actual = on_disk["var3"] - expected = in_memory["var3"] - for ind in indexers: - actual = actual.isel(ind) - expected = expected.isel(ind) - # make sure the array is not yet loaded into memory - assert not actual.variable._in_memory - assert_identical(expected, actual.load()) - - # with negative step slice. 
- indexers = [ - { - "dim1": DataArray([[0, 7], [2, 6], [3, 5]], dims=["a", "b"]), - "dim3": slice(-1, 1, -1), - } - ] - multiple_indexing(indexers) - - # with negative step slice. - indexers = [ - { - "dim1": DataArray([[0, 7], [2, 6], [3, 5]], dims=["a", "b"]), - "dim3": slice(-1, 1, -2), - } - ] - multiple_indexing(indexers) - - def test_outer_indexing_reversed(self) -> None: - # regression test for GH6560 - ds = xr.Dataset( - {"z": (("t", "p", "y", "x"), np.ones((1, 1, 31, 40)))}, - ) - - with self.roundtrip(ds) as on_disk: - subset = on_disk.isel(t=[0], p=0).z[:, ::10, ::10][:, ::-1, :] - assert subset.sizes == subset.load().sizes - - def test_isel_dataarray(self) -> None: - # Make sure isel works lazily. GH:issue:1688 - in_memory = create_test_data() - with self.roundtrip(in_memory) as on_disk: - expected = in_memory.isel(dim2=in_memory["dim2"] < 3) - actual = on_disk.isel(dim2=on_disk["dim2"] < 3) - assert_identical(expected, actual) - def validate_array_type(self, ds): # Make sure that only NumpyIndexingAdapter stores a bare np.ndarray. 
def find_and_validate_array(obj): @@ -6594,3 +6601,83 @@ def test_h5netcdf_storage_options() -> None: storage_options={"skip_instance_cache": False}, ) assert_identical(xr.concat([ds1, ds2], dim="time"), ds) + + +class LegacyBackendArrayWrapper(LegacyBackendArray): + def __init__(self, array: np.ndarray, indexing_support: IndexingSupport): + self.shape = array.shape + self.dtype = array.dtype + self.array = array + self.indexing_support = indexing_support + + def __getitem__(self, key: indexing.ExplicitIndexer): + return indexing.explicit_indexing_adapter( + key, self.shape, self.indexing_support, self._getitem + ) + + def _getitem(self, key: tuple[Any, ...]) -> np.ndarray: + return self.array[key] + + +def indexing_tests(*, indexing_support: IndexingSupport): + def wrapper(cls): + class NewClass(cls): + cls.indexing_support = indexing_support + + def roundtrip(self, ds: Dataset, *, open_kwargs=None) -> Dataset: + ds = ds.copy(deep=True) + for name in list(ds.data_vars) + list( + set(ds.coords) - set(ds.xindexes) + ): + var = ds._variables[name] + ds._variables[name] = var.copy( + # These tests assume that indexing is lazy (checks ._in_memory), + # so wrapping by LazilyIndexedArray is required. 
+ data=indexing.LazilyIndexedArray( + LegacyBackendArrayWrapper(var.data, self.indexing_support) + ) + ) + return ds + + # def test_vectorized_indexing_negative_step(self) -> None: + # with pytest.warns(PendingDeprecationWarning): + # super().test_vectorized_indexing_negative_step() + + # def test_isel_dataarray(self) -> None: + # with pytest.warns(PendingDeprecationWarning): + # super().test_isel_dataarray() + + # def test_vectorized_indexing(self) -> None: + # with pytest.warns(PendingDeprecationWarning): + # super().test_vectorized_indexing() + + # def test_orthogonal_indexing(self) -> None: + # with pytest.warns(PendingDeprecationWarning): + # super().test_orthogonal_indexing() + + # def test_outer_indexing_reversed(self) -> None: + # with pytest.warns(PendingDeprecationWarning): + # super().test_outer_indexing_reversed() + + return NewClass + + return wrapper + + +@indexing_tests(indexing_support=IndexingSupport.BASIC) +class TestBasicIndexingLegacyBackend(BackendIndexingTestsMixin): + pass + + +@indexing_tests(indexing_support=IndexingSupport.OUTER_1VECTOR) +class TestOuter1VectorIndexingLegacyBackend(BackendIndexingTestsMixin): + pass + + +# @indexing_tests(indexing_support=IndexingSupport.OUTER) +# class TestOuterIndexingLegacyBackend(BackendIndexingTestsMixin): +# pass + +# @indexing_tests(indexing_support=IndexingSupport.VECTORIZED) +# class TestVectorizedIndexingLegacyBackend(BackendIndexingTestsMixin): +# pass diff --git a/xarray/tests/test_coding_strings.py b/xarray/tests/test_coding_strings.py index 17179a44a8a..af697b5c383 100644 --- a/xarray/tests/test_coding_strings.py +++ b/xarray/tests/test_coding_strings.py @@ -7,9 +7,7 @@ from xarray import Variable from xarray.coding import strings -from xarray.core import indexing from xarray.tests import ( - IndexerMaker, assert_array_equal, assert_identical, requires_dask, @@ -150,10 +148,9 @@ def test_StackedBytesArray() -> None: assert len(actual) == len(expected) assert_array_equal(expected, actual) - B 
= IndexerMaker(indexing.BasicIndexer) - assert_array_equal(expected[:1], actual[B[:1]]) + assert_array_equal(expected[:1], actual[(slice(1),)]) with pytest.raises(IndexError): - actual[B[:, :2]] + actual[slice(None), slice(2)] def test_StackedBytesArray_scalar() -> None: @@ -168,10 +165,8 @@ def test_StackedBytesArray_scalar() -> None: with pytest.raises(TypeError): len(actual) np.testing.assert_array_equal(expected, actual) - - B = IndexerMaker(indexing.BasicIndexer) with pytest.raises(IndexError): - actual[B[:2]] + actual[(slice(2),)] def test_StackedBytesArray_vectorized_indexing() -> None: @@ -179,9 +174,7 @@ def test_StackedBytesArray_vectorized_indexing() -> None: stacked = strings.StackedBytesArray(array) expected = np.array([[b"abc", b"def"], [b"def", b"abc"]]) - V = IndexerMaker(indexing.VectorizedIndexer) - indexer = V[np.array([[0, 1], [1, 0]])] - actual = stacked.vindex[indexer] + actual = stacked.vindex[(np.array([[0, 1], [1, 0]]),)] assert_array_equal(actual, expected) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index f3867bd67d2..fb3b71d5fa2 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -248,7 +248,7 @@ def get_array(self): return self.array def __getitem__(self, key): - return self.array[key.tuple] + return self.array[(key if isinstance(key, tuple) else key.tuple)] class AccessibleAsDuckArrayDataStore(backends.InMemoryDataStore): @@ -5215,7 +5215,8 @@ def test_lazy_load(self) -> None: ds.isel(time=10) ds.isel(time=slice(10), dim1=[0]).isel(dim1=0, dim2=-1) - def test_lazy_load_duck_array(self) -> None: + @pytest.mark.parametrize("decode_cf", [True, False]) + def test_lazy_load_duck_array(self, decode_cf) -> None: store = AccessibleAsDuckArrayDataStore() create_test_data().dump_to_store(store) @@ -5230,13 +5231,11 @@ def test_lazy_load_duck_array(self) -> None: ds.isel(time=slice(10), dim1=[0]).isel(dim1=0, dim2=-1) repr(ds) - # preserve the duck array type and don't cast to array - 
assert isinstance(ds["var1"].load().data, DuckArrayWrapper) - assert isinstance( - ds["var1"].isel(dim2=0, dim1=0).load().data, DuckArrayWrapper - ) + # preserve the duck array type and don't cast to array + assert isinstance(ds["var1"].load().data, DuckArrayWrapper) + assert isinstance(ds["var1"].isel(dim2=0, dim1=0).load().data, DuckArrayWrapper) - ds.close() + ds.close() def test_dropna(self) -> None: x = np.random.randn(4, 4) diff --git a/xarray/tests/test_indexing.py b/xarray/tests/test_indexing.py index d9784e6a62e..ae620adaac8 100644 --- a/xarray/tests/test_indexing.py +++ b/xarray/tests/test_indexing.py @@ -12,7 +12,6 @@ from xarray.core.indexes import PandasIndex, PandasMultiIndex from xarray.core.types import T_Xarray from xarray.tests import ( - IndexerMaker, ReturnItem, assert_array_equal, assert_identical, @@ -20,8 +19,6 @@ requires_dask, ) -B = IndexerMaker(indexing.BasicIndexer) - class TestIndexCallable: def test_getitem(self): @@ -425,7 +422,7 @@ def test_lazily_indexed_array_vindex_setitem(self) -> None: NotImplementedError, match=r"Lazy item assignment with the vectorized indexer is not yet", ): - lazy.vindex[indexer] = 0 + lazy.vindex[indexer.tuple] = 0 @pytest.mark.parametrize( "indexer_class, key, value", @@ -441,10 +438,10 @@ def test_lazily_indexed_array_setitem(self, indexer_class, key, value) -> None: if indexer_class is indexing.BasicIndexer: indexer = indexer_class(key) - lazy[indexer] = value + lazy[indexer.tuple] = value elif indexer_class is indexing.OuterIndexer: indexer = indexer_class(key) - lazy.oindex[indexer] = value + lazy.oindex[indexer.tuple] = value assert_array_equal(original[key], value) @@ -453,16 +450,16 @@ class TestCopyOnWriteArray: def test_setitem(self) -> None: original = np.arange(10) wrapped = indexing.CopyOnWriteArray(original) - wrapped[B[:]] = 0 + wrapped[(slice(None),)] = 0 assert_array_equal(original, np.arange(10)) assert_array_equal(wrapped, np.zeros(10)) def test_sub_array(self) -> None: original = 
np.arange(10) wrapped = indexing.CopyOnWriteArray(original) - child = wrapped[B[:5]] + child = wrapped[(slice(5),)] assert isinstance(child, indexing.CopyOnWriteArray) - child[B[:]] = 0 + child[(slice(None),)] = 0 assert_array_equal(original, np.arange(10)) assert_array_equal(wrapped, np.arange(10)) assert_array_equal(child, np.zeros(5)) @@ -470,7 +467,7 @@ def test_sub_array(self) -> None: def test_index_scalar(self) -> None: # regression test for GH1374 x = indexing.CopyOnWriteArray(np.array(["foo", "bar"])) - assert np.array(x[B[0]][B[()]]) == "foo" + assert np.array(x[(0,)][()]) == "foo" class TestMemoryCachedArray: @@ -483,7 +480,7 @@ def test_wrapper(self) -> None: def test_sub_array(self) -> None: original = indexing.LazilyIndexedArray(np.arange(10)) wrapped = indexing.MemoryCachedArray(original) - child = wrapped[B[:5]] + child = wrapped[(slice(5),)] assert isinstance(child, indexing.MemoryCachedArray) assert_array_equal(child, np.arange(5)) assert isinstance(child.array, indexing.NumpyIndexingAdapter) @@ -492,13 +489,13 @@ def test_sub_array(self) -> None: def test_setitem(self) -> None: original = np.arange(10) wrapped = indexing.MemoryCachedArray(original) - wrapped[B[:]] = 0 + wrapped[(slice(None),)] = 0 assert_array_equal(original, np.zeros(10)) def test_index_scalar(self) -> None: # regression test for GH1374 x = indexing.MemoryCachedArray(np.array(["foo", "bar"])) - assert np.array(x[B[0]][B[()]]) == "foo" + assert np.array(x[(0,)][()]) == "foo" def test_base_explicit_indexer() -> None: @@ -607,7 +604,7 @@ def test_arrayize_vectorized_indexer(self) -> None: vindex, self.data.shape ) np.testing.assert_array_equal( - self.data.vindex[vindex], self.data.vindex[vindex_array] + self.data.vindex[vindex.tuple], self.data.vindex[vindex_array.tuple] ) actual = indexing._arrayize_vectorized_indexer( @@ -636,7 +633,8 @@ def test_arrayize_vectorized_indexer(self) -> None: np.testing.assert_array_equal(b, np.arange(5)[:, np.newaxis]) -def get_indexers(shape, 
mode): +def get_indexers(shape: tuple[int, ...], mode) -> indexing.ExplicitIndexer: + indexer: tuple[Any, ...] if mode == "vectorized": indexed_shape = (3, 4) indexer = tuple(np.random.randint(0, s, size=indexed_shape) for s in shape) @@ -665,7 +663,7 @@ def get_indexers(shape, mode): return indexing.BasicIndexer(tuple(indexer)) elif mode == "basic1": # basic indexer - return indexing.BasicIndexer((3,)) + return indexing.BasicIndexer((2,) * len(shape)) elif mode == "basic2": # basic indexer indexer = [0, 2, 4] @@ -723,35 +721,35 @@ def test_decompose_indexers(shape, indexer_mode, indexing_support) -> None: # Dispatch to appropriate indexing method if indexer_mode.startswith("vectorized"): - expected = indexing_adapter.vindex[indexer] + expected = indexing_adapter.vindex[indexer.tuple] elif indexer_mode.startswith("outer"): - expected = indexing_adapter.oindex[indexer] + expected = indexing_adapter.oindex[indexer.tuple] else: - expected = indexing_adapter[indexer] # Basic indexing + expected = indexing_adapter[indexer.tuple] # Basic indexing if isinstance(backend_ind, indexing.VectorizedIndexer): - array = indexing_adapter.vindex[backend_ind] + array = indexing_adapter.vindex[backend_ind.tuple] elif isinstance(backend_ind, indexing.OuterIndexer): - array = indexing_adapter.oindex[backend_ind] + array = indexing_adapter.oindex[backend_ind.tuple] else: - array = indexing_adapter[backend_ind] + array = indexing_adapter[backend_ind.tuple] if len(np_ind.tuple) > 0: array_indexing_adapter = indexing.NumpyIndexingAdapter(array) if isinstance(np_ind, indexing.VectorizedIndexer): - array = array_indexing_adapter.vindex[np_ind] + array = array_indexing_adapter.vindex[np_ind.tuple] elif isinstance(np_ind, indexing.OuterIndexer): - array = array_indexing_adapter.oindex[np_ind] + array = array_indexing_adapter.oindex[np_ind.tuple] else: - array = array_indexing_adapter[np_ind] + array = array_indexing_adapter[np_ind.tuple] np.testing.assert_array_equal(expected, array) if not 
all(isinstance(k, indexing.integer_types) for k in np_ind.tuple): combined_ind = indexing._combine_indexers(backend_ind, shape, np_ind) assert isinstance(combined_ind, indexing.VectorizedIndexer) - array = indexing_adapter.vindex[combined_ind] + array = indexing_adapter.vindex[combined_ind.tuple] np.testing.assert_array_equal(expected, array) @@ -824,14 +822,14 @@ def test_create_mask_outer_indexer() -> None: def test_create_mask_vectorized_indexer() -> None: indexer = indexing.VectorizedIndexer((np.array([0, -1, 2]), np.array([0, 1, -1]))) expected = np.array([False, True, True]) - actual = indexing.create_mask(indexer, (5,)) + actual = indexing.create_mask(indexer, (5, 5)) np.testing.assert_array_equal(expected, actual) indexer = indexing.VectorizedIndexer( (np.array([0, -1, 2]), slice(None), np.array([0, 1, -1])) ) expected = np.array([[False, True, True]] * 2).T - actual = indexing.create_mask(indexer, (5, 2)) + actual = indexing.create_mask(indexer, (5, 2, 5)) np.testing.assert_array_equal(expected, actual) @@ -845,13 +843,14 @@ def test_create_mask_basic_indexer() -> None: np.testing.assert_array_equal(False, actual) +@requires_dask def test_create_mask_dask() -> None: - da = pytest.importorskip("dask.array") + import dask.array as da indexer = indexing.OuterIndexer((1, slice(2), np.array([0, -1, 2]))) expected = np.array(2 * [[False, True, False]]) actual = indexing.create_mask( - indexer, (5, 5, 5), da.empty((2, 3), chunks=((1, 1), (2, 1))) + indexer, (5, 5, 5), da.empty((2, 3, 3), chunks=((1, 1), (2, 1), (3,))) ) assert actual.chunks == ((1, 1), (2, 1)) np.testing.assert_array_equal(expected, actual) @@ -861,7 +860,7 @@ def test_create_mask_dask() -> None: ) expected = np.array([[False, True, True]] * 2).T actual = indexing.create_mask( - indexer_vec, (5, 2), da.empty((3, 2), chunks=((3,), (2,))) + indexer_vec, (3, 2), da.empty((3, 2, 3), chunks=((3,), (2,), (3,))) ) assert isinstance(actual, da.Array) np.testing.assert_array_equal(expected, actual)