Skip to content

Commit aaf3b7e

Browse files
TomNicholas and benbovy authored
Opt out of auto creating index variables (#8711)
* as_variable: deprecate converting to IndexVariable * fix multi-index edge case * Better default behavior of the Coordinates constructor (#8107) * ``Coordinates.__init__`` creates default indexes ... for any input dimension coordinate, if ``indexes=None``. Also, if another ``Coordinates`` object is passed, extract its indexes and raise if ``indexes`` is not None (no align/merge supported here). * add docstring examples * fix doctests * fix tests * update what's new * fix deprecation warning after unintentionally reverting a valid previous change. * avoid unnecessary auto-creation of index to avoid userwarning * catch expected FutureWarnings in test_as_variable * check for coercion to IndexVariable * whatsnew --------- Co-authored-by: Benoit Bovy <[email protected]>
1 parent 2f34895 commit aaf3b7e

File tree

7 files changed

+52
-18
lines changed

7 files changed

+52
-18
lines changed

doc/whats-new.rst

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,12 +28,13 @@ New Features
2828
By `Etienne Schalk <https://github.com/etienneschalk>`_ and `Deepak Cherian <https://github.com/dcherian>`_.
2929
- Add the ``.oindex`` property to Explicitly Indexed Arrays for orthogonal indexing functionality. (:issue:`8238`, :pull:`8750`)
3030
By `Anderson Banihirwe <https://github.com/andersy005>`_.
31-
3231
- Add the ``.vindex`` property to Explicitly Indexed Arrays for vectorized indexing functionality. (:issue:`8238`, :pull:`8780`)
3332
By `Anderson Banihirwe <https://github.com/andersy005>`_.
34-
3533
- Expand use of ``.oindex`` and ``.vindex`` properties. (:pull:`8790`)
3634
By `Anderson Banihirwe <https://github.com/andersy005>`_ and `Deepak Cherian <https://github.com/dcherian>`_.
35+
- Allow creating :py:class:`xr.Coordinates` objects with no indexes (:pull:`8711`)
36+
By `Benoit Bovy <https://github.com/benbovy>`_ and `Tom Nicholas
37+
<https://github.com/TomNicholas>`_.
3738

3839
Breaking changes
3940
~~~~~~~~~~~~~~~~

xarray/core/coordinates.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -298,7 +298,7 @@ def __init__(
298298
else:
299299
variables = {}
300300
for name, data in coords.items():
301-
var = as_variable(data, name=name)
301+
var = as_variable(data, name=name, auto_convert=False)
302302
if var.dims == (name,) and indexes is None:
303303
index, index_vars = create_default_index_implicit(var, list(coords))
304304
default_indexes.update({k: index for k in index_vars})
@@ -998,9 +998,12 @@ def create_coords_with_default_indexes(
998998
if isinstance(obj, DataArray):
999999
dataarray_coords.append(obj.coords)
10001000

1001-
variable = as_variable(obj, name=name)
1001+
variable = as_variable(obj, name=name, auto_convert=False)
10021002

10031003
if variable.dims == (name,):
1004+
# still needed to convert to IndexVariable first due to some
1005+
# pandas multi-index edge cases.
1006+
variable = variable.to_index_variable()
10041007
idx, idx_vars = create_default_index_implicit(variable, all_variables)
10051008
indexes.update({k: idx for k in idx_vars})
10061009
variables.update(idx_vars)

xarray/core/dataarray.py

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -159,7 +159,9 @@ def _infer_coords_and_dims(
159159
dims = list(coords.keys())
160160
else:
161161
for n, (dim, coord) in enumerate(zip(dims, coords)):
162-
coord = as_variable(coord, name=dims[n]).to_index_variable()
162+
coord = as_variable(
163+
coord, name=dims[n], auto_convert=False
164+
).to_index_variable()
163165
dims[n] = coord.name
164166
dims_tuple = tuple(dims)
165167
if len(dims_tuple) != len(shape):
@@ -179,10 +181,12 @@ def _infer_coords_and_dims(
179181
new_coords = {}
180182
if utils.is_dict_like(coords):
181183
for k, v in coords.items():
182-
new_coords[k] = as_variable(v, name=k)
184+
new_coords[k] = as_variable(v, name=k, auto_convert=False)
185+
if new_coords[k].dims == (k,):
186+
new_coords[k] = new_coords[k].to_index_variable()
183187
elif coords is not None:
184188
for dim, coord in zip(dims_tuple, coords):
185-
var = as_variable(coord, name=dim)
189+
var = as_variable(coord, name=dim, auto_convert=False)
186190
var.dims = (dim,)
187191
new_coords[dim] = var.to_index_variable()
188192

@@ -204,11 +208,17 @@ def _check_data_shape(
204208
return data
205209
else:
206210
data_shape = tuple(
207-
as_variable(coords[k], k).size if k in coords.keys() else 1
211+
(
212+
as_variable(coords[k], k, auto_convert=False).size
213+
if k in coords.keys()
214+
else 1
215+
)
208216
for k in dims
209217
)
210218
else:
211-
data_shape = tuple(as_variable(coord, "foo").size for coord in coords)
219+
data_shape = tuple(
220+
as_variable(coord, "foo", auto_convert=False).size for coord in coords
221+
)
212222
data = np.full(data_shape, data)
213223
return data
214224

xarray/core/merge.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -355,7 +355,7 @@ def append_all(variables, indexes):
355355
indexes_.pop(name, None)
356356
append_all(coords_, indexes_)
357357

358-
variable = as_variable(variable, name=name)
358+
variable = as_variable(variable, name=name, auto_convert=False)
359359
if name in indexes:
360360
append(name, variable, indexes[name])
361361
elif variable.dims == (name,):

xarray/core/variable.py

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
decode_numpy_dict_values,
3434
drop_dims_from_indexers,
3535
either_dict_or_kwargs,
36+
emit_user_level_warning,
3637
ensure_us_time_resolution,
3738
infix_dims,
3839
is_dict_like,
@@ -80,7 +81,9 @@ class MissingDimensionsError(ValueError):
8081
# TODO: move this to an xarray.exceptions module?
8182

8283

83-
def as_variable(obj: T_DuckArray | Any, name=None) -> Variable | IndexVariable:
84+
def as_variable(
85+
obj: T_DuckArray | Any, name=None, auto_convert: bool = True
86+
) -> Variable | IndexVariable:
8487
"""Convert an object into a Variable.
8588
8689
Parameters
@@ -100,6 +103,9 @@ def as_variable(obj: T_DuckArray | Any, name=None) -> Variable | IndexVariable:
100103
along a dimension of this given name.
101104
- Variables with name matching one of their dimensions are converted
102105
into `IndexVariable` objects.
106+
auto_convert : bool, optional
107+
For internal use only! If True, convert a "dimension" variable into
108+
an IndexVariable object (deprecated).
103109
104110
Returns
105111
-------
@@ -150,9 +156,15 @@ def as_variable(obj: T_DuckArray | Any, name=None) -> Variable | IndexVariable:
150156
f"explicit list of dimensions: {obj!r}"
151157
)
152158

153-
if name is not None and name in obj.dims and obj.ndim == 1:
154-
# automatically convert the Variable into an Index
155-
obj = obj.to_index_variable()
159+
if auto_convert:
160+
if name is not None and name in obj.dims and obj.ndim == 1:
161+
# automatically convert the Variable into an Index
162+
emit_user_level_warning(
163+
f"variable {name!r} with name matching its dimension will not be "
164+
"automatically converted into an `IndexVariable` object in the future.",
165+
FutureWarning,
166+
)
167+
obj = obj.to_index_variable()
156168

157169
return obj
158170

@@ -706,8 +718,10 @@ def _broadcast_indexes_vectorized(self, key):
706718
variable = (
707719
value
708720
if isinstance(value, Variable)
709-
else as_variable(value, name=dim)
721+
else as_variable(value, name=dim, auto_convert=False)
710722
)
723+
if variable.dims == (dim,):
724+
variable = variable.to_index_variable()
711725
if variable.dtype.kind == "b": # boolean indexing case
712726
(variable,) = variable._nonzero()
713727

xarray/tests/test_coordinates.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from xarray.core.dataarray import DataArray
99
from xarray.core.dataset import Dataset
1010
from xarray.core.indexes import PandasIndex, PandasMultiIndex
11+
from xarray.core.variable import IndexVariable
1112
from xarray.tests import assert_identical, source_ndarray
1213

1314

@@ -23,10 +24,12 @@ def test_init_default_index(self) -> None:
2324
assert_identical(coords.to_dataset(), expected)
2425
assert "x" in coords.xindexes
2526

27+
@pytest.mark.filterwarnings("error:IndexVariable")
2628
def test_init_no_default_index(self) -> None:
2729
# dimension coordinate with no default index (explicit)
2830
coords = Coordinates(coords={"x": [1, 2]}, indexes={})
2931
assert "x" not in coords.xindexes
32+
assert not isinstance(coords["x"], IndexVariable)
3033

3134
def test_init_from_coords(self) -> None:
3235
expected = Dataset(coords={"foo": ("x", [0, 1, 2])})

xarray/tests/test_variable.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1216,7 +1216,8 @@ def test_as_variable(self):
12161216
with pytest.raises(TypeError, match=r"without an explicit list of dimensions"):
12171217
as_variable(data)
12181218

1219-
actual = as_variable(data, name="x")
1219+
with pytest.warns(FutureWarning, match="IndexVariable"):
1220+
actual = as_variable(data, name="x")
12201221
assert_identical(expected.to_index_variable(), actual)
12211222

12221223
actual = as_variable(0)
@@ -1234,9 +1235,11 @@ def test_as_variable(self):
12341235

12351236
# test datetime, timedelta conversion
12361237
dt = np.array([datetime(1999, 1, 1) + timedelta(days=x) for x in range(10)])
1237-
assert as_variable(dt, "time").dtype.kind == "M"
1238+
with pytest.warns(FutureWarning, match="IndexVariable"):
1239+
assert as_variable(dt, "time").dtype.kind == "M"
12381240
td = np.array([timedelta(days=x) for x in range(10)])
1239-
assert as_variable(td, "time").dtype.kind == "m"
1241+
with pytest.warns(FutureWarning, match="IndexVariable"):
1242+
assert as_variable(td, "time").dtype.kind == "m"
12401243

12411244
with pytest.raises(TypeError):
12421245
as_variable(("x", DataArray([])))

0 commit comments

Comments
 (0)