Skip to content

Commit ae4ce35

Browse files
committed
Closes #4647 DataArray transpose inconsistent with Dataset Ellipsis usage
- Add missing_dims parameter to transpose to mimic isel behavior - Add missing_dims to infix_dims to make function consistent across different methods.
1 parent 9fefefb commit ae4ce35

File tree

8 files changed

+91
-28
lines changed

8 files changed

+91
-28
lines changed

doc/internals.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -230,4 +230,4 @@ re-open it directly with Zarr:
230230
231231
zgroup = zarr.open("rasm.zarr")
232232
print(zgroup.tree())
233-
dict(zgroup["Tair"].attrs)
233+
dict(zgroup["Tair"].attrs)

doc/plotting.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -955,4 +955,4 @@ One can also make line plots with multidimensional coordinates. In this case, ``
955955
f, ax = plt.subplots(2, 1)
956956
da.plot.line(x="lon", hue="y", ax=ax[0])
957957
@savefig plotting_example_2d_hue_xy.png
958-
da.plot.line(x="lon", hue="x", ax=ax[1])
958+
da.plot.line(x="lon", hue="x", ax=ax[1])

doc/whats-new.rst

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ Bug fixes
5555
- Fix a crash in orthogonal indexing on geographic coordinates with ``engine='cfgrib'`` (:issue:`4733` :pull:`4737`).
5656
By `Alessandro Amici <https://github.com/alexamici>`_
5757
- Limit number of data rows when printing large datasets. (:issue:`4736`, :pull:`4750`). By `Jimmy Westling <https://github.com/illviljan>`_.
58+
- Add ``missing_dims`` parameter to transpose (:issue:`4647`, :pull:`4767`). By `Daniel Mesejo <https://github.com/mesejo>`_.
5859

5960
Documentation
6061
~~~~~~~~~~~~~
@@ -76,8 +77,8 @@ Internal Changes
7677
- Run the tests in parallel using pytest-xdist (:pull:`4694`).
7778

7879
By `Justus Magin <https://github.com/keewis>`_ and `Mathias Hauser <https://github.com/mathause>`_.
79-
80-
- Replace all usages of ``assert x.identical(y)`` with ``assert_identical(x, y)``
80+
81+
- Replace all usages of ``assert x.identical(y)`` with ``assert_identical(x, y)``
8182
for clearer error messages.
8283
(:pull:`4752`);
8384
By `Maximilian Roos <https://github.com/max-sixty>`_.

xarray/core/dataarray.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2113,7 +2113,12 @@ def to_unstacked_dataset(self, dim, level=0):
21132113
# unstacked dataset
21142114
return Dataset(data_dict)
21152115

2116-
def transpose(self, *dims: Hashable, transpose_coords: bool = True) -> "DataArray":
2116+
def transpose(
2117+
self,
2118+
*dims: Hashable,
2119+
transpose_coords: bool = True,
2120+
missing_dims: str = "raise",
2121+
) -> "DataArray":
21172122
"""Return a new DataArray object with transposed dimensions.
21182123
21192124
Parameters
@@ -2123,6 +2128,12 @@ def transpose(self, *dims: Hashable, transpose_coords: bool = True) -> "DataArra
21232128
dimensions to this order.
21242129
transpose_coords : bool, default: True
21252130
If True, also transpose the coordinates of this DataArray.
2131+
missing_dims : {"raise", "warn", "ignore"}, default: "raise"
2132+
What to do if any of the supplied dimensions are not present in the
2133+
DataArray:
2134+
- "raise": raise an exception
2135+
- "warn": raise a warning, and ignore the missing dimensions
2136+
- "ignore": ignore the missing dimensions
21262137
21272138
Returns
21282139
-------
@@ -2141,7 +2152,7 @@ def transpose(self, *dims: Hashable, transpose_coords: bool = True) -> "DataArra
21412152
Dataset.transpose
21422153
"""
21432154
if dims:
2144-
dims = tuple(utils.infix_dims(dims, self.dims))
2155+
dims = tuple(utils.infix_dims(dims, self.dims, missing_dims))
21452156
variable = self.variable.transpose(*dims)
21462157
if transpose_coords:
21472158
coords: Dict[Hashable, Variable] = {}

xarray/core/utils.py

Lines changed: 55 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -715,28 +715,32 @@ def __len__(self) -> int:
715715
return len(self._data) - num_hidden
716716

717717

718-
def infix_dims(dims_supplied: Collection, dims_all: Collection) -> Iterator:
718+
def infix_dims(
719+
dims_supplied: Collection, dims_all: Collection, missing_dims: str = "raise"
720+
) -> Iterator:
719721
"""
720-
Resolves a supplied list containing an ellispsis representing other items, to
722+
Resolves a supplied list containing an ellipsis representing other items, to
721723
a generator with the 'realized' list of all items
722724
"""
723725
if ... in dims_supplied:
724726
if len(set(dims_all)) != len(dims_all):
725727
raise ValueError("Cannot use ellipsis with repeated dims")
726-
if len([d for d in dims_supplied if d == ...]) > 1:
728+
if list(dims_supplied).count(...) > 1:
727729
raise ValueError("More than one ellipsis supplied")
728730
other_dims = [d for d in dims_all if d not in dims_supplied]
729-
for d in dims_supplied:
730-
if d == ...:
731+
existing_dims = drop_missing_dims(dims_supplied, dims_all, missing_dims)
732+
for d in existing_dims:
733+
if d is ...:
731734
yield from other_dims
732735
else:
733736
yield d
734737
else:
735-
if set(dims_supplied) ^ set(dims_all):
738+
existing_dims = drop_missing_dims(dims_supplied, dims_all, missing_dims)
739+
if set(existing_dims) ^ set(dims_all):
736740
raise ValueError(
737741
f"{dims_supplied} must be a permuted list of {dims_all}, unless `...` is included"
738742
)
739-
yield from dims_supplied
743+
yield from existing_dims
740744

741745

742746
def get_temp_dimname(dims: Container[Hashable], new_dim: Hashable) -> Hashable:
@@ -776,7 +780,7 @@ def drop_dims_from_indexers(
776780
invalid = indexers.keys() - set(dims)
777781
if invalid:
778782
raise ValueError(
779-
f"dimensions {invalid} do not exist. Expected one or more of {dims}"
783+
f"Dimensions {invalid} do not exist. Expected one or more of {dims}"
780784
)
781785

782786
return indexers
@@ -789,7 +793,7 @@ def drop_dims_from_indexers(
789793
invalid = indexers.keys() - set(dims)
790794
if invalid:
791795
warnings.warn(
792-
f"dimensions {invalid} do not exist. Expected one or more of {dims}"
796+
f"Dimensions {invalid} do not exist. Expected one or more of {dims}"
793797
)
794798
for key in invalid:
795799
indexers.pop(key)
@@ -805,6 +809,48 @@ def drop_dims_from_indexers(
805809
)
806810

807811

812+
def drop_missing_dims(
813+
supplied_dims: Collection, dims: Collection, missing_dims: str
814+
) -> Collection:
815+
"""Depending on the setting of missing_dims, drop any dimensions from supplied_dims that
816+
are not present in dims.
817+
818+
Parameters
819+
----------
820+
supplied_dims : Collection
821+
dims : sequence
822+
missing_dims : {"raise", "warn", "ignore"}
823+
"""
824+
825+
if missing_dims == "raise":
826+
supplied_dims_set = set(val for val in supplied_dims if val is not ...)
827+
invalid = supplied_dims_set - set(dims)
828+
if invalid:
829+
raise ValueError(
830+
f"Dimensions {invalid} do not exist. Expected one or more of {dims}"
831+
)
832+
833+
return supplied_dims
834+
835+
elif missing_dims == "warn":
836+
837+
invalid = set(supplied_dims) - set(dims)
838+
if invalid:
839+
warnings.warn(
840+
f"Dimensions {invalid} do not exist. Expected one or more of {dims}"
841+
)
842+
843+
return [val for val in supplied_dims if val in dims or val is ...]
844+
845+
elif missing_dims == "ignore":
846+
return [val for val in supplied_dims if val in dims or val is ...]
847+
848+
else:
849+
raise ValueError(
850+
f"Unrecognised option {missing_dims} for missing_dims argument"
851+
)
852+
853+
808854
class UncachedAccessor:
809855
"""Acts like a property, but on both classes and class instances
810856

xarray/tests/test_dataarray.py

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -797,13 +797,13 @@ def test_isel(self):
797797
assert_identical(self.dv[:3, :5], self.dv.isel(x=slice(3), y=slice(5)))
798798
with raises_regex(
799799
ValueError,
800-
r"dimensions {'not_a_dim'} do not exist. Expected "
800+
r"Dimensions {'not_a_dim'} do not exist. Expected "
801801
r"one or more of \('x', 'y'\)",
802802
):
803803
self.dv.isel(not_a_dim=0)
804804
with pytest.warns(
805805
UserWarning,
806-
match=r"dimensions {'not_a_dim'} do not exist. "
806+
match=r"Dimensions {'not_a_dim'} do not exist. "
807807
r"Expected one or more of \('x', 'y'\)",
808808
):
809809
self.dv.isel(not_a_dim=0, missing_dims="warn")
@@ -2231,9 +2231,21 @@ def test_transpose(self):
22312231
actual = da.transpose("z", ..., "x", transpose_coords=True)
22322232
assert_equal(expected, actual)
22332233

2234+
# same as previous but with a missing dimension
2235+
actual = da.transpose(
2236+
"z", "y", "x", "not_a_dim", transpose_coords=True, missing_dims="ignore"
2237+
)
2238+
assert_equal(expected, actual)
2239+
22342240
with pytest.raises(ValueError):
22352241
da.transpose("x", "y")
22362242

2243+
with pytest.raises(ValueError):
2244+
da.transpose("not_a_dim", "z", "x", ...)
2245+
2246+
with pytest.warns(UserWarning):
2247+
da.transpose("not_a_dim", "y", "x", ..., missing_dims="warn")
2248+
22372249
def test_squeeze(self):
22382250
assert_equal(self.dv.variable.squeeze(), self.dv.squeeze().variable)
22392251

@@ -6227,7 +6239,6 @@ def da_dask(seed=123):
62276239

62286240
@pytest.mark.parametrize("da", ("repeating_ints",), indirect=True)
62296241
def test_isin(da):
6230-
62316242
expected = DataArray(
62326243
np.asarray([[0, 0, 0], [1, 0, 0]]),
62336244
dims=list("yx"),
@@ -6277,7 +6288,6 @@ def test_coarsen_keep_attrs():
62776288

62786289
@pytest.mark.parametrize("da", (1, 2), indirect=True)
62796290
def test_rolling_iter(da):
6280-
62816291
rolling_obj = da.rolling(time=7)
62826292
rolling_obj_mean = rolling_obj.mean()
62836293

@@ -6452,7 +6462,6 @@ def test_rolling_construct(center, window):
64526462
@pytest.mark.parametrize("window", (1, 2, 3, 4))
64536463
@pytest.mark.parametrize("name", ("sum", "mean", "std", "max"))
64546464
def test_rolling_reduce(da, center, min_periods, window, name):
6455-
64566465
if min_periods is not None and window < min_periods:
64576466
min_periods = window
64586467

@@ -6491,7 +6500,6 @@ def test_rolling_reduce_nonnumeric(center, min_periods, window, name):
64916500

64926501

64936502
def test_rolling_count_correct():
6494-
64956503
da = DataArray([0, np.nan, 1, 2, np.nan, 3, 4, 5, np.nan, 6, 7], dims="time")
64966504

64976505
kwargs = [
@@ -6579,7 +6587,6 @@ def test_ndrolling_construct(center, fill_value):
65796587
],
65806588
)
65816589
def test_rolling_keep_attrs(funcname, argument):
6582-
65836590
attrs_da = {"da_attr": "test"}
65846591

65856592
data = np.linspace(10, 15, 100)
@@ -6623,7 +6630,6 @@ def test_rolling_keep_attrs(funcname, argument):
66236630

66246631

66256632
def test_rolling_keep_attrs_deprecated():
6626-
66276633
attrs_da = {"da_attr": "test"}
66286634

66296635
data = np.linspace(10, 15, 100)
@@ -6957,7 +6963,6 @@ def test_rolling_exp(da, dim, window_type, window):
69576963

69586964
@requires_numbagg
69596965
def test_rolling_exp_keep_attrs(da):
6960-
69616966
attrs = {"attrs": "da"}
69626967
da.attrs = attrs
69636968

xarray/tests/test_dataset.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1024,14 +1024,14 @@ def test_isel(self):
10241024
data.isel(not_a_dim=slice(0, 2))
10251025
with raises_regex(
10261026
ValueError,
1027-
r"dimensions {'not_a_dim'} do not exist. Expected "
1027+
r"Dimensions {'not_a_dim'} do not exist. Expected "
10281028
r"one or more of "
10291029
r"[\w\W]*'time'[\w\W]*'dim\d'[\w\W]*'dim\d'[\w\W]*'dim\d'[\w\W]*",
10301030
):
10311031
data.isel(not_a_dim=slice(0, 2))
10321032
with pytest.warns(
10331033
UserWarning,
1034-
match=r"dimensions {'not_a_dim'} do not exist. "
1034+
match=r"Dimensions {'not_a_dim'} do not exist. "
10351035
r"Expected one or more of "
10361036
r"[\w\W]*'time'[\w\W]*'dim\d'[\w\W]*'dim\d'[\w\W]*'dim\d'[\w\W]*",
10371037
):

xarray/tests/test_variable.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1270,13 +1270,13 @@ def test_isel(self):
12701270
assert_identical(v.isel(time=[]), v[[]])
12711271
with raises_regex(
12721272
ValueError,
1273-
r"dimensions {'not_a_dim'} do not exist. Expected one or more of "
1273+
r"Dimensions {'not_a_dim'} do not exist. Expected one or more of "
12741274
r"\('time', 'x'\)",
12751275
):
12761276
v.isel(not_a_dim=0)
12771277
with pytest.warns(
12781278
UserWarning,
1279-
match=r"dimensions {'not_a_dim'} do not exist. Expected one or more of "
1279+
match=r"Dimensions {'not_a_dim'} do not exist. Expected one or more of "
12801280
r"\('time', 'x'\)",
12811281
):
12821282
v.isel(not_a_dim=0, missing_dims="warn")

0 commit comments

Comments
 (0)