From 87dfb530b63edb2db4ee692bc0d5bd69f3dc0a9f Mon Sep 17 00:00:00 2001
From: Michael Niklas
Date: Sun, 12 Jun 2022 22:45:23 +0200
Subject: [PATCH 1/3] support for where with drop=True and mixed dims

---
 xarray/core/common.py        | 23 ++++++++++++++---------
 xarray/tests/test_dataset.py | 19 +++++++++++++++++++
 2 files changed, 33 insertions(+), 9 deletions(-)

diff --git a/xarray/core/common.py b/xarray/core/common.py
index 16a7c07ddb4..3c328f42e98 100644
--- a/xarray/core/common.py
+++ b/xarray/core/common.py
@@ -1362,18 +1362,23 @@ def where(
                     f"cond argument is {cond!r} but must be a {Dataset!r} or {DataArray!r}"
                 )
 
-            # align so we can use integer indexing
             self, cond = align(self, cond)  # type: ignore[assignment]
 
-            # get cond with the minimal size needed for the Dataset
-            if isinstance(cond, Dataset):
-                clipcond = cond.to_array().any("variable")
-            else:
-                clipcond = cond
+            def _dataarray_indexer(dim: Hashable) -> DataArray:
+                return cond.any(dim=(d for d in cond.dims if d != dim))
+
+            def _dataset_indexer(dim: Hashable) -> DataArray:
+                cond_wdim = cond.drop(var for var in cond if dim not in cond[var].dims)
+                keepany = cond_wdim.any(dim=(d for d in cond.dims.keys() if d != dim))
+                return keepany.to_array().any("variable")
+
+            _get_indexer = (
+                _dataarray_indexer if isinstance(cond, DataArray) else _dataset_indexer
+            )
 
-            # clip the data corresponding to coordinate dims that are not used
-            nonzeros = zip(clipcond.dims, np.nonzero(clipcond.values))
-            indexers = {k: np.unique(v) for k, v in nonzeros}
+            indexers = {}
+            for dim in cond.sizes.keys():
+                indexers[dim] = _get_indexer(dim)
 
             self = self.isel(**indexers)
             cond = cond.isel(**indexers)
diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py
index 66b01a9b338..c1d1ec0393e 100644
--- a/xarray/tests/test_dataset.py
+++ b/xarray/tests/test_dataset.py
@@ -4715,6 +4715,25 @@ def test_where_drop(self) -> None:
         actual8 = ds.where(ds > 0, drop=True)
         assert_identical(expected8, actual8)
 
+        # mixed dimensions (add issue + PR num)
+        ds = xr.Dataset(
+            {
+                "a": ("x", [1, 2, 3]),
+                "b": ("y", [2, 3, 4]),
+                "c": (("x", "y"), np.arange(9).reshape((3, 3))),
+            }
+        )
+        expected9 = xr.Dataset(
+            {
+                "a": ("x", [np.nan, 3]),
+                "b": ("y", [np.nan, 3, 4]),
+                "c": (("x", "y"), np.arange(3.0, 9.0).reshape((2, 3))),
+            }
+        )
+        actual9 = ds.where(ds > 2, drop=True)
+        assert actual9.sizes["x"] == 2
+        assert_identical(expected9, actual9)
+
     def test_where_drop_empty(self) -> None:
         # regression test for GH1341
         array = DataArray(np.random.rand(100, 10), dims=["nCells", "nVertLevels"])

From 3484ebd12ad321b6f0cb745d2a7e7d81e23f77ea Mon Sep 17 00:00:00 2001
From: Michael Niklas
Date: Sun, 12 Jun 2022 22:48:23 +0200
Subject: [PATCH 2/3] add PR and issue nbr to test

---
 xarray/tests/test_dataset.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py
index c1d1ec0393e..a2c1ae1fc12 100644
--- a/xarray/tests/test_dataset.py
+++ b/xarray/tests/test_dataset.py
@@ -4715,7 +4715,7 @@ def test_where_drop(self) -> None:
         actual8 = ds.where(ds > 0, drop=True)
         assert_identical(expected8, actual8)
 
-        # mixed dimensions (add issue + PR num)
+        # mixed dimensions: PR#6690, Issue#6227
         ds = xr.Dataset(
             {
                 "a": ("x", [1, 2, 3]),

From 955361489b522fdc60379c8c431ecfaf22817085 Mon Sep 17 00:00:00 2001
From: Michael Niklas
Date: Sun, 12 Jun 2022 22:51:25 +0200
Subject: [PATCH 3/3] add changes to whats-new

---
 doc/whats-new.rst | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 833f475d38c..3c53d3bfb04 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -34,6 +34,9 @@ Bug fixes
   allowing the ``encoding`` and ``unlimited_dims`` options with ``save_mfdataset``.
   (:issue:`6684`)
   By `Travis A. O'Brien `_.
+- :py:meth:`Dataset.where` with ``drop=True`` now behaves correctly with mixed dimensions.
+  (:issue:`6227`, :pull:`6690`)
+  By `Michael Niklas `_.
 
 Documentation
 ~~~~~~~~~~~~~