From 529da8e4364f7e01acfda52e4bec129f97f3ec0a Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Fri, 14 Mar 2025 14:14:27 -0700 Subject: [PATCH 1/6] Fix type issues from pandas stubs for transparency, claude code also helped here, but I did more auditing & fixing than the idxmax PR (sorry to externalize that to reviewers...) --- xarray/core/coordinates.py | 4 ++-- xarray/core/groupby.py | 2 +- xarray/core/indexes.py | 9 ++++++--- xarray/tests/test_dataset.py | 4 ++-- 4 files changed, 11 insertions(+), 8 deletions(-) diff --git a/xarray/core/coordinates.py b/xarray/core/coordinates.py index 47773ddfbb6..b6c8e4a0dc5 100644 --- a/xarray/core/coordinates.py +++ b/xarray/core/coordinates.py @@ -177,13 +177,13 @@ def to_index(self, ordered_dims: Sequence[Hashable] | None = None) -> pd.Index: # compute the cartesian product code_list += [ - np.tile(np.repeat(code, repeat_counts[i]), tile_counts[i]) + np.tile(np.repeat(code, repeat_counts[i]), tile_counts[i]).tolist() for code in codes ] level_list += levels names += index.names - return pd.MultiIndex(level_list, code_list, names=names) + return pd.MultiIndex(levels=level_list, codes=code_list, names=names) class Coordinates(AbstractCoordinates): diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 6a3ce156ce6..3bdc30eb1f0 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -527,7 +527,7 @@ def factorize(self) -> EncodedGroups: _flatcodes = where(mask.data, -1, _flatcodes) full_index = pd.MultiIndex.from_product( - (grouper.full_index.values for grouper in groupers), + list(grouper.full_index.values for grouper in groupers), names=tuple(grouper.name for grouper in groupers), ) # This will be unused when grouping by dask arrays, so skip.. 
diff --git a/xarray/core/indexes.py b/xarray/core/indexes.py index c2bc8b94f3f..0b4eee7b21c 100644 --- a/xarray/core/indexes.py +++ b/xarray/core/indexes.py @@ -1046,9 +1046,11 @@ def stack( *[lev.factorize() for lev in level_indexes], strict=True ) labels_mesh = np.meshgrid(*split_labels, indexing="ij") - labels = [x.ravel() for x in labels_mesh] + labels = [x.ravel().tolist() for x in labels_mesh] - index = pd.MultiIndex(levels, labels, sortorder=0, names=variables.keys()) + index = pd.MultiIndex( + levels=levels, codes=labels, sortorder=0, names=variables.keys() + ) level_coords_dtype = {k: var.dtype for k, var in variables.items()} return cls(index, dim, level_coords_dtype=level_coords_dtype) @@ -1120,7 +1122,8 @@ def from_variables_maybe_expand( levels.append(cat.categories) level_variables[name] = var - index = pd.MultiIndex(levels, codes, names=names) + codes_as_lists = [list(x) for x in codes] + index = pd.MultiIndex(levels=levels, codes=codes_as_lists, names=names) level_coords_dtype = {k: var.dtype for k, var in level_variables.items()} obj = cls(index, dim, level_coords_dtype=level_coords_dtype) index_vars = obj.create_variables(level_variables) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index bdae9daf758..06be7e3d128 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -2923,7 +2923,7 @@ def test_drop_indexes(self) -> None: assert_identical(actual, ds) # test index corrupted - midx = pd.MultiIndex.from_tuples([([1, 2]), ([3, 4])], names=["a", "b"]) + midx = pd.MultiIndex.from_tuples([([1, 2]), ([3, 4])], names=["a", "b"]) # type: ignore[list-item] midx_coords = Coordinates.from_pandas_multiindex(midx, "x") ds = Dataset(coords=midx_coords) @@ -3219,7 +3219,7 @@ def test_rename_dimension_coord_warnings(self) -> None: ds.rename(x="x") def test_rename_multiindex(self) -> None: - midx = pd.MultiIndex.from_tuples([([1, 2]), ([3, 4])], names=["a", "b"]) + midx = pd.MultiIndex.from_tuples([(1, 2), (3, 
4)], names=["a", "b"]) midx_coords = Coordinates.from_pandas_multiindex(midx, "x") original = Dataset({}, midx_coords) From 16e76526a13e3187b58c8eba4036d53323771266 Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Fri, 14 Mar 2025 15:51:13 -0700 Subject: [PATCH 2/6] --- ci/requirements/environment.yml | 1 + pyproject.toml | 1 + 2 files changed, 2 insertions(+) diff --git a/ci/requirements/environment.yml b/ci/requirements/environment.yml index 5d31b2feb89..b4354b14f40 100644 --- a/ci/requirements/environment.yml +++ b/ci/requirements/environment.yml @@ -56,6 +56,7 @@ dependencies: - types-pytz - types-PyYAML - types-setuptools + - types-openpyxl - typing_extensions - zarr - pip: diff --git a/pyproject.toml b/pyproject.toml index 6b45aef89e4..85cb8f1bc0c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -55,6 +55,7 @@ types = [ "types-pexpect", "types-psutil", "types-pycurl", + "types-openpyxl", "types-python-dateutil", "types-pytz", "types-setuptools", From 29e273cdd44e20002512ec74d46ea6352e215c9d Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Fri, 14 Mar 2025 16:00:42 -0700 Subject: [PATCH 3/6] . 
--- xarray/tests/test_dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 06be7e3d128..e584b69eefb 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -2923,7 +2923,7 @@ def test_drop_indexes(self) -> None: assert_identical(actual, ds) # test index corrupted - midx = pd.MultiIndex.from_tuples([([1, 2]), ([3, 4])], names=["a", "b"]) # type: ignore[list-item] + midx = pd.MultiIndex.from_tuples([[1, 2], [3, 4]], names=["a", "b"]) # type: ignore[list-item] midx_coords = Coordinates.from_pandas_multiindex(midx, "x") ds = Dataset(coords=midx_coords) From 78d2516e0d868277849abaebded50139df53228e Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Fri, 14 Mar 2025 16:01:07 -0700 Subject: [PATCH 4/6] Don't skip tests when on a `mypy` branch For some reason, my local environment was skipping tests when on a branch named `mypy`, or at least something was setting a `mypy` keyword on the test items. This changes the check to test whether the test has the `mypy` marker set, which is more precise than just looking at the keywords. 
--- conftest.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/conftest.py b/conftest.py index 532a7badd91..200696431ea 100644 --- a/conftest.py +++ b/conftest.py @@ -22,7 +22,9 @@ def pytest_runtest_setup(item): pytest.skip( "set --run-network-tests to run test requiring an internet connection" ) - if "mypy" in item.keywords and not item.config.getoption("--run-mypy"): + if any("mypy" in m.name for m in item.own_markers) and not item.config.getoption( + "--run-mypy" + ): pytest.skip("set --run-mypy option to run mypy tests") From 137306498f73e657163653b3bc0918aac5097c54 Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Fri, 14 Mar 2025 16:05:35 -0700 Subject: [PATCH 5/6] --- xarray/tests/test_dataset.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index e584b69eefb..490ba116f74 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -2923,7 +2923,10 @@ def test_drop_indexes(self) -> None: assert_identical(actual, ds) # test index corrupted - midx = pd.MultiIndex.from_tuples([[1, 2], [3, 4]], names=["a", "b"]) # type: ignore[list-item] + + # FYI this requires a `type: ignore[list-item]` in one environment, but not CI + # (remove this comment if it doesn't flare up again) + midx = pd.MultiIndex.from_tuples([[1, 2], [3, 4]], names=["a", "b"]) midx_coords = Coordinates.from_pandas_multiindex(midx, "x") ds = Dataset(coords=midx_coords) From 8bad1bd4254c9ed3e5ee83041a32cfe6fd6a7352 Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Fri, 14 Mar 2025 16:06:38 -0700 Subject: [PATCH 6/6] --- xarray/tests/test_dataset.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 490ba116f74..b273b7d1a0d 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -2923,10 +2923,7 @@ def test_drop_indexes(self) -> None: assert_identical(actual, ds) 
# test index corrupted - - # FYI this requires a `type: ignore[list-item]` in one environment, but not CI - # (remove this comment if it doesn't flare up again) - midx = pd.MultiIndex.from_tuples([[1, 2], [3, 4]], names=["a", "b"]) + midx = pd.MultiIndex.from_tuples([(1, 2), (3, 4)], names=["a", "b"]) midx_coords = Coordinates.from_pandas_multiindex(midx, "x") ds = Dataset(coords=midx_coords)