From cb7a378f691a2b4ae3cfc26740ee42dd071365f7 Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Wed, 27 Apr 2022 19:01:21 -0700 Subject: [PATCH 1/7] Attempt to improve CI caching Currently about 40% of the time is taken by installing things, hopefully we can cut that down --- .github/workflows/ci-additional.yaml | 7 +++---- .github/workflows/ci.yaml | 12 ++++++++++++ 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci-additional.yaml b/.github/workflows/ci-additional.yaml index c43dd1ad46a..8d4e63e583c 100644 --- a/.github/workflows/ci-additional.yaml +++ b/.github/workflows/ci-additional.yaml @@ -42,8 +42,7 @@ jobs: fail-fast: false matrix: os: ["ubuntu-latest"] - env: - [ + env: [ # Minimum python version: "py38-bare-minimum", "py38-min-all-deps", @@ -67,12 +66,12 @@ jobs: else echo "CONDA_ENV_FILE=ci/requirements/${{ matrix.env }}.yml" >> $GITHUB_ENV fi + - name: Cache conda uses: actions/cache@v3 with: path: ~/conda_pkgs_dir - key: - ${{ runner.os }}-conda-${{ matrix.env }}-${{ + key: ${{ runner.os }}-conda-${{ matrix.env }}-${{ hashFiles('ci/requirements/**.yml') }} - uses: conda-incubator/setup-miniconda@v2 diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 0f8073c017b..d8285be32a4 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -58,12 +58,15 @@ jobs: fi echo "PYTHON_VERSION=${{ matrix.python-version }}" >> $GITHUB_ENV + # This and the next few are based on https://github.com/conda-incubator/setup-miniconda#caching-environments - name: Cache conda + id: cache uses: actions/cache@v3 with: path: ~/conda_pkgs_dir key: ${{ runner.os }}-conda-py${{ matrix.python-version }}-${{ hashFiles('ci/requirements/**.yml') }} + - uses: conda-incubator/setup-miniconda@v2 with: channels: conda-forge @@ -74,9 +77,18 @@ jobs: python-version: ${{ matrix.python-version }} use-only-tar-bz2: true + - name: Cache conda env + id: cache + uses: actions/cache@v3 + with: + path: ${{ env.CONDA }}/envs + key: ${{ runner.os }}-conda-py${{ matrix.python-version }}-${{ + hashFiles('ci/requirements/**.yml') }} + - name: Install conda dependencies run: | mamba env update -f $CONDA_ENV_FILE + if: steps.cache.outputs.cache-hit != 'true' - name: Install xarray run: | From 38b402788526de24ffd0d31e907d6893105b21bd Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Wed, 27 Apr 2022 19:43:36 -0700 Subject: [PATCH 2/7] --- .github/workflows/ci.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index d8285be32a4..76bfc34fd50 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -60,7 +60,7 @@ jobs: # This and the next few are based on https://github.com/conda-incubator/setup-miniconda#caching-environments - name: Cache conda - id: cache + id: cache-conda uses: actions/cache@v3 with: path: ~/conda_pkgs_dir @@ -78,7 +78,7 @@ jobs: use-only-tar-bz2: true - name: Cache conda env - id: cache + id: cache-env uses: actions/cache@v3 with: path: ${{ env.CONDA }}/envs @@ -88,7 +88,7 @@ jobs: - name: Install conda dependencies run: | mamba env update -f $CONDA_ENV_FILE - if: steps.cache.outputs.cache-hit != 'true' + if: steps.cache-env.outputs.cache-hit != 'true' - name: Install xarray run: | From 33017fd4aeb6995c489e6af37c5d85b8d831bc7a Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Wed, 27 Apr 2022 20:14:53 -0700 Subject: [PATCH 3/7] Update .github/workflows/ci.yaml Co-authored-by: Deepak Cherian --- .github/workflows/ci.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 76bfc34fd50..2732145de08 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -81,7 +81,7 @@ jobs: id: cache-env uses: actions/cache@v3 with: - path: ${{ env.CONDA }}/envs + path: ${{ env.CONDA_PREFIX }} key: ${{ runner.os }}-conda-py${{ matrix.python-version }}-${{ hashFiles('ci/requirements/**.yml') }} From 73a99327733906ddbde33bd6547a1ba517bc9e9f Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Wed, 27 Apr 2022 21:24:16 -0600 Subject: [PATCH 4/7] Update .github/workflows/ci.yaml --- .github/workflows/ci.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 2732145de08..38decc9ed31 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -81,7 +81,7 @@ jobs: id: cache-env uses: actions/cache@v3 with: - path: ${{ env.CONDA_PREFIX }} + path: /usr/share/miniconda/envs/xarray-tests key: ${{ runner.os }}-conda-py${{ matrix.python-version }}-${{ hashFiles('ci/requirements/**.yml') }} From 1fb905025df49922aef99baffdab4bf0e61143d5 Mon Sep 17 00:00:00 2001 From: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> Date: Wed, 27 Apr 2022 19:17:53 -0700 Subject: [PATCH 5/7] Fix some mypy issues (#6531) * Fix some mypy issues Unfortunately these have crept back in. I'll add a workflow job, since the pre-commit is not covering everything on its own * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Add mypy workflow * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .github/workflows/ci-additional.yaml | 35 +++++++++++++ xarray/core/dataarray.py | 5 +- xarray/core/dataset.py | 6 ++- xarray/core/indexing.py | 76 ++++++++++++---------------- xarray/core/utils.py | 3 +- xarray/tests/test_coding_times.py | 10 ++-- xarray/tests/test_formatting.py | 6 +-- 7 files changed, 84 insertions(+), 57 deletions(-) diff --git a/.github/workflows/ci-additional.yaml b/.github/workflows/ci-additional.yaml index 8d4e63e583c..df978b7ef85 100644 --- a/.github/workflows/ci-additional.yaml +++ b/.github/workflows/ci-additional.yaml @@ -151,6 +151,41 @@ jobs: run: | python -m pytest --doctest-modules xarray --ignore xarray/tests + mypy: + name: Mypy + runs-on: "ubuntu-latest" + if: needs.detect-ci-trigger.outputs.triggered == 'false' + defaults: + run: + shell: bash -l {0} + + steps: + - uses: actions/checkout@v3 + with: + fetch-depth: 0 # Fetch all history for all branches and tags. + - uses: conda-incubator/setup-miniconda@v2 + with: + channels: conda-forge + channel-priority: strict + mamba-version: "*" + activate-environment: xarray-tests + auto-update-conda: false + python-version: "3.9" + + - name: Install conda dependencies + run: | + mamba env update -f ci/requirements/environment.yml + - name: Install xarray + run: | + python -m pip install --no-deps -e . + - name: Version info + run: | + conda info -a + conda list + python xarray/util/print_versions.py + - name: Run mypy + run: mypy + min-version-policy: name: Minimum Version Policy runs-on: "ubuntu-latest" diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 2cf78fa7c61..d15cbd00c0d 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -1154,7 +1154,8 @@ def chunk( chunks = {} if isinstance(chunks, (float, str, int)): - chunks = dict.fromkeys(self.dims, chunks) + # ignoring type; unclear why it won't accept a Literal into the value. + chunks = dict.fromkeys(self.dims, chunks) # type: ignore elif isinstance(chunks, (tuple, list)): chunks = dict(zip(self.dims, chunks)) else: @@ -4735,7 +4736,7 @@ def curvefit( def drop_duplicates( self, - dim: Hashable | Iterable[Hashable] | ..., + dim: Hashable | Iterable[Hashable], keep: Literal["first", "last"] | Literal[False] = "first", ): """Returns a new DataArray with duplicate dimension values removed. diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 2c67cd665ca..76776b4bc44 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -7981,7 +7981,7 @@ def _wrapper(Y, *coords_, **kwargs): def drop_duplicates( self, - dim: Hashable | Iterable[Hashable] | ..., + dim: Hashable | Iterable[Hashable], keep: Literal["first", "last"] | Literal[False] = "first", ): """Returns a new Dataset with duplicate dimension values removed. @@ -8005,9 +8005,11 @@ def drop_duplicates( DataArray.drop_duplicates """ if isinstance(dim, str): - dims = (dim,) + dims: Iterable = (dim,) elif dim is ...: dims = self.dims + elif not isinstance(dim, Iterable): + dims = [dim] else: dims = dim diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index 27bd4954bc4..cbbd507eeff 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import enum import functools import operator @@ -6,19 +8,7 @@ from dataclasses import dataclass, field from datetime import timedelta from html import escape -from typing import ( - TYPE_CHECKING, - Any, - Callable, - Dict, - Hashable, - Iterable, - List, - Mapping, - Optional, - Tuple, - Union, -) +from typing import TYPE_CHECKING, Any, Callable, Hashable, Iterable, Mapping import numpy as np import pandas as pd @@ -59,12 +49,12 @@ class IndexSelResult: """ - dim_indexers: Dict[Any, Any] - indexes: Dict[Any, "Index"] = field(default_factory=dict) - variables: Dict[Any, "Variable"] = field(default_factory=dict) - drop_coords: List[Hashable] = field(default_factory=list) - drop_indexes: List[Hashable] = field(default_factory=list) - rename_dims: Dict[Any, Hashable] = field(default_factory=dict) + dim_indexers: dict[Any, Any] + indexes: dict[Any, Index] = field(default_factory=dict) + variables: dict[Any, Variable] = field(default_factory=dict) + drop_coords: list[Hashable] = field(default_factory=list) + drop_indexes: list[Hashable] = field(default_factory=list) + rename_dims: dict[Any, Hashable] = field(default_factory=dict) def as_tuple(self): """Unlike ``dataclasses.astuple``, return a shallow copy. @@ -82,7 +72,7 @@ def as_tuple(self): ) -def merge_sel_results(results: List[IndexSelResult]) -> IndexSelResult: +def merge_sel_results(results: list[IndexSelResult]) -> IndexSelResult: all_dims_count = Counter([dim for res in results for dim in res.dim_indexers]) duplicate_dims = {k: v for k, v in all_dims_count.items() if v > 1} @@ -124,13 +114,13 @@ def group_indexers_by_index( obj: T_Xarray, indexers: Mapping[Any, Any], options: Mapping[str, Any], -) -> List[Tuple["Index", Dict[Any, Any]]]: +) -> list[tuple[Index, dict[Any, Any]]]: """Returns a list of unique indexes and their corresponding indexers.""" unique_indexes = {} - grouped_indexers: Mapping[Union[int, None], Dict] = defaultdict(dict) + grouped_indexers: Mapping[int | None, dict] = defaultdict(dict) for key, label in indexers.items(): - index: "Index" = obj.xindexes.get(key, None) + index: Index = obj.xindexes.get(key, None) if index is not None: index_id = id(index) @@ -787,7 +777,7 @@ class IndexingSupport(enum.Enum): def explicit_indexing_adapter( key: ExplicitIndexer, - shape: Tuple[int, ...], + shape: tuple[int, ...], indexing_support: IndexingSupport, raw_indexing_method: Callable, ) -> Any: @@ -821,8 +811,8 @@ def explicit_indexing_adapter( def decompose_indexer( - indexer: ExplicitIndexer, shape: Tuple[int, ...], indexing_support: IndexingSupport -) -> Tuple[ExplicitIndexer, ExplicitIndexer]: + indexer: ExplicitIndexer, shape: tuple[int, ...], indexing_support: IndexingSupport +) -> tuple[ExplicitIndexer, ExplicitIndexer]: if isinstance(indexer, VectorizedIndexer): return _decompose_vectorized_indexer(indexer, shape, indexing_support) if isinstance(indexer, (BasicIndexer, OuterIndexer)): @@ -848,9 +838,9 @@ def _decompose_slice(key, size): def _decompose_vectorized_indexer( indexer: VectorizedIndexer, - shape: Tuple[int, ...], + shape: tuple[int, ...], indexing_support: IndexingSupport, -) -> Tuple[ExplicitIndexer, ExplicitIndexer]: +) -> tuple[ExplicitIndexer, ExplicitIndexer]: """ Decompose vectorized indexer to the successive two indexers, where the first indexer will be used to index backend arrays, while the second one @@ -929,10 +919,10 @@ def _decompose_vectorized_indexer( def _decompose_outer_indexer( - indexer: Union[BasicIndexer, OuterIndexer], - shape: Tuple[int, ...], + indexer: BasicIndexer | OuterIndexer, + shape: tuple[int, ...], indexing_support: IndexingSupport, -) -> Tuple[ExplicitIndexer, ExplicitIndexer]: +) -> tuple[ExplicitIndexer, ExplicitIndexer]: """ Decompose outer indexer to the successive two indexers, where the first indexer will be used to index backend arrays, while the second one @@ -973,7 +963,7 @@ def _decompose_outer_indexer( return indexer, BasicIndexer(()) assert isinstance(indexer, (OuterIndexer, BasicIndexer)) - backend_indexer: List[Any] = [] + backend_indexer: list[Any] = [] np_indexer = [] # make indexer positive pos_indexer: list[np.ndarray | int | np.number] = [] @@ -1395,7 +1385,7 @@ def __array__(self, dtype: DTypeLike = None) -> np.ndarray: return np.asarray(array.values, dtype=dtype) @property - def shape(self) -> Tuple[int]: + def shape(self) -> tuple[int]: return (len(self.array),) def _convert_scalar(self, item): @@ -1420,13 +1410,13 @@ def _convert_scalar(self, item): def __getitem__( self, indexer - ) -> Union[ - "PandasIndexingAdapter", - NumpyIndexingAdapter, - np.ndarray, - np.datetime64, - np.timedelta64, - ]: + ) -> ( + PandasIndexingAdapter + | NumpyIndexingAdapter + | np.ndarray + | np.datetime64 + | np.timedelta64 + ): key = indexer.tuple if isinstance(key, tuple) and len(key) == 1: # unpack key so it can index a pandas.Index object (pandas.Index @@ -1449,7 +1439,7 @@ def transpose(self, order) -> pd.Index: def __repr__(self) -> str: return f"{type(self).__name__}(array={self.array!r}, dtype={self.dtype!r})" - def copy(self, deep: bool = True) -> "PandasIndexingAdapter": + def copy(self, deep: bool = True) -> PandasIndexingAdapter: # Not the same as just writing `self.array.copy(deep=deep)`, as # shallow copies of the underlying numpy.ndarrays become deep ones # upon pickling @@ -1476,7 +1466,7 @@ def __init__( self, array: pd.MultiIndex, dtype: DTypeLike = None, - level: Optional[str] = None, + level: str | None = None, ): super().__init__(array, dtype) self.level = level @@ -1535,7 +1525,7 @@ def _repr_html_(self) -> str: array_repr = short_numpy_repr(self._get_array_subset()) return f"
{escape(array_repr)}
" - def copy(self, deep: bool = True) -> "PandasMultiIndexingAdapter": + def copy(self, deep: bool = True) -> PandasMultiIndexingAdapter: # see PandasIndexingAdapter.copy array = self.array.copy(deep=True) if deep else self.array return type(self)(array, self._dtype, self.level) diff --git a/xarray/core/utils.py b/xarray/core/utils.py index efdbe10d5ef..bab6476e734 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -237,7 +237,8 @@ def remove_incompatible_items( del first_dict[k] -def is_dict_like(value: Any) -> bool: +# It's probably OK to give this as a TypeGuard; though it's not perfectly robust. +def is_dict_like(value: Any) -> TypeGuard[dict]: return hasattr(value, "keys") and hasattr(value, "__getitem__") diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index 92d27f22eb8..f10523aecbb 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -1007,12 +1007,9 @@ def test_decode_ambiguous_time_warns(calendar) -> None: units = "days since 1-1-1" expected = num2date(dates, units, calendar=calendar, only_use_cftime_datetimes=True) - exp_warn_type = SerializationWarning if is_standard_calendar else None - - with pytest.warns(exp_warn_type) as record: - result = decode_cf_datetime(dates, units, calendar=calendar) - if is_standard_calendar: + with pytest.warns(SerializationWarning) as record: + result = decode_cf_datetime(dates, units, calendar=calendar) relevant_warnings = [ r for r in record.list @@ -1020,7 +1017,8 @@ def test_decode_ambiguous_time_warns(calendar) -> None: ] assert len(relevant_warnings) == 1 else: - assert not record + with assert_no_warnings(): + result = decode_cf_datetime(dates, units, calendar=calendar) np.testing.assert_array_equal(result, expected) diff --git a/xarray/tests/test_formatting.py b/xarray/tests/test_formatting.py index efdb8a57288..4bbf41c7b38 100644 --- a/xarray/tests/test_formatting.py +++ b/xarray/tests/test_formatting.py @@ -480,10 +480,10 @@ def test_short_numpy_repr() -> None: assert num_lines < 30 # threshold option (default: 200) - array = np.arange(100) - assert "..." not in formatting.short_numpy_repr(array) + array2 = np.arange(100) + assert "..." not in formatting.short_numpy_repr(array2) with xr.set_options(display_values_threshold=10): - assert "..." in formatting.short_numpy_repr(array) + assert "..." in formatting.short_numpy_repr(array2) def test_large_array_repr_length() -> None: From ad9038226d92f1eae3e63cc5d86ed6bbf566265b Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Wed, 27 Apr 2022 20:40:40 -0700 Subject: [PATCH 6/7] Revert "Update .github/workflows/ci.yaml" This reverts commit 73a99327733906ddbde33bd6547a1ba517bc9e9f. --- .github/workflows/ci.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 73250d54404..7df0766b016 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -81,7 +81,7 @@ jobs: id: cache-env uses: actions/cache@v3 with: - path: /usr/share/miniconda/envs/xarray-tests + path: ${{ env.CONDA_PREFIX }} key: ${{ runner.os }}-conda-py${{ matrix.python-version }}-${{ hashFiles('ci/requirements/**.yml') }} From 17d73d26af824fc6b58197fc55ae0052e92b295a Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Wed, 27 Apr 2022 21:53:27 -0600 Subject: [PATCH 7/7] hard-code env path --- .github/workflows/ci.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 7df0766b016..73250d54404 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -81,7 +81,7 @@ jobs: id: cache-env uses: actions/cache@v3 with: - path: ${{ env.CONDA_PREFIX }} + path: /usr/share/miniconda/envs/xarray-tests key: ${{ runner.os }}-conda-py${{ matrix.python-version }}-${{ hashFiles('ci/requirements/**.yml') }}