From cdf30e8d753f179f5505fa8fa55061b230c578fa Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Sun, 9 Feb 2025 11:48:33 -0800 Subject: [PATCH 1/5] Add typos check to pre-commit hooks Also fixes a bunch of typos. The work here was adding the exclusions (I had an LLM do them but also checked them) --- .pre-commit-config.yaml | 6 ++ doc/whats-new.rst | 2 +- pyproject.toml | 99 ++++++++++++++++++++++++------- xarray/core/alignment.py | 2 +- xarray/core/dataarray.py | 8 +-- xarray/core/dataset.py | 8 +-- xarray/testing/strategies.py | 4 +- xarray/tests/test_backends.py | 2 +- xarray/tests/test_coding_times.py | 2 +- xarray/tests/test_dataset.py | 4 +- xarray/tests/test_rolling.py | 2 +- xarray/tests/test_variable.py | 2 +- 12 files changed, 103 insertions(+), 38 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f02dbf9dc69..b9990bf0f94 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -73,3 +73,9 @@ repos: hooks: - id: validate-pyproject additional_dependencies: ["validate-pyproject-schema-store[all]"] + - repo: https://github.com/crate-ci/typos + rev: dictgen-v0.3.1 + hooks: + - id: typos + # https://github.com/crate-ci/typos/issues/347 + pass_filenames: false diff --git a/doc/whats-new.rst b/doc/whats-new.rst index fe63a923fe6..0b6533300b2 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -82,7 +82,7 @@ class can be passed through the ``decode_times`` keyword argument (see also coder = xr.coders.CFDatetimeCoder(time_unit="s") ds = xr.open_dataset(filename, decode_times=coder) -Similar control of the resoution of decoded timedeltas can be achieved through +Similar control of the resolution of decoded timedeltas can be achieved through passing a :py:class:`coders.CFTimedeltaCoder` instance to the ``decode_timedelta`` keyword argument: diff --git a/pyproject.toml b/pyproject.toml index fd4a4293882..cffdbeaa633 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,5 @@ [project] -authors = [ - { name = "xarray Developers", email = "xarray@googlegroups.com" }, -] +authors = [{ name = "xarray Developers", email = "xarray@googlegroups.com" }] classifiers = [ "Development Status :: 5 - Production/Stable", "License :: OSI Approved :: Apache Software License", @@ -21,11 +19,7 @@ name = "xarray" readme = "README.md" requires-python = ">=3.10" -dependencies = [ - "numpy>=1.24", - "packaging>=23.2", - "pandas>=2.1", -] +dependencies = ["numpy>=1.24", "packaging>=23.2", "pandas>=2.1"] # We don't encode minimum requirements here (though if we can write a script to # generate the text from `min_deps_check.py`, that's welcome...). We do add @@ -63,6 +57,22 @@ io = [ etc = ["sparse"] parallel = ["dask[complete]"] viz = ["cartopy", "matplotlib", "nc-time-axis", "seaborn"] +type-stubs = [ + "pandas-stubs", + "types-PyYAML", + "types-Pygments", + "types-colorama", + "types-decorator", + "types-defusedxml", + "types-docutils", + "types-networkx", + "types-pexpect", + "types-psutil", + "types-pycurl", + "types-python-dateutil", + "types-pytz", + "types-setuptools", +] [project.urls] Documentation = "https://docs.xarray.dev" @@ -76,10 +86,7 @@ dask = "xarray.namedarray.daskmanager:DaskManager" [build-system] build-backend = "setuptools.build_meta" -requires = [ - "setuptools>=42", - "setuptools-scm>=7", -] +requires = ["setuptools>=42", "setuptools-scm>=7"] [tool.setuptools] packages = ["xarray"] @@ -103,10 +110,7 @@ exclude_lines = ["pragma: no cover", "if TYPE_CHECKING"] [tool.mypy] enable_error_code = ["ignore-without-code", "redundant-self", "redundant-expr"] -exclude = [ - 'build', - 'xarray/util/generate_.*\.py', -] +exclude = ['build', 'xarray/util/generate_.*\.py'] files = "xarray" show_error_context = true warn_redundant_casts = true @@ -237,10 +241,7 @@ module = ["xarray.namedarray.*", "xarray.tests.test_namedarray"] # reportMissingTypeStubs = false [tool.ruff] -extend-exclude = [ - "doc", - "_typed_ops.pyi", -] +extend-exclude = ["doc", "_typed_ops.pyi"] [tool.ruff.lint] extend-select = [ @@ -360,3 +361,61 @@ test = "pytest" ignore = [ "PP308", # This option creates a large amount of log lines. ] + +[tool.typos] + +[tool.typos.default] +extend-ignore-identifiers-re = [ + # Variable names + "nd_.*", + ".*_nd", + "ba_.*", + ".*_ba", + "ser_.*", + ".*_ser", + # Function/class names + "NDArray.*", + ".*NDArray.*", +] + +[tool.typos.default.extend-words] +# NumPy function names +arange = "arange" + +# Technical terms +nd = "nd" +nin = "nin" + +# Variable names +ba = "ba" +ser = "ser" +fo = "fo" +iy = "iy" +vart = "vart" +ede = "ede" + +# Organization/Institution names +Stichting = "Stichting" +Mathematisch = "Mathematisch" + +# People's names +Soler = "Soler" +Bruning = "Bruning" +Tung = "Tung" +Claus = "Claus" +Celles = "Celles" +slowy = "slowy" +Commun = "Commun" + +# Tests +Ome = "Ome" +SUR = "SUR" +Tio = "Tio" +Ono = "Ono" +abl = "abl" + +# Technical terms +splitted = "splitted" +childs = "childs" +cutted = "cutted" +LOCA = "LOCA" diff --git a/xarray/core/alignment.py b/xarray/core/alignment.py index d6cdd45bb49..a3c26a0d023 100644 --- a/xarray/core/alignment.py +++ b/xarray/core/alignment.py @@ -979,7 +979,7 @@ def reindex( """ # TODO: (benbovy - explicit indexes): uncomment? - # --> from reindex docstrings: "any mis-matched dimension is simply ignored" + # --> from reindex docstrings: "any mismatched dimension is simply ignored" # bad_keys = [k for k in indexers if k not in obj._indexes and k not in obj.dims] # if bad_keys: # raise ValueError( diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 6c4c17e76cd..319fce1057c 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -1966,8 +1966,8 @@ def reindex_like( names to pandas.Index objects, which provides coordinates upon which to index the variables in this dataset. The indexes on this other object need not be the same as the indexes on this - dataset. Any mis-matched index values will be filled in with - NaN, and any mis-matched dimension names will simply be ignored. + dataset. Any mismatched index values will be filled in with + NaN, and any mismatched dimension names will simply be ignored. method : {None, "nearest", "pad", "ffill", "backfill", "bfill"}, optional Method to use for filling index values from other not found on this data array: @@ -2148,8 +2148,8 @@ def reindex( ---------- indexers : dict, optional Dictionary with keys given by dimension names and values given by - arrays of coordinates tick labels. Any mis-matched coordinate - values will be filled in with NaN, and any mis-matched dimension + arrays of coordinates tick labels. Any mismatched coordinate + values will be filled in with NaN, and any mismatched dimension names will simply be ignored. One of indexers or indexers_kwargs must be provided. copy : bool, optional diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 79fea33e4c1..5f239ce5fad 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -3670,8 +3670,8 @@ def reindex_like( names to pandas.Index objects, which provides coordinates upon which to index the variables in this dataset. The indexes on this other object need not be the same as the indexes on this - dataset. Any mis-matched index values will be filled in with - NaN, and any mis-matched dimension names will simply be ignored. + dataset. Any mismatched index values will be filled in with + NaN, and any mismatched dimension names will simply be ignored. method : {None, "nearest", "pad", "ffill", "backfill", "bfill", None}, optional Method to use for filling index values from other not found in this dataset: @@ -3736,8 +3736,8 @@ def reindex( ---------- indexers : dict, optional Dictionary with keys given by dimension names and values given by - arrays of coordinates tick labels. Any mis-matched coordinate - values will be filled in with NaN, and any mis-matched dimension + arrays of coordinates tick labels. Any mismatched coordinate + values will be filled in with NaN, and any mismatched dimension names will simply be ignored. One of indexers or indexers_kwargs must be provided. method : {None, "nearest", "pad", "ffill", "backfill", "bfill", None}, optional diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index e60572fbddd..84f37e5568a 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -477,7 +477,7 @@ def unique_subset_of( ) -class CFTimeStategy(st.SearchStrategy): +class CFTimeStrategy(st.SearchStrategy): def __init__(self, min_value, max_value): self.min_value = min_value self.max_value = max_value @@ -506,5 +506,5 @@ def do_draw(self, data): daysinmonth = date_type(99999, 12, 1).daysinmonth min_value = date_type(-99999, 1, 1) max_value = date_type(99999, 12, daysinmonth, 23, 59, 59, 999999) - strategy = CFTimeStategy(min_value, max_value) + strategy = CFTimeStrategy(min_value, max_value) return strategy.do_draw(data) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 0237252f975..29d1f375548 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -2576,7 +2576,7 @@ def test_chunk_encoding_with_dask(self) -> None: with self.roundtrip(original) as actual: assert_identical(original, actual) - # but itermediate unaligned chunks are bad + # but intermediate unaligned chunks are bad badenc = ds.chunk({"x": (3, 5, 3, 1)}) badenc.var1.encoding["chunks"] = (3,) with pytest.raises(ValueError, match=r"would overlap multiple dask chunks"): diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index 2e61e5d853e..c9b451d0960 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -1820,7 +1820,7 @@ def test_encode_cf_timedelta_casting_overflow_error(use_dask, dtype) -> None: _DECODE_TIMEDELTA_TESTS = { "default": (True, None, np.dtype("timedelta64[ns]"), True), - "decode_timdelta=False": (True, False, np.dtype("int64"), False), + "decode_timedelta=False": (True, False, np.dtype("int64"), False), "inherit-time_unit-from-decode_times": ( CFDatetimeCoder(time_unit="s"), None, diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index c3302dd6c9d..e13dada1b41 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -1243,7 +1243,7 @@ def test_chunk_by_frequency(self, freq: str, calendar: str, add_gap: bool) -> No assert rechunked.chunksizes["time"] == expected assert rechunked.chunksizes["x"] == (2,) * 5 - def test_chunk_by_frequecy_errors(self): + def test_chunk_by_frequency_errors(self): ds = Dataset({"foo": ("x", [1, 2, 3])}) with pytest.raises(ValueError, match="virtual variable"): ds.chunk(x=TimeResampler("YE")) @@ -2170,7 +2170,7 @@ def test_reindex(self) -> None: # invalid dimension # TODO: (benbovy - explicit indexes): uncomment? - # --> from reindex docstrings: "any mis-matched dimension is simply ignored" + # --> from reindex docstrings: "any mismatched dimension is simply ignored" # with pytest.raises(ValueError, match=r"indexer keys.*not correspond.*"): # data.reindex(invalid=0) diff --git a/xarray/tests/test_rolling.py b/xarray/tests/test_rolling.py index 0a3449f51ac..6cb92f8e796 100644 --- a/xarray/tests/test_rolling.py +++ b/xarray/tests/test_rolling.py @@ -606,7 +606,7 @@ def test_rolling_construct_automatic_rechunk(self): # Construct dataset with chunk size of (400, 400, 1) or 1.22 MiB da = DataArray( - dims=["latitute", "longitude", "time"], + dims=["latitude", "longitude", "time"], data=dask.array.random.random((400, 400, 400), chunks=(-1, -1, 1)), name="foo", ) diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index c283797bd08..8679d996e23 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -868,7 +868,7 @@ def test_getitem_error(self): v = Variable(["x", "y", "z"], np.arange(60).reshape(3, 4, 5)) ind = Variable(["x"], [0, 1]) - with pytest.raises(IndexError, match=r"Dimensions of indexers mis"): + with pytest.raises(IndexError, match=r"Dimensions of indexers mismatched"): v[:, ind] @pytest.mark.parametrize( From 0548b951ac0bc20e987c57d921772be320a5ae51 Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Sun, 9 Feb 2025 11:55:17 -0800 Subject: [PATCH 2/5] --- xarray/tests/test_variable.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index 8679d996e23..b1d5a09efcf 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -868,7 +868,7 @@ def test_getitem_error(self): v = Variable(["x", "y", "z"], np.arange(60).reshape(3, 4, 5)) ind = Variable(["x"], [0, 1]) - with pytest.raises(IndexError, match=r"Dimensions of indexers mismatched"): + with pytest.raises(IndexError, match=r"Dimensions of indexers mismatch"): v[:, ind] @pytest.mark.parametrize( From df9381b4947c1035f04943adbdf843841ed5b06c Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Sun, 2 Mar 2025 13:49:41 -0800 Subject: [PATCH 3/5] --- pyproject.toml | 3 +++ xarray/core/indexes.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 84c100faaf6..11b565dfaab 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -427,3 +427,6 @@ splitted = "splitted" childs = "childs" cutted = "cutted" LOCA = "LOCA" + +[tool.typos.type.jupyter] +extend-ignore-re = ["\"id\": \".*\""] diff --git a/xarray/core/indexes.py b/xarray/core/indexes.py index e99d98d532a..c2bc8b94f3f 100644 --- a/xarray/core/indexes.py +++ b/xarray/core/indexes.py @@ -1464,7 +1464,7 @@ def sel( if any(ds != dim_size0 for ds in dim_size): raise ValueError( "CoordinateTransformIndex only supports advanced (point-wise) indexing " - "with xarray.DataArray or xarray.Variable objects of macthing dimensions." + "with xarray.DataArray or xarray.Variable objects of matching dimensions." ) coord_labels = { From d7630d385d65f2dfea57a499ea1811f0f2e166cc Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Sun, 2 Mar 2025 13:51:31 -0800 Subject: [PATCH 4/5] --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 11b565dfaab..85c9183b30e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -59,7 +59,7 @@ io = [ etc = ["sparse"] parallel = ["dask[complete]"] viz = ["cartopy", "matplotlib", "nc-time-axis", "seaborn"] -type-stubs = [ +types = [ "pandas-stubs", "types-PyYAML", "types-Pygments", From 6186bac9747af8e194dba5752f1fe6f07f7f1a97 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 7 Mar 2025 00:56:26 +0000 Subject: [PATCH 5/5] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/tests/test_datatree.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/tests/test_datatree.py b/xarray/tests/test_datatree.py index 067fa9fe260..55b809307e4 100644 --- a/xarray/tests/test_datatree.py +++ b/xarray/tests/test_datatree.py @@ -1602,7 +1602,7 @@ def test_filter_like(self) -> None: assert filtered_tree.equals(barren_tree) assert "flowers" not in filtered_tree.children - # test symetrical pruning results in isomorphic trees + # test symmetrical pruning results in isomorphic trees assert flower_tree.filter_like(fruit_tree).isomorphic( fruit_tree.filter_like(flower_tree) )