From 85c49d3d6995a78f2cb337bf017307d5050d19d8 Mon Sep 17 00:00:00 2001 From: Joseph Gonzalez Date: Wed, 18 Sep 2024 08:19:59 -0400 Subject: [PATCH 01/17] fix safe chunks validation --- xarray/backends/zarr.py | 111 ++++++++++++++++++++-------------- xarray/tests/test_backends.py | 68 ++++++++++++++++++--- 2 files changed, 128 insertions(+), 51 deletions(-) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 52d2175621f..52de392e85d 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -112,7 +112,7 @@ def __getitem__(self, key): # could possibly have a work-around for 0d data here -def _determine_zarr_chunks(enc_chunks, var_chunks, ndim, name, safe_chunks): +def _determine_zarr_chunks(enc_chunks, var_chunks, ndim, name, safe_chunks, region): """ Given encoding chunks (possibly None or []) and variable chunks (possibly None or []). @@ -163,7 +163,7 @@ def _determine_zarr_chunks(enc_chunks, var_chunks, ndim, name, safe_chunks): if len(enc_chunks_tuple) != ndim: # throw away encoding chunks, start over - return _determine_zarr_chunks(None, var_chunks, ndim, name, safe_chunks) + return _determine_zarr_chunks(None, var_chunks, ndim, name, safe_chunks, region) for x in enc_chunks_tuple: if not isinstance(x, int): @@ -189,20 +189,36 @@ def _determine_zarr_chunks(enc_chunks, var_chunks, ndim, name, safe_chunks): # TODO: incorporate synchronizer to allow writes from multiple dask # threads if var_chunks and enc_chunks_tuple: - for zchunk, dchunks in zip(enc_chunks_tuple, var_chunks, strict=True): - for dchunk in dchunks[:-1]: + base_error = ( + f"Specified zarr chunks encoding['chunks']={enc_chunks_tuple!r} for " + f"variable named {name!r} would overlap multiple dask chunks {var_chunks!r}. " + f"Writing this array in parallel with dask could lead to corrupted data." + f"Consider either rechunking using `chunk()`, deleting " + f"or modifying `encoding['chunks']`, or specify `safe_chunks=False`." + ) + + for zchunk, dchunks, interval in zip(enc_chunks_tuple, var_chunks, region, strict=True): + if not safe_chunks or len(dchunks) <= 1: + # It is not necessary to perform any additional validation if the + # safe_chunks is False, or there are less than two dchunks + continue + + start = 0 + if interval.start: + # If the start of the interval is not None or 0, it means that the data + # is being appended or updated, and in both cases it is mandatory that + # the residue of the division between the first dchunk and the zchunk + # being equal to the border size + border_size = zchunk - interval.start % zchunk + if dchunks[0] % zchunk != border_size: + raise ValueError(base_error) + # Avoid validating the first chunk inside the loop + start = 1 + + for dchunk in dchunks[start:-1]: if dchunk % zchunk: - base_error = ( - f"Specified zarr chunks encoding['chunks']={enc_chunks_tuple!r} for " - f"variable named {name!r} would overlap multiple dask chunks {var_chunks!r}. " - f"Writing this array in parallel with dask could lead to corrupted data." - ) - if safe_chunks: - raise ValueError( - base_error - + " Consider either rechunking using `chunk()`, deleting " - "or modifying `encoding['chunks']`, or specify `safe_chunks=False`." - ) + raise ValueError(base_error) + return enc_chunks_tuple raise AssertionError("We should never get here. 
Function logic must be wrong.") @@ -243,7 +259,7 @@ def _get_zarr_dims_and_attrs(zarr_obj, dimension_key, try_nczarr): def extract_zarr_variable_encoding( - variable, raise_on_invalid=False, name=None, safe_chunks=True + variable, region, raise_on_invalid=False, name=None, safe_chunks=True ): """ Extract zarr encoding dictionary from xarray Variable @@ -251,6 +267,7 @@ def extract_zarr_variable_encoding( Parameters ---------- variable : Variable + region: tuple[slice] raise_on_invalid : bool, optional Returns @@ -285,7 +302,7 @@ def extract_zarr_variable_encoding( del encoding[k] chunks = _determine_zarr_chunks( - encoding.get("chunks"), variable.chunks, variable.ndim, name, safe_chunks + encoding.get("chunks"), variable.chunks, variable.ndim, name, safe_chunks, region ) encoding["chunks"] = chunks return encoding @@ -762,16 +779,9 @@ def set_variables(self, variables, check_encoding_set, writer, unlimited_dims=No if v.encoding == {"_FillValue": None} and fill_value is None: v.encoding = {} - # We need to do this for both new and existing variables to ensure we're not - # writing to a partial chunk, even though we don't use the `encoding` value - # when writing to an existing variable. See - # https://github.com/pydata/xarray/issues/8371 for details. - encoding = extract_zarr_variable_encoding( - v, - raise_on_invalid=vn in check_encoding_set, - name=vn, - safe_chunks=self._safe_chunks, - ) + zarr_array = None + write_region = self._write_region if self._write_region is not None else {} + write_region = {dim: write_region.get(dim, slice(None)) for dim in dims} if name in existing_keys: # existing variable @@ -801,7 +811,36 @@ def set_variables(self, variables, check_encoding_set, writer, unlimited_dims=No ) else: zarr_array = self.zarr_group[name] - else: + + if self._append_dim is not None and self._append_dim in dims: + # resize existing variable + append_axis = dims.index(self._append_dim) + assert write_region[self._append_dim] == slice(None) + write_region[self._append_dim] = slice( + zarr_array.shape[append_axis], None + ) + + new_shape = list(zarr_array.shape) + new_shape[append_axis] += v.shape[append_axis] + zarr_array.resize(new_shape) + + region = tuple(write_region[dim] for dim in dims) + + # We need to do this for both new and existing variables to ensure we're not + # writing to a partial chunk, even though we don't use the `encoding` value + # when writing to an existing variable. See + # https://github.com/pydata/xarray/issues/8371 for details. + # Note: Ideally there should be two functions, one for validating the chunks and + # another one for extracting the encoding. 
+ encoding = extract_zarr_variable_encoding( + v, + region=region, + raise_on_invalid=vn in check_encoding_set, + name=vn, + safe_chunks=self._safe_chunks, + ) + + if name not in existing_keys: # new variable encoded_attrs = {} # the magic for storing the hidden dimension data @@ -833,22 +872,6 @@ def set_variables(self, variables, check_encoding_set, writer, unlimited_dims=No ) zarr_array = _put_attrs(zarr_array, encoded_attrs) - write_region = self._write_region if self._write_region is not None else {} - write_region = {dim: write_region.get(dim, slice(None)) for dim in dims} - - if self._append_dim is not None and self._append_dim in dims: - # resize existing variable - append_axis = dims.index(self._append_dim) - assert write_region[self._append_dim] == slice(None) - write_region[self._append_dim] = slice( - zarr_array.shape[append_axis], None - ) - - new_shape = list(zarr_array.shape) - new_shape[append_axis] += v.shape[append_axis] - zarr_array.resize(new_shape) - - region = tuple(write_region[dim] for dim in dims) writer.add(v.data, zarr_array, region) def close(self) -> None: diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 13258fcf6ea..a78b583598b 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -5496,24 +5496,26 @@ def test_encode_zarr_attr_value() -> None: @requires_zarr def test_extract_zarr_variable_encoding() -> None: + # The region is not useful in these cases, but I still think that it must be mandatory + # because the validation of the chunks is in the same function var = xr.Variable("x", [1, 2]) - actual = backends.zarr.extract_zarr_variable_encoding(var) + actual = backends.zarr.extract_zarr_variable_encoding(var, region=tuple()) assert "chunks" in actual assert actual["chunks"] is None var = xr.Variable("x", [1, 2], encoding={"chunks": (1,)}) - actual = backends.zarr.extract_zarr_variable_encoding(var) + actual = backends.zarr.extract_zarr_variable_encoding(var, region=tuple()) assert actual["chunks"] == (1,) # does not raise on invalid var = xr.Variable("x", [1, 2], encoding={"foo": (1,)}) - actual = backends.zarr.extract_zarr_variable_encoding(var) + actual = backends.zarr.extract_zarr_variable_encoding(var, region=tuple()) # raises on invalid var = xr.Variable("x", [1, 2], encoding={"foo": (1,)}) with pytest.raises(ValueError, match=r"unexpected encoding parameters"): actual = backends.zarr.extract_zarr_variable_encoding( - var, raise_on_invalid=True + var, raise_on_invalid=True, region=tuple() ) @@ -6096,6 +6098,58 @@ def test_zarr_region_chunk_partial_offset(tmp_path): store, safe_chunks=False, region="auto" ) - # This write is unsafe, and should raise an error, but does not. 
- # with pytest.raises(ValueError): - # da.isel(x=slice(5, 25)).chunk(x=(10, 10)).to_zarr(store, region="auto") + with pytest.raises(ValueError): + da.isel(x=slice(5, 25)).chunk(x=(10, 10)).to_zarr(store, region="auto") + + +@requires_zarr +@requires_dask +def test_zarr_safe_chunk(tmp_path): + # https://github.com/pydata/xarray/pull/8459#issuecomment-1819417545 + store = tmp_path / "foo.zarr" + data = np.ones((20,)) + da = xr.DataArray(data, dims=["x"], coords={"x": range(20)}, name="foo").chunk(x=5) + + da.isel(x=slice(0, 7)).to_zarr(store, safe_chunks=True, mode="w") + with pytest.raises(ValueError): + # If the first chunk is smaller than the border size then raise an error + da.isel(x=slice(7, 11)).chunk(x=(2, 2)).to_zarr( + store, append_dim="x", safe_chunks=True + ) + + da.isel(x=slice(0, 7)).to_zarr(store, safe_chunks=True, mode="w") + # If the first chunk is of the size of the border size then it is valid + da.isel(x=slice(7, 11)).chunk(x=(3, 1)).to_zarr( + store, safe_chunks=True, append_dim="x" + ) + assert xr.open_zarr(store)["foo"].equals(da.isel(x=slice(0, 11))) + + da.isel(x=slice(0, 7)).to_zarr(store, safe_chunks=True, mode="w") + # If the first chunk is of the size of the border size + N * zchunk then it is valid + da.isel(x=slice(7, 17)).chunk(x=(8, 2)).to_zarr( + store, safe_chunks=True, append_dim="x" + ) + assert xr.open_zarr(store)["foo"].equals(da.isel(x=slice(0, 17))) + + da.isel(x=slice(0, 7)).to_zarr(store, safe_chunks=True, mode="w") + with pytest.raises(ValueError): + # If the first chunk is valid but the other are not then raise an error + da.isel(x=slice(7, 14)).chunk(x=(3, 3, 1)).to_zarr( + store, append_dim="x", safe_chunks=True + ) + + da.isel(x=slice(0, 7)).to_zarr(store, safe_chunks=True, mode="w") + with pytest.raises(ValueError): + # If the first chunk have a size bigger than the border size but not enough + # to complete the size of the next chunk then an error must be raised + da.isel(x=slice(7, 14)).chunk(x=(4, 3)).to_zarr( + store, append_dim="x", safe_chunks=True + ) + + da.isel(x=slice(0, 7)).to_zarr(store, safe_chunks=True, mode="w") + # Append with a single chunk it's totally valid, + # and it does not matter the size of the chunk + da.isel(x=slice(7, 19)).chunk(x=-1).to_zarr( + store, append_dim="x", safe_chunks=True + ) + assert xr.open_zarr(store)["foo"].equals(da.isel(x=slice(0, 19))) From 0160d48ee35153f26e96515f887affca61a89348 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 18 Sep 2024 12:25:29 +0000 Subject: [PATCH 02/17] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/backends/zarr.py | 11 +++++++++-- xarray/tests/test_backends.py | 4 +--- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 52de392e85d..c4099f1f5fe 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -197,7 +197,9 @@ def _determine_zarr_chunks(enc_chunks, var_chunks, ndim, name, safe_chunks, regi f"or modifying `encoding['chunks']`, or specify `safe_chunks=False`." 
) - for zchunk, dchunks, interval in zip(enc_chunks_tuple, var_chunks, region, strict=True): + for zchunk, dchunks, interval in zip( + enc_chunks_tuple, var_chunks, region, strict=True + ): if not safe_chunks or len(dchunks) <= 1: # It is not necessary to perform any additional validation if the # safe_chunks is False, or there are less than two dchunks @@ -302,7 +304,12 @@ def extract_zarr_variable_encoding( del encoding[k] chunks = _determine_zarr_chunks( - encoding.get("chunks"), variable.chunks, variable.ndim, name, safe_chunks, region + encoding.get("chunks"), + variable.chunks, + variable.ndim, + name, + safe_chunks, + region, ) encoding["chunks"] = chunks return encoding diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index a78b583598b..06646e6ec4a 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -6149,7 +6149,5 @@ def test_zarr_safe_chunk(tmp_path): da.isel(x=slice(0, 7)).to_zarr(store, safe_chunks=True, mode="w") # Append with a single chunk it's totally valid, # and it does not matter the size of the chunk - da.isel(x=slice(7, 19)).chunk(x=-1).to_zarr( - store, append_dim="x", safe_chunks=True - ) + da.isel(x=slice(7, 19)).chunk(x=-1).to_zarr(store, append_dim="x", safe_chunks=True) assert xr.open_zarr(store)["foo"].equals(da.isel(x=slice(0, 19))) From 60a7a3f18e2b450590d311141ca2ee4b79df6dc8 Mon Sep 17 00:00:00 2001 From: Joseph Gonzalez Date: Wed, 18 Sep 2024 08:55:26 -0400 Subject: [PATCH 03/17] fix safe chunks validation --- doc/whats-new.rst | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 264c07f562b..56f4dda4cca 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -51,7 +51,9 @@ Bug fixes the non-missing times could in theory be encoded with integers (:issue:`9488`, :pull:`9497`). By `Spencer Clark `_. - +- Fix the safe_chunks validation option on the to_zarr method + (:issue:`5511`, :pull:`9513`). By `Joseph Nowak + `_. 
Documentation ~~~~~~~~~~~~~ From 6c41f4beb059d4ac0a8c04cda117177284e3fd62 Mon Sep 17 00:00:00 2001 From: joseph nowak Date: Wed, 18 Sep 2024 15:26:00 -0400 Subject: [PATCH 04/17] Update xarray/tests/test_backends.py Co-authored-by: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> --- xarray/tests/test_backends.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 06646e6ec4a..a2419cf9145 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -6104,7 +6104,7 @@ def test_zarr_region_chunk_partial_offset(tmp_path): @requires_zarr @requires_dask -def test_zarr_safe_chunk(tmp_path): +def test_zarr_safe_chunk_append_dim(tmp_path): # https://github.com/pydata/xarray/pull/8459#issuecomment-1819417545 store = tmp_path / "foo.zarr" data = np.ones((20,)) From a2a786bcbf0bd0692dcbab2e9196cb0379c70d0a Mon Sep 17 00:00:00 2001 From: Joseph Gonzalez Date: Fri, 20 Sep 2024 16:15:50 -0400 Subject: [PATCH 05/17] The validation of the chunks now is able to detect full or partial chunk and raise a proper error based on the mode selected, it is also possible to use the auto region detection with the mode "a" --- xarray/backends/zarr.py | 76 ++++++++++++++++++++------------ xarray/core/dataarray.py | 8 ++++ xarray/core/dataset.py | 8 ++++ xarray/tests/test_backends.py | 83 +++++++++++++++++++++++++++++++++-- 4 files changed, 143 insertions(+), 32 deletions(-) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index af289d2ea7b..98936aae31a 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -112,7 +112,7 @@ def __getitem__(self, key): # could possibly have a work-around for 0d data here -def _determine_zarr_chunks(enc_chunks, var_chunks, ndim, name, safe_chunks, region): +def _determine_zarr_chunks(enc_chunks, var_chunks, ndim, name, safe_chunks, region, mode): """ Given encoding chunks (possibly None or []) and variable chunks (possibly None or []). @@ -163,7 +163,7 @@ def _determine_zarr_chunks(enc_chunks, var_chunks, ndim, name, safe_chunks, regi if len(enc_chunks_tuple) != ndim: # throw away encoding chunks, start over - return _determine_zarr_chunks(None, var_chunks, ndim, name, safe_chunks, region) + return _determine_zarr_chunks(None, var_chunks, ndim, name, safe_chunks, region, mode) for x in enc_chunks_tuple: if not isinstance(x, int): @@ -189,9 +189,19 @@ def _determine_zarr_chunks(enc_chunks, var_chunks, ndim, name, safe_chunks, regi # TODO: incorporate synchronizer to allow writes from multiple dask # threads if var_chunks and enc_chunks_tuple: + # If it is possible to write on partial chunks then it is not necessary to check + # the last one contained on the region + allow_partial_chunks = True + end = -1 + if mode == "r+": + # This mode forces to write only on full chunks, even on the last one + allow_partial_chunks = False + end = None + base_error = ( f"Specified zarr chunks encoding['chunks']={enc_chunks_tuple!r} for " - f"variable named {name!r} would overlap multiple dask chunks {var_chunks!r}. " + f"variable named {name!r} would overlap multiple dask chunks {var_chunks!r} " + f"on the region {region}. " f"Writing this array in parallel with dask could lead to corrupted data." f"Consider either rechunking using `chunk()`, deleting " f"or modifying `encoding['chunks']`, or specify `safe_chunks=False`." 
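
To make the alignment rule introduced in this hunk and the next one concrete, here is a minimal standalone sketch of the check being added (a sketch only: the helper name `check_alignment` and its signature are illustrative and are not part of xarray's API, and later patches in this series further refine how the final zarr chunk is handled using the on-disk array shape, which this sketch omits):

def check_alignment(zchunk, dchunks, region_start, allow_partial_chunks):
    # Size of the piece of the first zarr chunk that the region starts in
    # (the "border"); if the region starts on a chunk boundary there is no
    # border and the first dask chunk must cover whole zarr chunks.
    border = zchunk - region_start % zchunk if region_start % zchunk else zchunk
    if not allow_partial_chunks and border < zchunk:
        # "r+" mode: the region must start exactly on a zarr chunk boundary.
        return False
    # Every dask chunk boundary after the border must land on a zarr chunk
    # boundary; the trailing dask chunk may be partial only when partial
    # chunk writes are allowed (mode "a" / append).
    to_check = dchunks[:-1] if allow_partial_chunks else dchunks
    for dchunk in to_check:
        if (dchunk - border) % zchunk:
            return False
        border = 0  # the border only applies to the first dask chunk
    return True

# Appending after 7 elements to an array stored with zarr chunks of 5:
# border == 5 - 7 % 5 == 3, so a first dask chunk of 3 is safe and one of 2 is not.
assert check_alignment(5, (3, 1), region_start=7, allow_partial_chunks=True)
assert not check_alignment(5, (2, 2), region_start=7, allow_partial_chunks=True)

This is the behaviour exercised by test_zarr_safe_chunk_append_dim earlier in the series and by test_zarr_safe_chunk_region added below.
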
@@ -200,27 +210,27 @@ def _determine_zarr_chunks(enc_chunks, var_chunks, ndim, name, safe_chunks, regi for zchunk, dchunks, interval in zip( enc_chunks_tuple, var_chunks, region, strict=True ): - if not safe_chunks or len(dchunks) <= 1: - # It is not necessary to perform any additional validation if the - # safe_chunks is False, or there are less than two dchunks + if not safe_chunks: continue - start = 0 + # The first border size is the amount of data that needs to be updated on the + # first chunk taking into account the region slice. + first_border_size = zchunk if interval.start: - # If the start of the interval is not None or 0, it means that the data - # is being appended or updated, and in both cases it is mandatory that - # the residue of the division between the first dchunk and the zchunk - # being equal to the border size - border_size = zchunk - interval.start % zchunk - if dchunks[0] % zchunk != border_size: - raise ValueError(base_error) - # Avoid validating the first chunk inside the loop - start = 1 + first_border_size = zchunk - interval.start % zchunk - for dchunk in dchunks[start:-1]: - if dchunk % zchunk: + if not allow_partial_chunks and first_border_size < zchunk: + # If the border is smaller than zchunk, then it is a partial chunk write + raise ValueError(first_border_size) + + for dchunk in dchunks[:end]: + if (dchunk - first_border_size) % zchunk: raise ValueError(base_error) + # The first border is only useful during the first iteration, + # so ignore it in the next validations + first_border_size = 0 + return enc_chunks_tuple raise AssertionError("We should never get here. Function logic must be wrong.") @@ -261,7 +271,12 @@ def _get_zarr_dims_and_attrs(zarr_obj, dimension_key, try_nczarr): def extract_zarr_variable_encoding( - variable, region, raise_on_invalid=False, name=None, safe_chunks=True + variable, + raise_on_invalid=False, + name=None, + safe_chunks=True, + region=None, + mode=None ): """ Extract zarr encoding dictionary from xarray Variable @@ -269,8 +284,11 @@ def extract_zarr_variable_encoding( Parameters ---------- variable : Variable - region: tuple[slice] + region: tuple[slice], optional raise_on_invalid : bool, optional + safe_chunks: bool, optional + name: str | Hashable, optional + mode: str, optional Returns ------- @@ -304,12 +322,13 @@ def extract_zarr_variable_encoding( del encoding[k] chunks = _determine_zarr_chunks( - encoding.get("chunks"), - variable.chunks, - variable.ndim, - name, - safe_chunks, - region, + enc_chunks=encoding.get("chunks"), + var_chunks=variable.chunks, + ndim=variable.ndim, + name=name, + safe_chunks=safe_chunks, + region=region, + mode=mode ) encoding["chunks"] = chunks return encoding @@ -845,6 +864,7 @@ def set_variables(self, variables, check_encoding_set, writer, unlimited_dims=No raise_on_invalid=vn in check_encoding_set, name=vn, safe_chunks=self._safe_chunks, + mode=self._mode ) if name not in existing_keys: @@ -927,9 +947,9 @@ def _validate_and_autodetect_region(self, ds) -> None: if not isinstance(region, dict): raise TypeError(f"``region`` must be a dict, got {type(region)}") if any(v == "auto" for v in region.values()): - if self._mode != "r+": + if self._mode not in ["r+", "a"]: raise ValueError( - f"``mode`` must be 'r+' when using ``region='auto'``, got {self._mode!r}" + f"``mode`` must be 'r+' or 'a' when using ``region='auto'``, got {self._mode!r}" ) region = self._auto_detect_regions(ds, region) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 37369afbf96..1a308213ab3 100644 
--- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -4304,6 +4304,14 @@ def to_zarr( if Zarr arrays are written in parallel. This option may be useful in combination with ``compute=False`` to initialize a Zarr store from an existing DataArray with arbitrary chunk structure. + In addition to the many-to-one relationship validation, it also detects partial + chunks writes when using the region parameter, + these partial chunks are considered unsafe in the mode "r+" but safe in + the mode "a". + Note: Even with these validations it can still be unsafe to write + two or more chunked arrays in the same location in parallel if they are + not writing in independent regions, for those cases it is better to use + a synchronizer. storage_options : dict, optional Any additional parameters for the storage backend (ignored for local paths). diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 7b9b4819245..b1ce264cbc8 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -2509,6 +2509,14 @@ def to_zarr( if Zarr arrays are written in parallel. This option may be useful in combination with ``compute=False`` to initialize a Zarr from an existing Dataset with arbitrary chunk structure. + In addition to the many-to-one relationship validation, it also detects partial + chunks writes when using the region parameter, + these partial chunks are considered unsafe in the mode "r+" but safe in + the mode "a". + Note: Even with these validations it can still be unsafe to write + two or more chunked arrays in the same location in parallel if they are + not writing in independent regions, for those cases it is better to use + a synchronizer. storage_options : dict, optional Any additional parameters for the storage backend (ignored for local paths). diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index beaf22826ec..a7f13c12f8a 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -5991,9 +5991,10 @@ def test_zarr_region_append(self, tmp_path): } ) - # Don't allow auto region detection in append mode due to complexities in - # implementing the overlap logic and lack of safety with parallel writes - with pytest.raises(ValueError): + # Now it is valid to use auto region detection with the append mode, + # but it is still unsafe to modify dimensions or metadata using the region + # parameter. 
+ with pytest.raises(KeyError): ds_new.to_zarr( tmp_path / "test.zarr", mode="a", append_dim="x", region="auto" ) @@ -6105,7 +6106,6 @@ def test_zarr_region_chunk_partial_offset(tmp_path): @requires_zarr @requires_dask def test_zarr_safe_chunk_append_dim(tmp_path): - # https://github.com/pydata/xarray/pull/8459#issuecomment-1819417545 store = tmp_path / "foo.zarr" data = np.ones((20,)) da = xr.DataArray(data, dims=["x"], coords={"x": range(20)}, name="foo").chunk(x=5) @@ -6151,3 +6151,78 @@ def test_zarr_safe_chunk_append_dim(tmp_path): # and it does not matter the size of the chunk da.isel(x=slice(7, 19)).chunk(x=-1).to_zarr(store, append_dim="x", safe_chunks=True) assert xr.open_zarr(store)["foo"].equals(da.isel(x=slice(0, 19))) + + +@requires_zarr +@requires_dask +def test_zarr_safe_chunk_region(tmp_path): + store = tmp_path / "foo.zarr" + + arr = xr.DataArray( + list(range(10)), + dims=["a"], + coords={"a": list(range(10))}, + name="foo" + ).chunk(a=3) + arr.to_zarr(store, mode="w") + + for mode in ["r+", "a"]: + with pytest.raises(ValueError): + # There are two Dask chunks on the same Zarr chunk, + # which means that it is unsafe in any mode + arr.isel(a=slice(0, 3)).chunk(a=(2, 1)).to_zarr(store, region="auto", mode=mode) + + with pytest.raises(ValueError): + # the first chunk is covering the border size, but it is not + # completely covering the second chunk, which means that it is + # unsafe in any mode + arr.isel(a=slice(1, 5)).chunk(a=(3, 1)).to_zarr(store, region="auto", mode=mode) + + with pytest.raises(ValueError): + # The first chunk is safe but the other two chunks are overlapping with + # the same Zarr chunk + arr.isel(a=slice(0, 5)).chunk(a=(3, 1, 1)).to_zarr(store, region="auto", mode=mode) + + # Fully update two contiguous chunks is safe in any mode + arr.isel(a=slice(3, 9)).to_zarr(store, region="auto", mode=mode) + + # Write the last chunk partially is safe in "a" mode + arr.isel(a=slice(3, 8)).to_zarr(store, region="auto", mode="a") + with pytest.raises(ValueError): + # with "r+" mode it is invalid to write partial chunk even on the last one + arr.isel(a=slice(3, 8)).to_zarr(store, region="auto", mode="r+") + + # This is safe with mode "a", the border size is covered by the first chunk of Dask + arr.isel(a=slice(1, 4)).chunk(a=(2, 1)).to_zarr(store, region="auto", mode="a") + + with pytest.raises(ValueError): + # This is considered unsafe in mode "r+" because it is writing in a partial chunk + arr.isel(a=slice(1, 4)).chunk(a=(2, 1)).to_zarr(store, region="auto", mode="r+") + + # This is safe on mode "a" because there is a single dask chunk + arr.isel(a=slice(1, 5)).chunk(a=(4,)).to_zarr(store, region="auto", mode="a") + + with pytest.raises(ValueError): + # This is unsafe on mode "r+", because there is a single dask + # chunk smaller than the Zarr chunk + arr.isel(a=slice(1, 5)).chunk(a=(4,)).to_zarr(store, region="auto", mode="r+") + + # The first chunk is completely covering the first Zarr chunk + # and the last chunk is a partial chunk + arr.isel(a=slice(0, 5)).chunk(a=(3, 2)).to_zarr(store, region="auto", mode="a") + + with pytest.raises(ValueError): + # The last chunk is partial, so it is considered unsafe on mode "r+" + arr.isel(a=slice(0, 5)).chunk(a=(3, 2)).to_zarr(store, region="auto", mode="r+") + + # The first chunk is covering the border size (2 elements) + # and also the second chunk (3 elements), so it is valid + arr.isel(a=slice(1, 8)).chunk(a=(5, 2)).to_zarr(store, region="auto", mode="a") + + with pytest.raises(ValueError): + # The first chunk is not 
fully covering the first zarr chunk + arr.isel(a=slice(1, 8)).chunk(a=(5, 2)).to_zarr(store, region="auto", mode="r+") + + with pytest.raises(ValueError): + # Validate that the border condition is not affecting the "r+" mode + arr.isel(a=slice(1, 9)).to_zarr(store, region="auto", mode="r+") From 604b8e16bcdc1f8565bf561b10e19185183e6efd Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 20 Sep 2024 20:20:55 +0000 Subject: [PATCH 06/17] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/backends/zarr.py | 14 +++++++++----- xarray/tests/test_backends.py | 17 ++++++++++------- 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 98936aae31a..c66cd65e4ad 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -112,7 +112,9 @@ def __getitem__(self, key): # could possibly have a work-around for 0d data here -def _determine_zarr_chunks(enc_chunks, var_chunks, ndim, name, safe_chunks, region, mode): +def _determine_zarr_chunks( + enc_chunks, var_chunks, ndim, name, safe_chunks, region, mode +): """ Given encoding chunks (possibly None or []) and variable chunks (possibly None or []). @@ -163,7 +165,9 @@ def _determine_zarr_chunks(enc_chunks, var_chunks, ndim, name, safe_chunks, regi if len(enc_chunks_tuple) != ndim: # throw away encoding chunks, start over - return _determine_zarr_chunks(None, var_chunks, ndim, name, safe_chunks, region, mode) + return _determine_zarr_chunks( + None, var_chunks, ndim, name, safe_chunks, region, mode + ) for x in enc_chunks_tuple: if not isinstance(x, int): @@ -276,7 +280,7 @@ def extract_zarr_variable_encoding( name=None, safe_chunks=True, region=None, - mode=None + mode=None, ): """ Extract zarr encoding dictionary from xarray Variable @@ -328,7 +332,7 @@ def extract_zarr_variable_encoding( name=name, safe_chunks=safe_chunks, region=region, - mode=mode + mode=mode, ) encoding["chunks"] = chunks return encoding @@ -864,7 +868,7 @@ def set_variables(self, variables, check_encoding_set, writer, unlimited_dims=No raise_on_invalid=vn in check_encoding_set, name=vn, safe_chunks=self._safe_chunks, - mode=self._mode + mode=self._mode, ) if name not in existing_keys: diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index a7f13c12f8a..3a3e16afe93 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -6159,10 +6159,7 @@ def test_zarr_safe_chunk_region(tmp_path): store = tmp_path / "foo.zarr" arr = xr.DataArray( - list(range(10)), - dims=["a"], - coords={"a": list(range(10))}, - name="foo" + list(range(10)), dims=["a"], coords={"a": list(range(10))}, name="foo" ).chunk(a=3) arr.to_zarr(store, mode="w") @@ -6170,18 +6167,24 @@ def test_zarr_safe_chunk_region(tmp_path): with pytest.raises(ValueError): # There are two Dask chunks on the same Zarr chunk, # which means that it is unsafe in any mode - arr.isel(a=slice(0, 3)).chunk(a=(2, 1)).to_zarr(store, region="auto", mode=mode) + arr.isel(a=slice(0, 3)).chunk(a=(2, 1)).to_zarr( + store, region="auto", mode=mode + ) with pytest.raises(ValueError): # the first chunk is covering the border size, but it is not # completely covering the second chunk, which means that it is # unsafe in any mode - arr.isel(a=slice(1, 5)).chunk(a=(3, 1)).to_zarr(store, region="auto", mode=mode) + arr.isel(a=slice(1, 5)).chunk(a=(3, 1)).to_zarr( + store, region="auto", mode=mode + ) with 
pytest.raises(ValueError): # The first chunk is safe but the other two chunks are overlapping with # the same Zarr chunk - arr.isel(a=slice(0, 5)).chunk(a=(3, 1, 1)).to_zarr(store, region="auto", mode=mode) + arr.isel(a=slice(0, 5)).chunk(a=(3, 1, 1)).to_zarr( + store, region="auto", mode=mode + ) # Fully update two contiguous chunks is safe in any mode arr.isel(a=slice(3, 9)).to_zarr(store, region="auto", mode=mode) From a30b1e07df9fff306c247e69e421b6ac4de1598c Mon Sep 17 00:00:00 2001 From: Joseph Gonzalez Date: Fri, 20 Sep 2024 16:22:54 -0400 Subject: [PATCH 07/17] The test_extract_zarr_variable_encoding does not need to use the region parameter --- xarray/tests/test_backends.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index a7f13c12f8a..032a24c037c 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -5496,26 +5496,24 @@ def test_encode_zarr_attr_value() -> None: @requires_zarr def test_extract_zarr_variable_encoding() -> None: - # The region is not useful in these cases, but I still think that it must be mandatory - # because the validation of the chunks is in the same function var = xr.Variable("x", [1, 2]) - actual = backends.zarr.extract_zarr_variable_encoding(var, region=tuple()) + actual = backends.zarr.extract_zarr_variable_encoding(var) assert "chunks" in actual assert actual["chunks"] is None var = xr.Variable("x", [1, 2], encoding={"chunks": (1,)}) - actual = backends.zarr.extract_zarr_variable_encoding(var, region=tuple()) + actual = backends.zarr.extract_zarr_variable_encoding(var) assert actual["chunks"] == (1,) # does not raise on invalid var = xr.Variable("x", [1, 2], encoding={"foo": (1,)}) - actual = backends.zarr.extract_zarr_variable_encoding(var, region=tuple()) + actual = backends.zarr.extract_zarr_variable_encoding(var) # raises on invalid var = xr.Variable("x", [1, 2], encoding={"foo": (1,)}) with pytest.raises(ValueError, match=r"unexpected encoding parameters"): actual = backends.zarr.extract_zarr_variable_encoding( - var, raise_on_invalid=True, region=tuple() + var, raise_on_invalid=True ) From c781042a1250731ed26e3a674075813f2def4091 Mon Sep 17 00:00:00 2001 From: Joseph Gonzalez Date: Fri, 20 Sep 2024 17:15:06 -0400 Subject: [PATCH 08/17] Inline the code of the allow_partial_chunks and end, document the parameter in order on the extract_zarr_variable_encoding method, raise the correct error if the border size is smaller than the zchunk on mode equal to r+ --- xarray/backends/zarr.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index c66cd65e4ad..756ce21bc9b 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -195,12 +195,9 @@ def _determine_zarr_chunks( if var_chunks and enc_chunks_tuple: # If it is possible to write on partial chunks then it is not necessary to check # the last one contained on the region - allow_partial_chunks = True - end = -1 - if mode == "r+": - # This mode forces to write only on full chunks, even on the last one - allow_partial_chunks = False - end = None + allow_partial_chunks = mode != "r+" + # The r+ mode force to write only on full chunks, even on the last one + end = None if mode == "r+" else -1 base_error = ( f"Specified zarr chunks encoding['chunks']={enc_chunks_tuple!r} for " @@ -225,7 +222,7 @@ def _determine_zarr_chunks( if not allow_partial_chunks and first_border_size < zchunk: # If the border 
is smaller than zchunk, then it is a partial chunk write - raise ValueError(first_border_size) + raise ValueError(base_error) for dchunk in dchunks[:end]: if (dchunk - first_border_size) % zchunk: @@ -278,6 +275,7 @@ def extract_zarr_variable_encoding( variable, raise_on_invalid=False, name=None, + *, safe_chunks=True, region=None, mode=None, @@ -288,10 +286,10 @@ def extract_zarr_variable_encoding( Parameters ---------- variable : Variable - region: tuple[slice], optional + name: str | Hashable, optional raise_on_invalid : bool, optional safe_chunks: bool, optional - name: str | Hashable, optional + region: tuple[slice], optional mode: str, optional Returns From c454cfef842f9f795460c5994f8bd0ccf0ad3cf4 Mon Sep 17 00:00:00 2001 From: Joseph Gonzalez Date: Fri, 20 Sep 2024 17:17:18 -0400 Subject: [PATCH 09/17] Inline the code of the allow_partial_chunks and end, document the parameter in order on the extract_zarr_variable_encoding method, raise the correct error if the border size is smaller than the zchunk on mode equal to r+ --- xarray/backends/zarr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 756ce21bc9b..b10f3c8da94 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -286,8 +286,8 @@ def extract_zarr_variable_encoding( Parameters ---------- variable : Variable - name: str | Hashable, optional raise_on_invalid : bool, optional + name: str | Hashable, optional safe_chunks: bool, optional region: tuple[slice], optional mode: str, optional From cc585d0fb4b7003822d75c8199b30a0afb47b278 Mon Sep 17 00:00:00 2001 From: Joseph Gonzalez Date: Sat, 21 Sep 2024 18:14:24 -0400 Subject: [PATCH 10/17] Now the mode r+ is able to update the last chunk of Zarr even if it is not "complete" --- xarray/backends/zarr.py | 55 ++++++++++++++++++++++------------- xarray/tests/test_backends.py | 41 ++++++++++++++++++++++---- 2 files changed, 70 insertions(+), 26 deletions(-) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index b10f3c8da94..e6fe93a398a 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -113,7 +113,7 @@ def __getitem__(self, key): def _determine_zarr_chunks( - enc_chunks, var_chunks, ndim, name, safe_chunks, region, mode + enc_chunks, var_chunks, ndim, name, safe_chunks, region, mode, shape ): """ Given encoding chunks (possibly None or []) and variable chunks @@ -166,7 +166,7 @@ def _determine_zarr_chunks( if len(enc_chunks_tuple) != ndim: # throw away encoding chunks, start over return _determine_zarr_chunks( - None, var_chunks, ndim, name, safe_chunks, region, mode + None, var_chunks, ndim, name, safe_chunks, region, mode, shape ) for x in enc_chunks_tuple: @@ -208,29 +208,38 @@ def _determine_zarr_chunks( f"or modifying `encoding['chunks']`, or specify `safe_chunks=False`." ) - for zchunk, dchunks, interval in zip( - enc_chunks_tuple, var_chunks, region, strict=True + for zchunk, dchunks, interval, size in zip( + enc_chunks_tuple, var_chunks, region, shape, strict=True ): if not safe_chunks: continue - # The first border size is the amount of data that needs to be updated on the - # first chunk taking into account the region slice. 
- first_border_size = zchunk - if interval.start: - first_border_size = zchunk - interval.start % zchunk + for dchunk in dchunks[1:-1]: + if dchunk % zchunk: + raise ValueError(base_error) + + region_start = interval.start if interval.start else 0 - if not allow_partial_chunks and first_border_size < zchunk: - # If the border is smaller than zchunk, then it is a partial chunk write - raise ValueError(base_error) + if len(dchunks) > 1: + # The first border size is the amount of data that needs to be updated on the + # first chunk taking into account the region slice. + first_border_size = zchunk + if allow_partial_chunks: + first_border_size = zchunk - region_start % zchunk - for dchunk in dchunks[:end]: - if (dchunk - first_border_size) % zchunk: + if (dchunks[0] - first_border_size) % zchunk: raise ValueError(base_error) - # The first border is only useful during the first iteration, - # so ignore it in the next validations - first_border_size = 0 + if not allow_partial_chunks: + region_stop = interval.stop if interval.stop else size + cover_last_chunk = region_stop > size - size % zchunk + + if not cover_last_chunk: + if dchunks[-1] % zchunk: + raise ValueError(base_error) + elif dchunks[-1] % zchunk != size % zchunk: + # The remainder must be equal to the size of the last Zarr chunk + raise ValueError(base_error) return enc_chunks_tuple @@ -279,6 +288,7 @@ def extract_zarr_variable_encoding( safe_chunks=True, region=None, mode=None, + shape=None ): """ Extract zarr encoding dictionary from xarray Variable @@ -289,9 +299,9 @@ def extract_zarr_variable_encoding( raise_on_invalid : bool, optional name: str | Hashable, optional safe_chunks: bool, optional - region: tuple[slice], optional + region: tuple[slice, ...], optional mode: str, optional - + shape: tuple[int, ...], optional Returns ------- encoding : dict @@ -331,6 +341,7 @@ def extract_zarr_variable_encoding( safe_chunks=safe_chunks, region=region, mode=mode, + shape=shape ) encoding["chunks"] = chunks return encoding @@ -808,6 +819,7 @@ def set_variables(self, variables, check_encoding_set, writer, unlimited_dims=No v.encoding = {} zarr_array = None + zarr_shape = None write_region = self._write_region if self._write_region is not None else {} write_region = {dim: write_region.get(dim, slice(None)) for dim in dims} @@ -852,6 +864,8 @@ def set_variables(self, variables, check_encoding_set, writer, unlimited_dims=No new_shape[append_axis] += v.shape[append_axis] zarr_array.resize(new_shape) + zarr_shape = zarr_array.shape + region = tuple(write_region[dim] for dim in dims) # We need to do this for both new and existing variables to ensure we're not @@ -862,11 +876,12 @@ def set_variables(self, variables, check_encoding_set, writer, unlimited_dims=No # another one for extracting the encoding. 
encoding = extract_zarr_variable_encoding( v, - region=region, raise_on_invalid=vn in check_encoding_set, name=vn, safe_chunks=self._safe_chunks, + region=region, mode=self._mode, + shape=zarr_shape ) if name not in existing_keys: diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 78d50fcbdac..c04f71ae61c 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -6157,7 +6157,7 @@ def test_zarr_safe_chunk_region(tmp_path): store = tmp_path / "foo.zarr" arr = xr.DataArray( - list(range(10)), dims=["a"], coords={"a": list(range(10))}, name="foo" + list(range(11)), dims=["a"], coords={"a": list(range(11))}, name="foo" ).chunk(a=3) arr.to_zarr(store, mode="w") @@ -6187,10 +6187,14 @@ def test_zarr_safe_chunk_region(tmp_path): # Fully update two contiguous chunks is safe in any mode arr.isel(a=slice(3, 9)).to_zarr(store, region="auto", mode=mode) - # Write the last chunk partially is safe in "a" mode + # The last chunk is considered full based on their current size (2) + arr.isel(a=slice(9, 11)).to_zarr(store, region="auto", mode=mode) + arr.isel(a=slice(6, None)).chunk(a=-1).to_zarr(store, region="auto", mode=mode) + + # Write the last chunk of a region partially is safe in "a" mode arr.isel(a=slice(3, 8)).to_zarr(store, region="auto", mode="a") with pytest.raises(ValueError): - # with "r+" mode it is invalid to write partial chunk even on the last one + # with "r+" mode it is invalid to write partial chunk arr.isel(a=slice(3, 8)).to_zarr(store, region="auto", mode="r+") # This is safe with mode "a", the border size is covered by the first chunk of Dask @@ -6204,12 +6208,12 @@ def test_zarr_safe_chunk_region(tmp_path): arr.isel(a=slice(1, 5)).chunk(a=(4,)).to_zarr(store, region="auto", mode="a") with pytest.raises(ValueError): - # This is unsafe on mode "r+", because there is a single dask - # chunk smaller than the Zarr chunk + # This is unsafe on mode "r+", because the Dask chunk is partially writing + # in the first chunk of Zarr arr.isel(a=slice(1, 5)).chunk(a=(4,)).to_zarr(store, region="auto", mode="r+") # The first chunk is completely covering the first Zarr chunk - # and the last chunk is a partial chunk + # and the last chunk is a partial one arr.isel(a=slice(0, 5)).chunk(a=(3, 2)).to_zarr(store, region="auto", mode="a") with pytest.raises(ValueError): @@ -6227,3 +6231,28 @@ def test_zarr_safe_chunk_region(tmp_path): with pytest.raises(ValueError): # Validate that the border condition is not affecting the "r+" mode arr.isel(a=slice(1, 9)).to_zarr(store, region="auto", mode="r+") + + arr.isel(a=slice(10, 11)).to_zarr(store, region="auto", mode="a") + with pytest.raises(ValueError): + # Validate that even if we write with a single Dask chunk on the last Zarr + # chunk it is still unsafe if it is not fully covering it + # (the last Zarr chunk has size 2) + arr.isel(a=slice(10, 11)).to_zarr(store, region="auto", mode="r+") + + # Validate the same than the above test but in the beginning of the last chunk + arr.isel(a=slice(9, 10)).to_zarr(store, region="auto", mode="a") + with pytest.raises(ValueError): + arr.isel(a=slice(9, 10)).to_zarr(store, region="auto", mode="r+") + + arr.isel(a=slice(7, None)).chunk(a=-1).to_zarr(store, region="auto", mode="a") + with pytest.raises(ValueError): + # Test that even a Dask chunk that covers the last Zarr chunk can be unsafe + # if it is partial covering other Zarr chunks + arr.isel(a=slice(7, None)).chunk(a=-1).to_zarr(store, region="auto", mode="r+") + + with pytest.raises(ValueError): + # If the 
chunk is of size equal to the one in the Zarr encoding, but + # it is partially writing in the last chunk then raise an error + arr.isel(a=slice(8, None)).chunk(a=3).to_zarr(store, region="auto", mode="r+") + + From 9302036426847f3fbde31915660e23e826684633 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 21 Sep 2024 22:15:01 +0000 Subject: [PATCH 11/17] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/backends/zarr.py | 6 +++--- xarray/tests/test_backends.py | 2 -- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index e6fe93a398a..775bd1e6d80 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -288,7 +288,7 @@ def extract_zarr_variable_encoding( safe_chunks=True, region=None, mode=None, - shape=None + shape=None, ): """ Extract zarr encoding dictionary from xarray Variable @@ -341,7 +341,7 @@ def extract_zarr_variable_encoding( safe_chunks=safe_chunks, region=region, mode=mode, - shape=shape + shape=shape, ) encoding["chunks"] = chunks return encoding @@ -881,7 +881,7 @@ def set_variables(self, variables, check_encoding_set, writer, unlimited_dims=No safe_chunks=self._safe_chunks, region=region, mode=self._mode, - shape=zarr_shape + shape=zarr_shape, ) if name not in existing_keys: diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index c04f71ae61c..6529dd74c21 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -6254,5 +6254,3 @@ def test_zarr_safe_chunk_region(tmp_path): # If the chunk is of size equal to the one in the Zarr encoding, but # it is partially writing in the last chunk then raise an error arr.isel(a=slice(8, None)).chunk(a=3).to_zarr(store, region="auto", mode="r+") - - From 0b4b9b1f9bb61becc39ad3bea6da9775cb82a72d Mon Sep 17 00:00:00 2001 From: Joseph Gonzalez Date: Sat, 21 Sep 2024 19:54:22 -0400 Subject: [PATCH 12/17] Now the mode r+ is able to update the last chunk of Zarr even if it is not "complete" --- xarray/backends/zarr.py | 21 ++++++++++++++------- xarray/tests/test_backends.py | 9 ++++----- 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index e6fe93a398a..197f735f950 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -196,8 +196,6 @@ def _determine_zarr_chunks( # If it is possible to write on partial chunks then it is not necessary to check # the last one contained on the region allow_partial_chunks = mode != "r+" - # The r+ mode force to write only on full chunks, even on the last one - end = None if mode == "r+" else -1 base_error = ( f"Specified zarr chunks encoding['chunks']={enc_chunks_tuple!r} for " @@ -231,14 +229,21 @@ def _determine_zarr_chunks( raise ValueError(base_error) if not allow_partial_chunks: + chunk_start = sum(dchunks[:-1]) + region_start + if chunk_start % zchunk: + # The last chunk which can also be the only one is a partial chunk + # if it is not aligned at the beginning + raise ValueError(base_error) + region_stop = interval.stop if interval.stop else size - cover_last_chunk = region_stop > size - size % zchunk - if not cover_last_chunk: - if dchunks[-1] % zchunk: + if size - region_stop + 1 < zchunk: + # If the region is covering the last chunk then check + # if the reminder with the default chunk size + # is equal to the size of the last chunk + if dchunks[-1] % zchunk != size % zchunk: 
raise ValueError(base_error) - elif dchunks[-1] % zchunk != size % zchunk: - # The remainder must be equal to the size of the last Zarr chunk + elif dchunks[-1] % zchunk: raise ValueError(base_error) return enc_chunks_tuple @@ -307,6 +312,8 @@ def extract_zarr_variable_encoding( encoding : dict Zarr encoding for `variable` """ + + shape = shape if shape else variable.shape encoding = variable.encoding.copy() safe_to_drop = {"source", "original_shape"} diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index c04f71ae61c..919317fb0d0 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -6199,14 +6199,12 @@ def test_zarr_safe_chunk_region(tmp_path): # This is safe with mode "a", the border size is covered by the first chunk of Dask arr.isel(a=slice(1, 4)).chunk(a=(2, 1)).to_zarr(store, region="auto", mode="a") - with pytest.raises(ValueError): # This is considered unsafe in mode "r+" because it is writing in a partial chunk arr.isel(a=slice(1, 4)).chunk(a=(2, 1)).to_zarr(store, region="auto", mode="r+") # This is safe on mode "a" because there is a single dask chunk arr.isel(a=slice(1, 5)).chunk(a=(4,)).to_zarr(store, region="auto", mode="a") - with pytest.raises(ValueError): # This is unsafe on mode "r+", because the Dask chunk is partially writing # in the first chunk of Zarr @@ -6239,7 +6237,7 @@ def test_zarr_safe_chunk_region(tmp_path): # (the last Zarr chunk has size 2) arr.isel(a=slice(10, 11)).to_zarr(store, region="auto", mode="r+") - # Validate the same than the above test but in the beginning of the last chunk + # Validate the same as the above test but in the beginning of the last chunk arr.isel(a=slice(9, 10)).to_zarr(store, region="auto", mode="a") with pytest.raises(ValueError): arr.isel(a=slice(9, 10)).to_zarr(store, region="auto", mode="r+") @@ -6252,7 +6250,8 @@ def test_zarr_safe_chunk_region(tmp_path): with pytest.raises(ValueError): # If the chunk is of size equal to the one in the Zarr encoding, but - # it is partially writing in the last chunk then raise an error + # it is partially writing in the first chunk then raise an error arr.isel(a=slice(8, None)).chunk(a=3).to_zarr(store, region="auto", mode="r+") - + with pytest.raises(ValueError): + arr.isel(a=slice(5, -1)).chunk(a=5).to_zarr(store, region="auto", mode="r+") From 23a864aa9b1ce298be58506203c31abed6499d76 Mon Sep 17 00:00:00 2001 From: Joseph Gonzalez Date: Sat, 21 Sep 2024 20:19:55 -0400 Subject: [PATCH 13/17] Add a typehint to the modes to avoid issues with mypy --- xarray/tests/test_backends.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 919317fb0d0..ccf1bc73dd6 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -6161,7 +6161,8 @@ def test_zarr_safe_chunk_region(tmp_path): ).chunk(a=3) arr.to_zarr(store, mode="w") - for mode in ["r+", "a"]: + modes: list[Literal["r+", "a"]] = ["r+", "a"] + for mode in modes: with pytest.raises(ValueError): # There are two Dask chunks on the same Zarr chunk, # which means that it is unsafe in any mode From 1825af355760009ee65026277906046f98631ff4 Mon Sep 17 00:00:00 2001 From: Joseph Gonzalez Date: Mon, 30 Sep 2024 15:36:11 -0400 Subject: [PATCH 14/17] Fix the detection of the last chunk --- xarray/backends/zarr.py | 9 ++++----- xarray/tests/test_backends.py | 10 ++++++++++ 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 
2c6b50b3589..c048ea63419 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -229,15 +229,14 @@ def _determine_zarr_chunks( raise ValueError(base_error) if not allow_partial_chunks: - chunk_start = sum(dchunks[:-1]) + region_start - if chunk_start % zchunk: + region_stop = interval.stop if interval.stop else size + + if region_start % zchunk: # The last chunk which can also be the only one is a partial chunk # if it is not aligned at the beginning raise ValueError(base_error) - region_stop = interval.stop if interval.stop else size - - if size - region_stop + 1 < zchunk: + if np.ceil(region_stop / zchunk) == np.ceil(size / zchunk): # If the region is covering the last chunk then check # if the reminder with the default chunk size # is equal to the size of the last chunk diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index ccf1bc73dd6..430cbb0b011 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -6256,3 +6256,13 @@ def test_zarr_safe_chunk_region(tmp_path): with pytest.raises(ValueError): arr.isel(a=slice(5, -1)).chunk(a=5).to_zarr(store, region="auto", mode="r+") + + # Test if the code is detecting the last chunk correctly + data = np.random.RandomState(0).randn(2920, 25, 53) + ds = xr.Dataset({'temperature': (('time', 'lat', 'lon'), data)}) + chunks = {'time': 1000, 'lat': 25, 'lon': 53} + ds.chunk(chunks).to_zarr(store, compute=False) + region = {'time': slice(1000, 2000, 1)} + chunk = ds.isel(region) + chunk = chunk.chunk() + chunk.chunk().to_zarr(store, region=region) From 81a27060093e90a10cdcdc2bbfc183fbf8d908dd Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 30 Sep 2024 19:38:32 +0000 Subject: [PATCH 15/17] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/tests/test_backends.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 430cbb0b011..68470b201d6 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -6259,10 +6259,10 @@ def test_zarr_safe_chunk_region(tmp_path): # Test if the code is detecting the last chunk correctly data = np.random.RandomState(0).randn(2920, 25, 53) - ds = xr.Dataset({'temperature': (('time', 'lat', 'lon'), data)}) - chunks = {'time': 1000, 'lat': 25, 'lon': 53} + ds = xr.Dataset({"temperature": (("time", "lat", "lon"), data)}) + chunks = {"time": 1000, "lat": 25, "lon": 53} ds.chunk(chunks).to_zarr(store, compute=False) - region = {'time': slice(1000, 2000, 1)} + region = {"time": slice(1000, 2000, 1)} chunk = ds.isel(region) chunk = chunk.chunk() chunk.chunk().to_zarr(store, region=region) From 4924776e77ffce112f63062d0d6877fc54106515 Mon Sep 17 00:00:00 2001 From: Joseph Gonzalez Date: Mon, 30 Sep 2024 15:50:04 -0400 Subject: [PATCH 16/17] Fix the whats-new and add mode="w" to the new test case --- doc/whats-new.rst | 2 +- xarray/tests/test_backends.py | 418 +++++++++++++++++----------------- 2 files changed, 210 insertions(+), 210 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 89c8d3b4599..72e49a983e3 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -59,7 +59,7 @@ Bug fixes - Fix a few bugs affecting groupby reductions with `flox`. (:issue:`8090`, :issue:`9398`). By `Deepak Cherian `_. - Fix the safe_chunks validation option on the to_zarr method - (:issue:`5511`, :pull:`9513`). 
By `Joseph Nowak + (:issue:`5511`, :pull:`9559`). By `Joseph Nowak `_. Documentation diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 68470b201d6..0e5db458116 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -41,11 +41,11 @@ ) from xarray.backends.common import robust_getitem from xarray.backends.h5netcdf_ import H5netcdfBackendEntrypoint -from xarray.backends.netcdf3 import _nc3_dtype_coercions from xarray.backends.netCDF4_ import ( NetCDF4BackendEntrypoint, _extract_nc4_variable_encoding, ) +from xarray.backends.netcdf3 import _nc3_dtype_coercions from xarray.backends.pydap_ import PydapDataStore from xarray.backends.scipy_ import ScipyBackendEntrypoint from xarray.coding.cftime_offsets import cftime_range @@ -306,7 +306,7 @@ class NetCDF3Only: def test_dtype_coercion_error(self) -> None: """Failing dtype coercion should lead to an error""" for dtype, format in itertools.product( - _nc3_dtype_coercions, self.netcdf3_formats + _nc3_dtype_coercions, self.netcdf3_formats ): if dtype == "bool": # coerced upcast (bool to int8) ==> can never fail @@ -332,7 +332,7 @@ def create_store(self): @contextlib.contextmanager def roundtrip( - self, data, save_kwargs=None, open_kwargs=None, allow_cleanup_failure=False + self, data, save_kwargs=None, open_kwargs=None, allow_cleanup_failure=False ): if save_kwargs is None: save_kwargs = {} @@ -345,7 +345,7 @@ def roundtrip( @contextlib.contextmanager def roundtrip_append( - self, data, save_kwargs=None, open_kwargs=None, allow_cleanup_failure=False + self, data, save_kwargs=None, open_kwargs=None, allow_cleanup_failure=False ): if save_kwargs is None: save_kwargs = {} @@ -392,8 +392,8 @@ def check_dtypes_roundtripped(self, expected, actual): # For NetCDF3, the backend should perform dtype coercion if ( - isinstance(self, NetCDF3Only) - and str(expected_dtype) in _nc3_dtype_coercions + isinstance(self, NetCDF3Only) + and str(expected_dtype) in _nc3_dtype_coercions ): expected_dtype = np.dtype(_nc3_dtype_coercions[str(expected_dtype)]) @@ -401,8 +401,8 @@ def check_dtypes_roundtripped(self, expected, actual): # TODO: check expected behavior for string dtypes more carefully string_kinds = {"O", "S", "U"} assert expected_dtype == actual_dtype or ( - expected_dtype.kind in string_kinds - and actual_dtype.kind in string_kinds + expected_dtype.kind in string_kinds + and actual_dtype.kind in string_kinds ) def test_roundtrip_test_data(self) -> None: @@ -584,8 +584,8 @@ def test_roundtrip_cftime_datetime_data(self) -> None: abs_diff = abs(actual.t.values - expected_decoded_t) assert (abs_diff <= np.timedelta64(1, "s")).all() assert ( - actual.t.encoding["units"] - == "days since 0001-01-01 00:00:00.000000" + actual.t.encoding["units"] + == "days since 0001-01-01 00:00:00.000000" ) assert actual.t.encoding["calendar"] == expected_calendar @@ -626,7 +626,7 @@ def test_roundtrip_coordinates(self) -> None: with self.roundtrip(original, open_kwargs={"decode_coords": False}) as expected: # check roundtripping when decode_coords=False with self.roundtrip( - expected, open_kwargs={"decode_coords": False} + expected, open_kwargs={"decode_coords": False} ) as actual: assert_identical(expected, actual) @@ -905,8 +905,8 @@ def test_roundtrip_empty_vlen_string_array(self) -> None: "decoded_fn, encoded_fn", [ ( - create_unsigned_masked_scaled_data, - create_encoded_unsigned_masked_scaled_data, + create_unsigned_masked_scaled_data, + create_encoded_unsigned_masked_scaled_data, ), pytest.param( 
create_bad_unsigned_masked_scaled_data, @@ -914,12 +914,12 @@ def test_roundtrip_empty_vlen_string_array(self) -> None: marks=pytest.mark.xfail(reason="Bad _Unsigned attribute."), ), ( - create_signed_masked_scaled_data, - create_encoded_signed_masked_scaled_data, + create_signed_masked_scaled_data, + create_encoded_signed_masked_scaled_data, ), ( - create_unsigned_false_masked_scaled_data, - create_encoded_unsigned_false_masked_scaled_data, + create_unsigned_false_masked_scaled_data, + create_encoded_unsigned_false_masked_scaled_data, ), (create_masked_and_scaled_data, create_encoded_masked_and_scaled_data), ], @@ -931,9 +931,9 @@ def test_roundtrip_mask_and_scale(self, decoded_fn, encoded_fn, dtype) -> None: decoded = decoded_fn(dtype) encoded = encoded_fn(dtype) if decoded["x"].encoding["dtype"] == "u1" and not ( - self.engine == "netcdf4" - and self.file_format is None - or self.file_format == "NETCDF4" + self.engine == "netcdf4" + and self.file_format is None + or self.file_format == "NETCDF4" ): pytest.skip("uint8 data can't be written to non-NetCDF4 data") @@ -942,8 +942,8 @@ def test_roundtrip_mask_and_scale(self, decoded_fn, encoded_fn, dtype) -> None: assert decoded.variables[k].dtype == actual.variables[k].dtype # CF _FillValue is always on-disk type assert ( - decoded.variables[k].encoding["_FillValue"] - == actual.variables[k].encoding["_FillValue"] + decoded.variables[k].encoding["_FillValue"] + == actual.variables[k].encoding["_FillValue"] ) assert_allclose(decoded, actual, decode_bytes=False) @@ -954,8 +954,8 @@ def test_roundtrip_mask_and_scale(self, decoded_fn, encoded_fn, dtype) -> None: assert encoded.variables[k].dtype == actual.variables[k].dtype # CF _FillValue is always on-disk type assert ( - decoded.variables[k].encoding["_FillValue"] - == actual.variables[k].attrs["_FillValue"] + decoded.variables[k].encoding["_FillValue"] + == actual.variables[k].attrs["_FillValue"] ) assert_allclose(encoded, actual, decode_bytes=False) @@ -964,8 +964,8 @@ def test_roundtrip_mask_and_scale(self, decoded_fn, encoded_fn, dtype) -> None: assert encoded.variables[k].dtype == actual.variables[k].dtype # CF _FillValue is always on-disk type assert ( - encoded.variables[k].attrs["_FillValue"] - == actual.variables[k].attrs["_FillValue"] + encoded.variables[k].attrs["_FillValue"] + == actual.variables[k].attrs["_FillValue"] ) assert_allclose(encoded, actual, decode_bytes=False) @@ -1030,7 +1030,7 @@ def _roundtrip_with_warnings(*args, **kwargs): assert_allclose(decoded, actual, decode_bytes=False) with _roundtrip_with_warnings( - decoded, open_kwargs=dict(decode_cf=False) + decoded, open_kwargs=dict(decode_cf=False) ) as actual: for k in encoded.variables: assert encoded.variables[k].dtype == actual.variables[k].dtype @@ -1120,7 +1120,7 @@ def test_coordinate_variables_after_dataset_roundtrip(self) -> None: assert_equal(actual, expected) def test_grid_mapping_and_bounds_are_coordinates_after_dataarray_roundtrip( - self, + self, ) -> None: original = self._create_cf_dataset() # The DataArray roundtrip should have the same warnings as the @@ -1132,14 +1132,14 @@ def test_grid_mapping_and_bounds_are_coordinates_after_dataarray_roundtrip( # needs the to_dataset. The other backends should be fine # without it. 
with pytest.warns( - UserWarning, - match=( - r"Variable\(s\) referenced in bounds not in variables: " - r"\['l(at|ong)itude_bnds'\]" - ), + UserWarning, + match=( + r"Variable\(s\) referenced in bounds not in variables: " + r"\['l(at|ong)itude_bnds'\]" + ), ): with self.roundtrip( - original["variable"].to_dataset(), open_kwargs={"decode_coords": "all"} + original["variable"].to_dataset(), open_kwargs={"decode_coords": "all"} ) as actual: assert_identical(actual, original["variable"].to_dataset()) @@ -1224,7 +1224,7 @@ def test_invalid_dataarray_names_raise(self) -> None: data = np.random.random((2, 2)) da = xr.DataArray(data) for name, (error, msg) in zip( - [0, (4, 5), True, ""], [te, te, te, ve], strict=True + [0, (4, 5), True, ""], [te, te, te, ve], strict=True ): ds = Dataset({name: da}) with pytest.raises(error) as excinfo: @@ -1357,7 +1357,7 @@ def test_append_with_invalid_dim_raises(self) -> None: data["var9"] = data["var2"] * 3 data = data.isel(dim1=slice(2, 6)) # modify one dimension with pytest.raises( - ValueError, match=r"Unable to update size for existing dimension" + ValueError, match=r"Unable to update size for existing dimension" ): self.save(data, tmp_file, mode="a") @@ -1419,7 +1419,7 @@ def test_byte_attrs(self, byte_attrs_dataset: dict[str, Any]) -> None: @contextlib.contextmanager def create_tmp_file( - suffix: str = ".nc", allow_cleanup_failure: bool = False + suffix: str = ".nc", allow_cleanup_failure: bool = False ) -> Iterator[str]: temp_dir = tempfile.mkdtemp() path = os.path.join(temp_dir, f"temp-{next(_counter)}{suffix}") @@ -1435,7 +1435,7 @@ def create_tmp_file( @contextlib.contextmanager def create_tmp_files( - nfiles: int, suffix: str = ".nc", allow_cleanup_failure: bool = False + nfiles: int, suffix: str = ".nc", allow_cleanup_failure: bool = False ) -> Iterator[list[str]]: with ExitStack() as stack: files = [ @@ -1517,7 +1517,7 @@ def test_write_groups(self) -> None: ], ) def test_encoding_kwarg_vlen_string( - self, input_strings: list[str], is_bytes: bool + self, input_strings: list[str], is_bytes: bool ) -> None: original = Dataset({"x": input_strings}) @@ -1689,9 +1689,9 @@ def test_auto_chunking_is_based_on_disk_chunk_sizes(self) -> None: with dask.config.set({"array.chunk-size": "100KiB"}): with self.chunked_roundtrip( - (1, y_size, x_size), - (1, y_chunksize, x_chunksize), - open_kwargs={"chunks": "auto"}, + (1, y_size, x_size), + (1, y_chunksize, x_chunksize), + open_kwargs={"chunks": "auto"}, ) as ds: t_chunks, y_chunks, x_chunks = ds["image"].data.chunks assert all(np.asanyarray(y_chunks) == y_chunksize) @@ -1705,21 +1705,21 @@ def test_base_chunking_uses_disk_chunk_sizes(self) -> None: x_chunksize = 10 with self.chunked_roundtrip( - (1, y_size, x_size), - (1, y_chunksize, x_chunksize), - open_kwargs={"chunks": {}}, + (1, y_size, x_size), + (1, y_chunksize, x_chunksize), + open_kwargs={"chunks": {}}, ) as ds: for chunksizes, expected in zip( - ds["image"].data.chunks, (1, y_chunksize, x_chunksize), strict=True + ds["image"].data.chunks, (1, y_chunksize, x_chunksize), strict=True ): assert all(np.asanyarray(chunksizes) == expected) @contextlib.contextmanager def chunked_roundtrip( - self, - array_shape: tuple[int, int, int], - chunk_sizes: tuple[int, int, int], - open_kwargs: dict[str, Any] | None = None, + self, + array_shape: tuple[int, int, int], + chunk_sizes: tuple[int, int, int], + open_kwargs: dict[str, Any] | None = None, ) -> Generator[Dataset, None, None]: t_size, y_size, x_size = array_shape t_chunksize, y_chunksize, x_chunksize = 
chunk_sizes @@ -1742,7 +1742,7 @@ def test_preferred_chunks_are_disk_chunk_sizes(self) -> None: x_chunksize = 10 with self.chunked_roundtrip( - (1, y_size, x_size), (1, y_chunksize, x_chunksize) + (1, y_size, x_size), (1, y_chunksize, x_chunksize) ) as ds: assert ds["image"].encoding["preferred_chunks"] == { "t": 1, @@ -1759,7 +1759,7 @@ def test_encoding_chunksizes_unlimited(self) -> None: "complevel": 0, "fletcher32": False, "contiguous": False, - "chunksizes": (2**20,), + "chunksizes": (2 ** 20,), "original_shape": (3,), } with self.roundtrip(ds) as actual: @@ -1862,14 +1862,14 @@ def test_encoding_enum__no_fill_value(self): with self.roundtrip(original, save_kwargs=save_kwargs) as actual: assert_equal(original, actual) assert ( - actual.clouds.encoding["dtype"].metadata["enum"] - == cloud_type_dict + actual.clouds.encoding["dtype"].metadata["enum"] + == cloud_type_dict ) if self.engine != "h5netcdf": # not implemented in h5netcdf yet assert ( - actual.clouds.encoding["dtype"].metadata["enum_name"] - == "cloud_type" + actual.clouds.encoding["dtype"].metadata["enum_name"] + == "cloud_type" ) @requires_netCDF4 @@ -1898,21 +1898,21 @@ def test_encoding_enum__multiple_variable_with_enum(self): with self.roundtrip(original, save_kwargs=save_kwargs) as actual: assert_equal(original, actual) assert ( - actual.clouds.encoding["dtype"] == actual.tifa.encoding["dtype"] + actual.clouds.encoding["dtype"] == actual.tifa.encoding["dtype"] ) assert ( - actual.clouds.encoding["dtype"].metadata - == actual.tifa.encoding["dtype"].metadata + actual.clouds.encoding["dtype"].metadata + == actual.tifa.encoding["dtype"].metadata ) assert ( - actual.clouds.encoding["dtype"].metadata["enum"] - == cloud_type_dict + actual.clouds.encoding["dtype"].metadata["enum"] + == cloud_type_dict ) if self.engine != "h5netcdf": # not implemented in h5netcdf yet assert ( - actual.clouds.encoding["dtype"].metadata["enum_name"] - == "cloud_type" + actual.clouds.encoding["dtype"].metadata["enum_name"] + == "cloud_type" ) @requires_netCDF4 @@ -1940,8 +1940,8 @@ def test_encoding_enum__error_multiple_variable_with_changing_enum(self): ) with open_dataset(tmp_file) as original: assert ( - original.clouds.encoding["dtype"].metadata - == original.tifa.encoding["dtype"].metadata + original.clouds.encoding["dtype"].metadata + == original.tifa.encoding["dtype"].metadata ) modified_enum = original.clouds.encoding["dtype"].metadata["enum"] modified_enum.update({"neblig": 2}) @@ -1952,11 +1952,11 @@ def test_encoding_enum__error_multiple_variable_with_changing_enum(self): if self.engine != "h5netcdf": # not implemented yet in h5netcdf with pytest.raises( - ValueError, - match=( - "Cannot save variable .*" - " because an enum `cloud_type` already exists in the Dataset .*" - ), + ValueError, + match=( + "Cannot save variable .*" + " because an enum `cloud_type` already exists in the Dataset .*" + ), ): with self.roundtrip(original): pass @@ -2081,8 +2081,8 @@ def test_compression_encoding(self, compression: str | None) -> None: actual_encoding = actual["var2"].encoding assert expected_encoding.items() <= actual_encoding.items() if ( - encoding_params["compression"] is not None - and "blosc" not in encoding_params["compression"] + encoding_params["compression"] is not None + and "blosc" not in encoding_params["compression"] ): # regression test for #156 expected = data.isel(dim1=0) @@ -2153,7 +2153,7 @@ def test_deepcopy(self) -> None: class TestNetCDF4ViaDaskData(TestNetCDF4Data): @contextlib.contextmanager def roundtrip( - self, data, 
save_kwargs=None, open_kwargs=None, allow_cleanup_failure=False + self, data, save_kwargs=None, open_kwargs=None, allow_cleanup_failure=False ): if open_kwargs is None: open_kwargs = {} @@ -2161,7 +2161,7 @@ def roundtrip( save_kwargs = {} open_kwargs.setdefault("chunks", -1) with TestNetCDF4Data.roundtrip( - self, data, save_kwargs, open_kwargs, allow_cleanup_failure + self, data, save_kwargs, open_kwargs, allow_cleanup_failure ) as ds: yield ds @@ -2219,13 +2219,13 @@ def save(self, dataset, store_target, **kwargs): # type: ignore[override] @contextlib.contextmanager def open(self, store_target, **kwargs): with xr.open_dataset( - store_target, engine="zarr", **kwargs, **self.version_kwargs + store_target, engine="zarr", **kwargs, **self.version_kwargs ) as ds: yield ds @contextlib.contextmanager def roundtrip( - self, data, save_kwargs=None, open_kwargs=None, allow_cleanup_failure=False + self, data, save_kwargs=None, open_kwargs=None, allow_cleanup_failure=False ): if save_kwargs is None: save_kwargs = {} @@ -2242,9 +2242,9 @@ def test_roundtrip_consolidated(self, consolidated) -> None: pytest.xfail("consolidated metadata is not supported for zarr v3 yet") expected = create_test_data() with self.roundtrip( - expected, - save_kwargs={"consolidated": consolidated}, - open_kwargs={"backend_kwargs": {"consolidated": consolidated}}, + expected, + save_kwargs={"consolidated": consolidated}, + open_kwargs={"backend_kwargs": {"consolidated": consolidated}}, ) as actual: self.check_dtypes_roundtripped(expected, actual) assert_identical(expected, actual) @@ -2257,8 +2257,8 @@ def test_read_non_consolidated_warning(self) -> None: with self.create_zarr_target() as store: expected.to_zarr(store, consolidated=False, **self.version_kwargs) with pytest.warns( - RuntimeWarning, - match="Failed to open Zarr store with consolidated", + RuntimeWarning, + match="Failed to open Zarr store with consolidated", ): with xr.open_zarr(store, **self.version_kwargs) as ds: assert_identical(ds, expected) @@ -2529,17 +2529,17 @@ def test_write_persistence_modes(self, group) -> None: # overwrite mode with self.roundtrip( - original, - save_kwargs={"mode": "w", "group": group}, - open_kwargs={"group": group}, + original, + save_kwargs={"mode": "w", "group": group}, + open_kwargs={"group": group}, ) as actual: assert_identical(original, actual) # don't overwrite mode with self.roundtrip( - original, - save_kwargs={"mode": "w-", "group": group}, - open_kwargs={"group": group}, + original, + save_kwargs={"mode": "w-", "group": group}, + open_kwargs={"group": group}, ) as actual: assert_identical(original, actual) @@ -2555,9 +2555,9 @@ def test_write_persistence_modes(self, group) -> None: # check append mode for normal write with self.roundtrip( - original, - save_kwargs={"mode": "a", "group": group}, - open_kwargs={"group": group}, + original, + save_kwargs={"mode": "a", "group": group}, + open_kwargs={"group": group}, ) as actual: assert_identical(original, actual) @@ -2590,7 +2590,7 @@ def test_group(self) -> None: original = create_test_data() group = "some/random/path" with self.roundtrip( - original, save_kwargs={"group": group}, open_kwargs={"group": group} + original, save_kwargs={"group": group}, open_kwargs={"group": group} ) as actual: assert_identical(original, actual) @@ -2640,7 +2640,7 @@ def test_append_with_mode_rplus_fails(self) -> None: with self.create_zarr_target() as store: original.to_zarr(store, **self.version_kwargs) with pytest.raises( - ValueError, match="dataset contains non-pre-existing 
variables" + ValueError, match="dataset contains non-pre-existing variables" ): modified.to_zarr(store, mode="r+", **self.version_kwargs) @@ -2649,7 +2649,7 @@ def test_append_with_invalid_dim_raises(self) -> None: with self.create_zarr_target() as store_target: ds.to_zarr(store_target, mode="w", **self.version_kwargs) with pytest.raises( - ValueError, match="does not match any existing dataset dimensions" + ValueError, match="does not match any existing dataset dimensions" ): ds_to_append.to_zarr( store_target, append_dim="notvalid", **self.version_kwargs @@ -2870,7 +2870,7 @@ def test_write_region(self, consolidated, compute, use_dask, write_empty) -> Non ) if compute: with xr.open_zarr( - store, consolidated=consolidated, **self.version_kwargs + store, consolidated=consolidated, **self.version_kwargs ) as actual: assert_identical(actual, zeros) for i in range(0, 10, 2): @@ -2883,7 +2883,7 @@ def test_write_region(self, consolidated, compute, use_dask, write_empty) -> Non **self.version_kwargs, ) with xr.open_zarr( - store, consolidated=consolidated, **self.version_kwargs + store, consolidated=consolidated, **self.version_kwargs ) as actual: assert_identical(actual, nonzeros) @@ -2963,10 +2963,10 @@ def setup_and_verify_store(expected=data): with setup_and_verify_store() as store: with pytest.raises( - ValueError, - match=re.escape( - "cannot set region unless mode='a', mode='a-', mode='r+' or mode=None" - ), + ValueError, + match=re.escape( + "cannot set region unless mode='a', mode='a-', mode='r+' or mode=None" + ), ): data.to_zarr( store, region={"x": slice(None)}, mode="w", **self.version_kwargs @@ -2988,15 +2988,15 @@ def setup_and_verify_store(expected=data): with setup_and_verify_store() as store: with pytest.raises( - ValueError, - match=r"all keys in ``region`` are not in Dataset dimensions", + ValueError, + match=r"all keys in ``region`` are not in Dataset dimensions", ): data.to_zarr(store, region={"y": slice(None)}, **self.version_kwargs) with setup_and_verify_store() as store: with pytest.raises( - ValueError, - match=r"all variables in the dataset to write must have at least one dimension in common", + ValueError, + match=r"all variables in the dataset to write must have at least one dimension in common", ): data2.assign(v=2).to_zarr( store, region={"x": slice(2)}, **self.version_kwargs @@ -3004,7 +3004,7 @@ def setup_and_verify_store(expected=data): with setup_and_verify_store() as store: with pytest.raises( - ValueError, match=r"cannot list the same dimension in both" + ValueError, match=r"cannot list the same dimension in both" ): data.to_zarr( store, @@ -3015,8 +3015,8 @@ def setup_and_verify_store(expected=data): with setup_and_verify_store() as store: with pytest.raises( - ValueError, - match=r"variable 'u' already exists with different dimension sizes", + ValueError, + match=r"variable 'u' already exists with different dimension sizes", ): data2.to_zarr(store, region={"x": slice(3)}, **self.version_kwargs) @@ -3043,7 +3043,7 @@ def test_chunk_encoding_with_partial_dask_chunks(self) -> None: ).chunk({"a": 3}) with self.roundtrip( - original, save_kwargs={"encoding": {"x": {"chunks": [3, 2]}}} + original, save_kwargs={"encoding": {"x": {"chunks": [3, 2]}}} ) as ds1: assert_equal(ds1, original) @@ -3052,7 +3052,7 @@ def test_chunk_encoding_with_larger_dask_chunks(self) -> None: original = xr.Dataset({"a": ("x", [1, 2, 3, 4])}).chunk({"x": 2}) with self.roundtrip( - original, save_kwargs={"encoding": {"a": {"chunks": [1]}}} + original, save_kwargs={"encoding": {"a": 
{"chunks": [1]}}} ) as ds1: assert_equal(ds1, original) @@ -3322,12 +3322,12 @@ def temp_dir(self) -> Iterator[tuple[str, str]]: @contextlib.contextmanager def roundtrip_dir( - self, - data, - store, - save_kwargs=None, - open_kwargs=None, - allow_cleanup_failure=False, + self, + data, + store, + save_kwargs=None, + open_kwargs=None, + allow_cleanup_failure=False, ) -> Iterator[Dataset]: if save_kwargs is None: save_kwargs = {} @@ -3336,14 +3336,14 @@ def roundtrip_dir( data.to_zarr(store, **save_kwargs, **self.version_kwargs) with xr.open_dataset( - store, engine="zarr", **open_kwargs, **self.version_kwargs + store, engine="zarr", **open_kwargs, **self.version_kwargs ) as ds: yield ds @pytest.mark.parametrize("consolidated", [True, False, None]) @pytest.mark.parametrize("write_empty", [True, False, None]) def test_write_empty( - self, consolidated: bool | None, write_empty: bool | None + self, consolidated: bool | None, write_empty: bool | None ) -> None: if write_empty is False: expected = ["0.1.0", "1.1.0"] @@ -3383,9 +3383,9 @@ def test_write_empty( ) with self.roundtrip_dir( - ds, - store, - {"mode": "a", "append_dim": "Z", "write_empty_chunks": write_empty}, + ds, + store, + {"mode": "a", "append_dim": "Z", "write_empty_chunks": write_empty}, ) as a_ds: expected_ds = xr.concat([ds, ds], dim="Z") @@ -3514,7 +3514,7 @@ def create_store(self): @contextlib.contextmanager def roundtrip( - self, data, save_kwargs=None, open_kwargs=None, allow_cleanup_failure=False + self, data, save_kwargs=None, open_kwargs=None, allow_cleanup_failure=False ): if save_kwargs is None: save_kwargs = {} @@ -3582,7 +3582,7 @@ class TestNetCDF3ViaNetCDF4Data(CFEncodedBase, NetCDF3Only): def create_store(self): with create_tmp_file() as tmp_file: with backends.NetCDF4DataStore.open( - tmp_file, mode="w", format="NETCDF3_CLASSIC" + tmp_file, mode="w", format="NETCDF3_CLASSIC" ) as store: yield store @@ -3603,7 +3603,7 @@ class TestNetCDF4ClassicViaNetCDF4Data(CFEncodedBase, NetCDF3Only): def create_store(self): with create_tmp_file() as tmp_file: with backends.NetCDF4DataStore.open( - tmp_file, mode="w", format="NETCDF4_CLASSIC" + tmp_file, mode="w", format="NETCDF4_CLASSIC" ) as store: yield store @@ -3706,7 +3706,7 @@ def test_complex_error(self, invalid_netcdf) -> None: expected = Dataset({"x": ("y", np.ones(5) + 1j * np.ones(5))}) save_kwargs = {"invalid_netcdf": invalid_netcdf} with pytest.raises( - h5netcdf.CompatibilityError, match="are not a supported NetCDF feature" + h5netcdf.CompatibilityError, match="are not a supported NetCDF feature" ): with self.roundtrip(expected, save_kwargs=save_kwargs) as actual: assert_equal(expected, actual) @@ -3821,7 +3821,7 @@ def test_compression_check_encoding_h5py(self) -> None: # Incompatible encodings cause a crash with create_tmp_file() as tmp_file: with pytest.raises( - ValueError, match=r"'zlib' and 'compression' encodings mismatch" + ValueError, match=r"'zlib' and 'compression' encodings mismatch" ): data.to_netcdf( tmp_file, @@ -3831,8 +3831,8 @@ def test_compression_check_encoding_h5py(self) -> None: with create_tmp_file() as tmp_file: with pytest.raises( - ValueError, - match=r"'complevel' and 'compression_opts' encodings mismatch", + ValueError, + match=r"'complevel' and 'compression_opts' encodings mismatch", ): data.to_netcdf( tmp_file, @@ -3929,7 +3929,7 @@ def test_open_badbytes(self) -> None: with open_dataset(b"\211HDF\r\n\032\n", engine="h5netcdf"): # type: ignore[arg-type] pass with pytest.raises( - ValueError, match=r"match in any of xarray's 
currently installed IO" + ValueError, match=r"match in any of xarray's currently installed IO" ): with open_dataset(b"garbage"): # type: ignore[arg-type] pass @@ -3937,7 +3937,7 @@ def test_open_badbytes(self) -> None: with open_dataset(b"garbage", engine="netcdf4"): # type: ignore[arg-type] pass with pytest.raises( - ValueError, match=r"not the signature of a valid netCDF4 file" + ValueError, match=r"not the signature of a valid netCDF4 file" ): with open_dataset(BytesIO(b"garbage"), engine="h5netcdf"): pass @@ -3991,7 +3991,7 @@ def test_open_fileobj(self) -> None: class TestH5NetCDFViaDaskData(TestH5NetCDFData): @contextlib.contextmanager def roundtrip( - self, data, save_kwargs=None, open_kwargs=None, allow_cleanup_failure=False + self, data, save_kwargs=None, open_kwargs=None, allow_cleanup_failure=False ): if save_kwargs is None: save_kwargs = {} @@ -3999,7 +3999,7 @@ def roundtrip( open_kwargs = {} open_kwargs.setdefault("chunks", -1) with TestH5NetCDFData.roundtrip( - self, data, save_kwargs, open_kwargs, allow_cleanup_failure + self, data, save_kwargs, open_kwargs, allow_cleanup_failure ) as ds: yield ds @@ -4035,9 +4035,9 @@ class TestH5NetCDFDataRos3Driver(TestCommon): @pytest.mark.filterwarnings("ignore:Duplicate dimension names") def test_get_variable_list(self) -> None: with open_dataset( - self.test_remote_dataset, - engine="h5netcdf", - backend_kwargs={"driver": "ros3"}, + self.test_remote_dataset, + engine="h5netcdf", + backend_kwargs={"driver": "ros3"}, ) as actual: assert "Temperature" in list(actual) @@ -4050,7 +4050,7 @@ def test_get_variable_list_empty_driver_kwds(self) -> None: backend_kwargs = {"driver": "ros3", "driver_kwds": driver_kwds} with open_dataset( - self.test_remote_dataset, engine="h5netcdf", backend_kwargs=backend_kwargs + self.test_remote_dataset, engine="h5netcdf", backend_kwargs=backend_kwargs ) as actual: assert "Temperature" in list(actual) @@ -4114,7 +4114,7 @@ def skip_if_not_engine(engine): reason="Flaky test which can cause the worker to crash (so don't xfail). 
Very open to contributions fixing this" ) def test_open_mfdataset_manyfiles( - readengine, nfiles, parallel, chunks, file_cache_maxsize + readengine, nfiles, parallel, chunks, file_cache_maxsize ): # skip certain combinations skip_if_not_engine(readengine) @@ -4133,12 +4133,12 @@ def test_open_mfdataset_manyfiles( # check that calculation on opened datasets works properly with open_mfdataset( - tmpfiles, - combine="nested", - concat_dim="x", - engine=readengine, - parallel=parallel, - chunks=chunks if (not chunks and readengine != "zarr") else "auto", + tmpfiles, + combine="nested", + concat_dim="x", + engine=readengine, + parallel=parallel, + chunks=chunks if (not chunks and readengine != "zarr") else "auto", ) as actual: # check that using open_mfdataset returns dask arrays for variables assert isinstance(actual["foo"].data, dask_array_type) @@ -4175,7 +4175,7 @@ def test_open_mfdataset_list_attr() -> None: with open_dataset(nfiles[1]) as ds2: original = xr.concat([ds1, ds2], dim="x") with xr.open_mfdataset( - [nfiles[0], nfiles[1]], combine="nested", concat_dim="x" + [nfiles[0], nfiles[1]], combine="nested", concat_dim="x" ) as actual: assert_identical(actual, original) @@ -4230,13 +4230,13 @@ def gen_datasets_with_common_coord_and_time(self): @pytest.mark.parametrize("opt", ["all", "minimal", "different"]) @pytest.mark.parametrize("join", ["outer", "inner", "left", "right"]) def test_open_mfdataset_does_same_as_concat( - self, combine, concat_dim, opt, join + self, combine, concat_dim, opt, join ) -> None: with self.setup_files_and_datasets() as (files, [ds1, ds2]): if combine == "by_coords": files.reverse() with open_mfdataset( - files, data_vars=opt, combine=combine, concat_dim=concat_dim, join=join + files, data_vars=opt, combine=combine, concat_dim=concat_dim, join=join ) as ds: ds_expect = xr.concat([ds1, ds2], data_vars=opt, dim="t", join=join) assert_identical(ds, ds_expect) @@ -4244,31 +4244,31 @@ def test_open_mfdataset_does_same_as_concat( @pytest.mark.parametrize( ["combine_attrs", "attrs", "expected", "expect_error"], ( - pytest.param("drop", [{"a": 1}, {"a": 2}], {}, False, id="drop"), - pytest.param( - "override", [{"a": 1}, {"a": 2}], {"a": 1}, False, id="override" - ), - pytest.param( - "no_conflicts", [{"a": 1}, {"a": 2}], None, True, id="no_conflicts" - ), - pytest.param( - "identical", - [{"a": 1, "b": 2}, {"a": 1, "c": 3}], - None, - True, - id="identical", - ), - pytest.param( - "drop_conflicts", - [{"a": 1, "b": 2}, {"b": -1, "c": 3}], - {"a": 1, "c": 3}, - False, - id="drop_conflicts", - ), + pytest.param("drop", [{"a": 1}, {"a": 2}], {}, False, id="drop"), + pytest.param( + "override", [{"a": 1}, {"a": 2}], {"a": 1}, False, id="override" + ), + pytest.param( + "no_conflicts", [{"a": 1}, {"a": 2}], None, True, id="no_conflicts" + ), + pytest.param( + "identical", + [{"a": 1, "b": 2}, {"a": 1, "c": 3}], + None, + True, + id="identical", + ), + pytest.param( + "drop_conflicts", + [{"a": 1, "b": 2}, {"b": -1, "c": 3}], + {"a": 1, "c": 3}, + False, + id="drop_conflicts", + ), ), ) def test_open_mfdataset_dataset_combine_attrs( - self, combine_attrs, attrs, expected, expect_error + self, combine_attrs, attrs, expected, expect_error ): with self.setup_files_and_datasets() as (files, [ds1, ds2]): # Give the files an inconsistent attribute @@ -4288,10 +4288,10 @@ def test_open_mfdataset_dataset_combine_attrs( ) else: with xr.open_mfdataset( - files, - combine="nested", - concat_dim="t", - combine_attrs=combine_attrs, + files, + combine="nested", + concat_dim="t", + 
combine_attrs=combine_attrs, ) as ds: assert ds.attrs == expected @@ -4330,13 +4330,13 @@ def test_open_mfdataset_dataarray_attr_by_coords(self) -> None: ) @pytest.mark.parametrize("opt", ["all", "minimal", "different"]) def test_open_mfdataset_exact_join_raises_error( - self, combine, concat_dim, opt + self, combine, concat_dim, opt ) -> None: with self.setup_files_and_datasets(fuzz=0.1) as (files, [ds1, ds2]): if combine == "by_coords": files.reverse() with pytest.raises( - ValueError, match=r"cannot align objects.*join.*exact.*" + ValueError, match=r"cannot align objects.*join.*exact.*" ): open_mfdataset( files, @@ -4352,7 +4352,7 @@ def test_common_coord_when_datavars_all(self) -> None: with self.setup_files_and_datasets() as (files, [ds1, ds2]): # open the files with the data_var option with open_mfdataset( - files, data_vars=opt, combine="nested", concat_dim="t" + files, data_vars=opt, combine="nested", concat_dim="t" ) as ds: coord_shape = ds[self.coord_name].shape coord_shape1 = ds1[self.coord_name].shape @@ -4370,7 +4370,7 @@ def test_common_coord_when_datavars_minimal(self) -> None: with self.setup_files_and_datasets() as (files, [ds1, ds2]): # open the files using data_vars option with open_mfdataset( - files, data_vars=opt, combine="nested", concat_dim="t" + files, data_vars=opt, combine="nested", concat_dim="t" ) as ds: coord_shape = ds[self.coord_name].shape coord_shape1 = ds1[self.coord_name].shape @@ -4404,7 +4404,7 @@ def create_store(self): @contextlib.contextmanager def roundtrip( - self, data, save_kwargs=None, open_kwargs=None, allow_cleanup_failure=False + self, data, save_kwargs=None, open_kwargs=None, allow_cleanup_failure=False ): yield data.chunk() @@ -4460,13 +4460,13 @@ def test_open_mfdataset(self) -> None: original.isel(x=slice(5)).to_netcdf(tmp1) original.isel(x=slice(5, 10)).to_netcdf(tmp2) with open_mfdataset( - [tmp1, tmp2], concat_dim="x", combine="nested" + [tmp1, tmp2], concat_dim="x", combine="nested" ) as actual: assert isinstance(actual.foo.variable.data, da.Array) assert actual.foo.variable.data.chunks == ((5, 5),) assert_identical(original, actual) with open_mfdataset( - [tmp1, tmp2], concat_dim="x", combine="nested", chunks={"x": 3} + [tmp1, tmp2], concat_dim="x", combine="nested", chunks={"x": 3} ) as actual: assert actual.foo.variable.data.chunks == ((3, 2, 3, 2),) @@ -4494,18 +4494,18 @@ def test_open_mfdataset_2d(self) -> None: original.isel(x=slice(5), y=slice(4, 8)).to_netcdf(tmp3) original.isel(x=slice(5, 10), y=slice(4, 8)).to_netcdf(tmp4) with open_mfdataset( - [[tmp1, tmp2], [tmp3, tmp4]], - combine="nested", - concat_dim=["y", "x"], + [[tmp1, tmp2], [tmp3, tmp4]], + combine="nested", + concat_dim=["y", "x"], ) as actual: assert isinstance(actual.foo.variable.data, da.Array) assert actual.foo.variable.data.chunks == ((5, 5), (4, 4)) assert_identical(original, actual) with open_mfdataset( - [[tmp1, tmp2], [tmp3, tmp4]], - combine="nested", - concat_dim=["y", "x"], - chunks={"x": 3, "y": 2}, + [[tmp1, tmp2], [tmp3, tmp4]], + combine="nested", + concat_dim=["y", "x"], + chunks={"x": 3, "y": 2}, ) as actual: assert actual.foo.variable.data.chunks == ( (3, 2, 3, 2), @@ -4521,7 +4521,7 @@ def test_open_mfdataset_pathlib(self) -> None: original.isel(x=slice(5)).to_netcdf(tmp1) original.isel(x=slice(5, 10)).to_netcdf(tmp2) with open_mfdataset( - [tmp1, tmp2], concat_dim="x", combine="nested" + [tmp1, tmp2], concat_dim="x", combine="nested" ) as actual: assert_identical(original, actual) @@ -4540,9 +4540,9 @@ def 
test_open_mfdataset_2d_pathlib(self) -> None: original.isel(x=slice(5), y=slice(4, 8)).to_netcdf(tmp3) original.isel(x=slice(5, 10), y=slice(4, 8)).to_netcdf(tmp4) with open_mfdataset( - [[tmp1, tmp2], [tmp3, tmp4]], - combine="nested", - concat_dim=["y", "x"], + [[tmp1, tmp2], [tmp3, tmp4]], + combine="nested", + concat_dim=["y", "x"], ) as actual: assert_identical(original, actual) @@ -4554,7 +4554,7 @@ def test_open_mfdataset_2(self) -> None: original.isel(x=slice(5, 10)).to_netcdf(tmp2) with open_mfdataset( - [tmp1, tmp2], concat_dim="x", combine="nested" + [tmp1, tmp2], concat_dim="x", combine="nested" ) as actual: assert_identical(original, actual) @@ -4569,7 +4569,7 @@ def test_attrs_mfdataset(self) -> None: ds1.to_netcdf(tmp1) ds2.to_netcdf(tmp2) with open_mfdataset( - [tmp1, tmp2], concat_dim="x", combine="nested" + [tmp1, tmp2], concat_dim="x", combine="nested" ) as actual: # presumes that attributes inherited from # first dataset loaded @@ -4588,7 +4588,7 @@ def test_open_mfdataset_attrs_file(self) -> None: ds1.to_netcdf(tmp1) ds2.to_netcdf(tmp2) with open_mfdataset( - [tmp1, tmp2], concat_dim="x", combine="nested", attrs_file=tmp2 + [tmp1, tmp2], concat_dim="x", combine="nested", attrs_file=tmp2 ) as actual: # attributes are inherited from the master file assert actual.attrs["test2"] == ds2.attrs["test2"] @@ -4607,7 +4607,7 @@ def test_open_mfdataset_attrs_file_path(self) -> None: ds1.to_netcdf(tmp1) ds2.to_netcdf(tmp2) with open_mfdataset( - [tmp1, tmp2], concat_dim="x", combine="nested", attrs_file=tmp2 + [tmp1, tmp2], concat_dim="x", combine="nested", attrs_file=tmp2 ) as actual: # attributes are inherited from the master file assert actual.attrs["test2"] == ds2.attrs["test2"] @@ -4666,7 +4666,7 @@ def preprocess(ds): expected = preprocess(original) with open_mfdataset( - tmp, preprocess=preprocess, combine="by_coords" + tmp, preprocess=preprocess, combine="by_coords" ) as actual: assert_identical(expected, actual) @@ -4677,7 +4677,7 @@ def test_save_mfdataset_roundtrip(self) -> None: with create_tmp_file() as tmp2: save_mfdataset(datasets, [tmp1, tmp2]) with open_mfdataset( - [tmp1, tmp2], concat_dim="x", combine="nested" + [tmp1, tmp2], concat_dim="x", combine="nested" ) as actual: assert_identical(actual, original) @@ -4703,7 +4703,7 @@ def test_save_mfdataset_pathlib_roundtrip(self) -> None: tmp2 = Path(tmps2) save_mfdataset(datasets, [tmp1, tmp2]) with open_mfdataset( - [tmp1, tmp2], concat_dim="x", combine="nested" + [tmp1, tmp2], concat_dim="x", combine="nested" ) as actual: assert_identical(actual, original) @@ -4745,7 +4745,7 @@ def test_open_mfdataset_concat_dim_none(self) -> None: data.to_netcdf(tmp1) Dataset({"x": np.nan}).to_netcdf(tmp2) with open_mfdataset( - [tmp1, tmp2], concat_dim=None, combine="nested" + [tmp1, tmp2], concat_dim=None, combine="nested" ) as actual: assert_identical(data, actual) @@ -4807,7 +4807,7 @@ def test_open_multi_dataset(self) -> None: original.to_netcdf(tmp1) original.to_netcdf(tmp2) with open_mfdataset( - [tmp1, tmp2], concat_dim=dim, combine="nested" + [tmp1, tmp2], concat_dim=dim, combine="nested" ) as actual: assert_identical(expected, actual) @@ -4861,7 +4861,7 @@ def test_save_mfdataset_compute_false_roundtrip(self) -> None: assert isinstance(delayed_obj, Delayed) delayed_obj.compute() with open_mfdataset( - [tmp1, tmp2], combine="nested", concat_dim="x" + [tmp1, tmp2], combine="nested", concat_dim="x" ) as actual: assert_identical(actual, original) @@ -5339,7 +5339,7 @@ def 
test_use_cftime_standard_calendar_default_in_range(calendar) -> None: @pytest.mark.parametrize("calendar", _STANDARD_CALENDARS) @pytest.mark.parametrize("units_year", [1500, 2500]) def test_use_cftime_standard_calendar_default_out_of_range( - calendar, units_year + calendar, units_year ) -> None: import cftime @@ -5525,7 +5525,7 @@ def test_open_fsspec() -> None: import zarr if not hasattr(zarr.storage, "FSStore") or not hasattr( - zarr.storage.FSStore, "getitems" + zarr.storage.FSStore, "getitems" ): pytest.skip("zarr too old") @@ -5608,7 +5608,7 @@ def test_open_dataset_chunking_zarr(chunks, tmp_path: Path) -> None: with dask.config.set({"array.chunk-size": "1MiB"}): expected = ds.chunk(chunks) with open_dataset( - tmp_path / "test.zarr", engine="zarr", chunks=chunks + tmp_path / "test.zarr", engine="zarr", chunks=chunks ) as actual: xr.testing.assert_chunks_equal(actual, expected) @@ -5639,7 +5639,7 @@ def test_chunking_consintency(chunks, tmp_path: Path) -> None: with dask.config.set({"array.chunk-size": "1MiB"}): expected = ds.chunk(chunks) with xr.open_dataset( - tmp_path / "test.zarr", engine="zarr", chunks=chunks + tmp_path / "test.zarr", engine="zarr", chunks=chunks ) as actual: xr.testing.assert_chunks_equal(actual, expected) @@ -5733,7 +5733,7 @@ def test_h5netcdf_entrypoint(tmp_path: Path) -> None: @requires_netCDF4 @pytest.mark.parametrize("str_type", (str, np.str_)) def test_write_file_from_np_str( - str_type: type[str] | type[np.str_], tmpdir: str + str_type: type[str] | type[np.str_], tmpdir: str ) -> None: # https://github.com/pydata/xarray/pull/5264 scenarios = [str_type(v) for v in ["scenario_a", "scenario_b", "scenario_c"]] @@ -5799,7 +5799,7 @@ def test_raise_writing_to_nczarr(self, mode) -> None: with create_tmp_file(suffix=".zarr") as tmp: ds = self._create_nczarr(tmp) with pytest.raises( - KeyError, match="missing the attribute `_ARRAY_DIMENSIONS`," + KeyError, match="missing the attribute `_ARRAY_DIMENSIONS`," ): ds.to_zarr(tmp, mode=mode) @@ -5948,10 +5948,10 @@ def test_zarr_region_index_write(self, tmp_path): region: Mapping[str, slice] | Literal["auto"] for region in [region_slice, "auto"]: # type: ignore[assignment] with patch.object( - ZarrStore, - "set_variables", - side_effect=ZarrStore.set_variables, - autospec=True, + ZarrStore, + "set_variables", + side_effect=ZarrStore.set_variables, + autospec=True, ) as mock: ds_region.to_zarr(tmp_path / "test.zarr", region=region, mode="r+") @@ -6261,7 +6261,7 @@ def test_zarr_safe_chunk_region(tmp_path): data = np.random.RandomState(0).randn(2920, 25, 53) ds = xr.Dataset({"temperature": (("time", "lat", "lon"), data)}) chunks = {"time": 1000, "lat": 25, "lon": 53} - ds.chunk(chunks).to_zarr(store, compute=False) + ds.chunk(chunks).to_zarr(store, compute=False, mode="w") region = {"time": slice(1000, 2000, 1)} chunk = ds.isel(region) chunk = chunk.chunk() From 58f1866c584bf29abc64f2ef93d5c66844b91c2f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 30 Sep 2024 19:50:51 +0000 Subject: [PATCH 17/17] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/tests/test_backends.py | 416 +++++++++++++++++----------------- 1 file changed, 208 insertions(+), 208 deletions(-) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 0e5db458116..cc8dbd4e02c 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -41,11 +41,11 @@ ) from 
xarray.backends.common import robust_getitem from xarray.backends.h5netcdf_ import H5netcdfBackendEntrypoint +from xarray.backends.netcdf3 import _nc3_dtype_coercions from xarray.backends.netCDF4_ import ( NetCDF4BackendEntrypoint, _extract_nc4_variable_encoding, ) -from xarray.backends.netcdf3 import _nc3_dtype_coercions from xarray.backends.pydap_ import PydapDataStore from xarray.backends.scipy_ import ScipyBackendEntrypoint from xarray.coding.cftime_offsets import cftime_range @@ -306,7 +306,7 @@ class NetCDF3Only: def test_dtype_coercion_error(self) -> None: """Failing dtype coercion should lead to an error""" for dtype, format in itertools.product( - _nc3_dtype_coercions, self.netcdf3_formats + _nc3_dtype_coercions, self.netcdf3_formats ): if dtype == "bool": # coerced upcast (bool to int8) ==> can never fail @@ -332,7 +332,7 @@ def create_store(self): @contextlib.contextmanager def roundtrip( - self, data, save_kwargs=None, open_kwargs=None, allow_cleanup_failure=False + self, data, save_kwargs=None, open_kwargs=None, allow_cleanup_failure=False ): if save_kwargs is None: save_kwargs = {} @@ -345,7 +345,7 @@ def roundtrip( @contextlib.contextmanager def roundtrip_append( - self, data, save_kwargs=None, open_kwargs=None, allow_cleanup_failure=False + self, data, save_kwargs=None, open_kwargs=None, allow_cleanup_failure=False ): if save_kwargs is None: save_kwargs = {} @@ -392,8 +392,8 @@ def check_dtypes_roundtripped(self, expected, actual): # For NetCDF3, the backend should perform dtype coercion if ( - isinstance(self, NetCDF3Only) - and str(expected_dtype) in _nc3_dtype_coercions + isinstance(self, NetCDF3Only) + and str(expected_dtype) in _nc3_dtype_coercions ): expected_dtype = np.dtype(_nc3_dtype_coercions[str(expected_dtype)]) @@ -401,8 +401,8 @@ def check_dtypes_roundtripped(self, expected, actual): # TODO: check expected behavior for string dtypes more carefully string_kinds = {"O", "S", "U"} assert expected_dtype == actual_dtype or ( - expected_dtype.kind in string_kinds - and actual_dtype.kind in string_kinds + expected_dtype.kind in string_kinds + and actual_dtype.kind in string_kinds ) def test_roundtrip_test_data(self) -> None: @@ -584,8 +584,8 @@ def test_roundtrip_cftime_datetime_data(self) -> None: abs_diff = abs(actual.t.values - expected_decoded_t) assert (abs_diff <= np.timedelta64(1, "s")).all() assert ( - actual.t.encoding["units"] - == "days since 0001-01-01 00:00:00.000000" + actual.t.encoding["units"] + == "days since 0001-01-01 00:00:00.000000" ) assert actual.t.encoding["calendar"] == expected_calendar @@ -626,7 +626,7 @@ def test_roundtrip_coordinates(self) -> None: with self.roundtrip(original, open_kwargs={"decode_coords": False}) as expected: # check roundtripping when decode_coords=False with self.roundtrip( - expected, open_kwargs={"decode_coords": False} + expected, open_kwargs={"decode_coords": False} ) as actual: assert_identical(expected, actual) @@ -905,8 +905,8 @@ def test_roundtrip_empty_vlen_string_array(self) -> None: "decoded_fn, encoded_fn", [ ( - create_unsigned_masked_scaled_data, - create_encoded_unsigned_masked_scaled_data, + create_unsigned_masked_scaled_data, + create_encoded_unsigned_masked_scaled_data, ), pytest.param( create_bad_unsigned_masked_scaled_data, @@ -914,12 +914,12 @@ def test_roundtrip_empty_vlen_string_array(self) -> None: marks=pytest.mark.xfail(reason="Bad _Unsigned attribute."), ), ( - create_signed_masked_scaled_data, - create_encoded_signed_masked_scaled_data, + create_signed_masked_scaled_data, + 
create_encoded_signed_masked_scaled_data, ), ( - create_unsigned_false_masked_scaled_data, - create_encoded_unsigned_false_masked_scaled_data, + create_unsigned_false_masked_scaled_data, + create_encoded_unsigned_false_masked_scaled_data, ), (create_masked_and_scaled_data, create_encoded_masked_and_scaled_data), ], @@ -931,9 +931,9 @@ def test_roundtrip_mask_and_scale(self, decoded_fn, encoded_fn, dtype) -> None: decoded = decoded_fn(dtype) encoded = encoded_fn(dtype) if decoded["x"].encoding["dtype"] == "u1" and not ( - self.engine == "netcdf4" - and self.file_format is None - or self.file_format == "NETCDF4" + self.engine == "netcdf4" + and self.file_format is None + or self.file_format == "NETCDF4" ): pytest.skip("uint8 data can't be written to non-NetCDF4 data") @@ -942,8 +942,8 @@ def test_roundtrip_mask_and_scale(self, decoded_fn, encoded_fn, dtype) -> None: assert decoded.variables[k].dtype == actual.variables[k].dtype # CF _FillValue is always on-disk type assert ( - decoded.variables[k].encoding["_FillValue"] - == actual.variables[k].encoding["_FillValue"] + decoded.variables[k].encoding["_FillValue"] + == actual.variables[k].encoding["_FillValue"] ) assert_allclose(decoded, actual, decode_bytes=False) @@ -954,8 +954,8 @@ def test_roundtrip_mask_and_scale(self, decoded_fn, encoded_fn, dtype) -> None: assert encoded.variables[k].dtype == actual.variables[k].dtype # CF _FillValue is always on-disk type assert ( - decoded.variables[k].encoding["_FillValue"] - == actual.variables[k].attrs["_FillValue"] + decoded.variables[k].encoding["_FillValue"] + == actual.variables[k].attrs["_FillValue"] ) assert_allclose(encoded, actual, decode_bytes=False) @@ -964,8 +964,8 @@ def test_roundtrip_mask_and_scale(self, decoded_fn, encoded_fn, dtype) -> None: assert encoded.variables[k].dtype == actual.variables[k].dtype # CF _FillValue is always on-disk type assert ( - encoded.variables[k].attrs["_FillValue"] - == actual.variables[k].attrs["_FillValue"] + encoded.variables[k].attrs["_FillValue"] + == actual.variables[k].attrs["_FillValue"] ) assert_allclose(encoded, actual, decode_bytes=False) @@ -1030,7 +1030,7 @@ def _roundtrip_with_warnings(*args, **kwargs): assert_allclose(decoded, actual, decode_bytes=False) with _roundtrip_with_warnings( - decoded, open_kwargs=dict(decode_cf=False) + decoded, open_kwargs=dict(decode_cf=False) ) as actual: for k in encoded.variables: assert encoded.variables[k].dtype == actual.variables[k].dtype @@ -1120,7 +1120,7 @@ def test_coordinate_variables_after_dataset_roundtrip(self) -> None: assert_equal(actual, expected) def test_grid_mapping_and_bounds_are_coordinates_after_dataarray_roundtrip( - self, + self, ) -> None: original = self._create_cf_dataset() # The DataArray roundtrip should have the same warnings as the @@ -1132,14 +1132,14 @@ def test_grid_mapping_and_bounds_are_coordinates_after_dataarray_roundtrip( # needs the to_dataset. The other backends should be fine # without it. 
with pytest.warns( - UserWarning, - match=( - r"Variable\(s\) referenced in bounds not in variables: " - r"\['l(at|ong)itude_bnds'\]" - ), + UserWarning, + match=( + r"Variable\(s\) referenced in bounds not in variables: " + r"\['l(at|ong)itude_bnds'\]" + ), ): with self.roundtrip( - original["variable"].to_dataset(), open_kwargs={"decode_coords": "all"} + original["variable"].to_dataset(), open_kwargs={"decode_coords": "all"} ) as actual: assert_identical(actual, original["variable"].to_dataset()) @@ -1224,7 +1224,7 @@ def test_invalid_dataarray_names_raise(self) -> None: data = np.random.random((2, 2)) da = xr.DataArray(data) for name, (error, msg) in zip( - [0, (4, 5), True, ""], [te, te, te, ve], strict=True + [0, (4, 5), True, ""], [te, te, te, ve], strict=True ): ds = Dataset({name: da}) with pytest.raises(error) as excinfo: @@ -1357,7 +1357,7 @@ def test_append_with_invalid_dim_raises(self) -> None: data["var9"] = data["var2"] * 3 data = data.isel(dim1=slice(2, 6)) # modify one dimension with pytest.raises( - ValueError, match=r"Unable to update size for existing dimension" + ValueError, match=r"Unable to update size for existing dimension" ): self.save(data, tmp_file, mode="a") @@ -1419,7 +1419,7 @@ def test_byte_attrs(self, byte_attrs_dataset: dict[str, Any]) -> None: @contextlib.contextmanager def create_tmp_file( - suffix: str = ".nc", allow_cleanup_failure: bool = False + suffix: str = ".nc", allow_cleanup_failure: bool = False ) -> Iterator[str]: temp_dir = tempfile.mkdtemp() path = os.path.join(temp_dir, f"temp-{next(_counter)}{suffix}") @@ -1435,7 +1435,7 @@ def create_tmp_file( @contextlib.contextmanager def create_tmp_files( - nfiles: int, suffix: str = ".nc", allow_cleanup_failure: bool = False + nfiles: int, suffix: str = ".nc", allow_cleanup_failure: bool = False ) -> Iterator[list[str]]: with ExitStack() as stack: files = [ @@ -1517,7 +1517,7 @@ def test_write_groups(self) -> None: ], ) def test_encoding_kwarg_vlen_string( - self, input_strings: list[str], is_bytes: bool + self, input_strings: list[str], is_bytes: bool ) -> None: original = Dataset({"x": input_strings}) @@ -1689,9 +1689,9 @@ def test_auto_chunking_is_based_on_disk_chunk_sizes(self) -> None: with dask.config.set({"array.chunk-size": "100KiB"}): with self.chunked_roundtrip( - (1, y_size, x_size), - (1, y_chunksize, x_chunksize), - open_kwargs={"chunks": "auto"}, + (1, y_size, x_size), + (1, y_chunksize, x_chunksize), + open_kwargs={"chunks": "auto"}, ) as ds: t_chunks, y_chunks, x_chunks = ds["image"].data.chunks assert all(np.asanyarray(y_chunks) == y_chunksize) @@ -1705,21 +1705,21 @@ def test_base_chunking_uses_disk_chunk_sizes(self) -> None: x_chunksize = 10 with self.chunked_roundtrip( - (1, y_size, x_size), - (1, y_chunksize, x_chunksize), - open_kwargs={"chunks": {}}, + (1, y_size, x_size), + (1, y_chunksize, x_chunksize), + open_kwargs={"chunks": {}}, ) as ds: for chunksizes, expected in zip( - ds["image"].data.chunks, (1, y_chunksize, x_chunksize), strict=True + ds["image"].data.chunks, (1, y_chunksize, x_chunksize), strict=True ): assert all(np.asanyarray(chunksizes) == expected) @contextlib.contextmanager def chunked_roundtrip( - self, - array_shape: tuple[int, int, int], - chunk_sizes: tuple[int, int, int], - open_kwargs: dict[str, Any] | None = None, + self, + array_shape: tuple[int, int, int], + chunk_sizes: tuple[int, int, int], + open_kwargs: dict[str, Any] | None = None, ) -> Generator[Dataset, None, None]: t_size, y_size, x_size = array_shape t_chunksize, y_chunksize, x_chunksize = 
chunk_sizes @@ -1742,7 +1742,7 @@ def test_preferred_chunks_are_disk_chunk_sizes(self) -> None: x_chunksize = 10 with self.chunked_roundtrip( - (1, y_size, x_size), (1, y_chunksize, x_chunksize) + (1, y_size, x_size), (1, y_chunksize, x_chunksize) ) as ds: assert ds["image"].encoding["preferred_chunks"] == { "t": 1, @@ -1759,7 +1759,7 @@ def test_encoding_chunksizes_unlimited(self) -> None: "complevel": 0, "fletcher32": False, "contiguous": False, - "chunksizes": (2 ** 20,), + "chunksizes": (2**20,), "original_shape": (3,), } with self.roundtrip(ds) as actual: @@ -1862,14 +1862,14 @@ def test_encoding_enum__no_fill_value(self): with self.roundtrip(original, save_kwargs=save_kwargs) as actual: assert_equal(original, actual) assert ( - actual.clouds.encoding["dtype"].metadata["enum"] - == cloud_type_dict + actual.clouds.encoding["dtype"].metadata["enum"] + == cloud_type_dict ) if self.engine != "h5netcdf": # not implemented in h5netcdf yet assert ( - actual.clouds.encoding["dtype"].metadata["enum_name"] - == "cloud_type" + actual.clouds.encoding["dtype"].metadata["enum_name"] + == "cloud_type" ) @requires_netCDF4 @@ -1898,21 +1898,21 @@ def test_encoding_enum__multiple_variable_with_enum(self): with self.roundtrip(original, save_kwargs=save_kwargs) as actual: assert_equal(original, actual) assert ( - actual.clouds.encoding["dtype"] == actual.tifa.encoding["dtype"] + actual.clouds.encoding["dtype"] == actual.tifa.encoding["dtype"] ) assert ( - actual.clouds.encoding["dtype"].metadata - == actual.tifa.encoding["dtype"].metadata + actual.clouds.encoding["dtype"].metadata + == actual.tifa.encoding["dtype"].metadata ) assert ( - actual.clouds.encoding["dtype"].metadata["enum"] - == cloud_type_dict + actual.clouds.encoding["dtype"].metadata["enum"] + == cloud_type_dict ) if self.engine != "h5netcdf": # not implemented in h5netcdf yet assert ( - actual.clouds.encoding["dtype"].metadata["enum_name"] - == "cloud_type" + actual.clouds.encoding["dtype"].metadata["enum_name"] + == "cloud_type" ) @requires_netCDF4 @@ -1940,8 +1940,8 @@ def test_encoding_enum__error_multiple_variable_with_changing_enum(self): ) with open_dataset(tmp_file) as original: assert ( - original.clouds.encoding["dtype"].metadata - == original.tifa.encoding["dtype"].metadata + original.clouds.encoding["dtype"].metadata + == original.tifa.encoding["dtype"].metadata ) modified_enum = original.clouds.encoding["dtype"].metadata["enum"] modified_enum.update({"neblig": 2}) @@ -1952,11 +1952,11 @@ def test_encoding_enum__error_multiple_variable_with_changing_enum(self): if self.engine != "h5netcdf": # not implemented yet in h5netcdf with pytest.raises( - ValueError, - match=( - "Cannot save variable .*" - " because an enum `cloud_type` already exists in the Dataset .*" - ), + ValueError, + match=( + "Cannot save variable .*" + " because an enum `cloud_type` already exists in the Dataset .*" + ), ): with self.roundtrip(original): pass @@ -2081,8 +2081,8 @@ def test_compression_encoding(self, compression: str | None) -> None: actual_encoding = actual["var2"].encoding assert expected_encoding.items() <= actual_encoding.items() if ( - encoding_params["compression"] is not None - and "blosc" not in encoding_params["compression"] + encoding_params["compression"] is not None + and "blosc" not in encoding_params["compression"] ): # regression test for #156 expected = data.isel(dim1=0) @@ -2153,7 +2153,7 @@ def test_deepcopy(self) -> None: class TestNetCDF4ViaDaskData(TestNetCDF4Data): @contextlib.contextmanager def roundtrip( - self, data, 
save_kwargs=None, open_kwargs=None, allow_cleanup_failure=False + self, data, save_kwargs=None, open_kwargs=None, allow_cleanup_failure=False ): if open_kwargs is None: open_kwargs = {} @@ -2161,7 +2161,7 @@ def roundtrip( save_kwargs = {} open_kwargs.setdefault("chunks", -1) with TestNetCDF4Data.roundtrip( - self, data, save_kwargs, open_kwargs, allow_cleanup_failure + self, data, save_kwargs, open_kwargs, allow_cleanup_failure ) as ds: yield ds @@ -2219,13 +2219,13 @@ def save(self, dataset, store_target, **kwargs): # type: ignore[override] @contextlib.contextmanager def open(self, store_target, **kwargs): with xr.open_dataset( - store_target, engine="zarr", **kwargs, **self.version_kwargs + store_target, engine="zarr", **kwargs, **self.version_kwargs ) as ds: yield ds @contextlib.contextmanager def roundtrip( - self, data, save_kwargs=None, open_kwargs=None, allow_cleanup_failure=False + self, data, save_kwargs=None, open_kwargs=None, allow_cleanup_failure=False ): if save_kwargs is None: save_kwargs = {} @@ -2242,9 +2242,9 @@ def test_roundtrip_consolidated(self, consolidated) -> None: pytest.xfail("consolidated metadata is not supported for zarr v3 yet") expected = create_test_data() with self.roundtrip( - expected, - save_kwargs={"consolidated": consolidated}, - open_kwargs={"backend_kwargs": {"consolidated": consolidated}}, + expected, + save_kwargs={"consolidated": consolidated}, + open_kwargs={"backend_kwargs": {"consolidated": consolidated}}, ) as actual: self.check_dtypes_roundtripped(expected, actual) assert_identical(expected, actual) @@ -2257,8 +2257,8 @@ def test_read_non_consolidated_warning(self) -> None: with self.create_zarr_target() as store: expected.to_zarr(store, consolidated=False, **self.version_kwargs) with pytest.warns( - RuntimeWarning, - match="Failed to open Zarr store with consolidated", + RuntimeWarning, + match="Failed to open Zarr store with consolidated", ): with xr.open_zarr(store, **self.version_kwargs) as ds: assert_identical(ds, expected) @@ -2529,17 +2529,17 @@ def test_write_persistence_modes(self, group) -> None: # overwrite mode with self.roundtrip( - original, - save_kwargs={"mode": "w", "group": group}, - open_kwargs={"group": group}, + original, + save_kwargs={"mode": "w", "group": group}, + open_kwargs={"group": group}, ) as actual: assert_identical(original, actual) # don't overwrite mode with self.roundtrip( - original, - save_kwargs={"mode": "w-", "group": group}, - open_kwargs={"group": group}, + original, + save_kwargs={"mode": "w-", "group": group}, + open_kwargs={"group": group}, ) as actual: assert_identical(original, actual) @@ -2555,9 +2555,9 @@ def test_write_persistence_modes(self, group) -> None: # check append mode for normal write with self.roundtrip( - original, - save_kwargs={"mode": "a", "group": group}, - open_kwargs={"group": group}, + original, + save_kwargs={"mode": "a", "group": group}, + open_kwargs={"group": group}, ) as actual: assert_identical(original, actual) @@ -2590,7 +2590,7 @@ def test_group(self) -> None: original = create_test_data() group = "some/random/path" with self.roundtrip( - original, save_kwargs={"group": group}, open_kwargs={"group": group} + original, save_kwargs={"group": group}, open_kwargs={"group": group} ) as actual: assert_identical(original, actual) @@ -2640,7 +2640,7 @@ def test_append_with_mode_rplus_fails(self) -> None: with self.create_zarr_target() as store: original.to_zarr(store, **self.version_kwargs) with pytest.raises( - ValueError, match="dataset contains non-pre-existing 
variables" + ValueError, match="dataset contains non-pre-existing variables" ): modified.to_zarr(store, mode="r+", **self.version_kwargs) @@ -2649,7 +2649,7 @@ def test_append_with_invalid_dim_raises(self) -> None: with self.create_zarr_target() as store_target: ds.to_zarr(store_target, mode="w", **self.version_kwargs) with pytest.raises( - ValueError, match="does not match any existing dataset dimensions" + ValueError, match="does not match any existing dataset dimensions" ): ds_to_append.to_zarr( store_target, append_dim="notvalid", **self.version_kwargs @@ -2870,7 +2870,7 @@ def test_write_region(self, consolidated, compute, use_dask, write_empty) -> Non ) if compute: with xr.open_zarr( - store, consolidated=consolidated, **self.version_kwargs + store, consolidated=consolidated, **self.version_kwargs ) as actual: assert_identical(actual, zeros) for i in range(0, 10, 2): @@ -2883,7 +2883,7 @@ def test_write_region(self, consolidated, compute, use_dask, write_empty) -> Non **self.version_kwargs, ) with xr.open_zarr( - store, consolidated=consolidated, **self.version_kwargs + store, consolidated=consolidated, **self.version_kwargs ) as actual: assert_identical(actual, nonzeros) @@ -2963,10 +2963,10 @@ def setup_and_verify_store(expected=data): with setup_and_verify_store() as store: with pytest.raises( - ValueError, - match=re.escape( - "cannot set region unless mode='a', mode='a-', mode='r+' or mode=None" - ), + ValueError, + match=re.escape( + "cannot set region unless mode='a', mode='a-', mode='r+' or mode=None" + ), ): data.to_zarr( store, region={"x": slice(None)}, mode="w", **self.version_kwargs @@ -2988,15 +2988,15 @@ def setup_and_verify_store(expected=data): with setup_and_verify_store() as store: with pytest.raises( - ValueError, - match=r"all keys in ``region`` are not in Dataset dimensions", + ValueError, + match=r"all keys in ``region`` are not in Dataset dimensions", ): data.to_zarr(store, region={"y": slice(None)}, **self.version_kwargs) with setup_and_verify_store() as store: with pytest.raises( - ValueError, - match=r"all variables in the dataset to write must have at least one dimension in common", + ValueError, + match=r"all variables in the dataset to write must have at least one dimension in common", ): data2.assign(v=2).to_zarr( store, region={"x": slice(2)}, **self.version_kwargs @@ -3004,7 +3004,7 @@ def setup_and_verify_store(expected=data): with setup_and_verify_store() as store: with pytest.raises( - ValueError, match=r"cannot list the same dimension in both" + ValueError, match=r"cannot list the same dimension in both" ): data.to_zarr( store, @@ -3015,8 +3015,8 @@ def setup_and_verify_store(expected=data): with setup_and_verify_store() as store: with pytest.raises( - ValueError, - match=r"variable 'u' already exists with different dimension sizes", + ValueError, + match=r"variable 'u' already exists with different dimension sizes", ): data2.to_zarr(store, region={"x": slice(3)}, **self.version_kwargs) @@ -3043,7 +3043,7 @@ def test_chunk_encoding_with_partial_dask_chunks(self) -> None: ).chunk({"a": 3}) with self.roundtrip( - original, save_kwargs={"encoding": {"x": {"chunks": [3, 2]}}} + original, save_kwargs={"encoding": {"x": {"chunks": [3, 2]}}} ) as ds1: assert_equal(ds1, original) @@ -3052,7 +3052,7 @@ def test_chunk_encoding_with_larger_dask_chunks(self) -> None: original = xr.Dataset({"a": ("x", [1, 2, 3, 4])}).chunk({"x": 2}) with self.roundtrip( - original, save_kwargs={"encoding": {"a": {"chunks": [1]}}} + original, save_kwargs={"encoding": {"a": 
{"chunks": [1]}}} ) as ds1: assert_equal(ds1, original) @@ -3322,12 +3322,12 @@ def temp_dir(self) -> Iterator[tuple[str, str]]: @contextlib.contextmanager def roundtrip_dir( - self, - data, - store, - save_kwargs=None, - open_kwargs=None, - allow_cleanup_failure=False, + self, + data, + store, + save_kwargs=None, + open_kwargs=None, + allow_cleanup_failure=False, ) -> Iterator[Dataset]: if save_kwargs is None: save_kwargs = {} @@ -3336,14 +3336,14 @@ def roundtrip_dir( data.to_zarr(store, **save_kwargs, **self.version_kwargs) with xr.open_dataset( - store, engine="zarr", **open_kwargs, **self.version_kwargs + store, engine="zarr", **open_kwargs, **self.version_kwargs ) as ds: yield ds @pytest.mark.parametrize("consolidated", [True, False, None]) @pytest.mark.parametrize("write_empty", [True, False, None]) def test_write_empty( - self, consolidated: bool | None, write_empty: bool | None + self, consolidated: bool | None, write_empty: bool | None ) -> None: if write_empty is False: expected = ["0.1.0", "1.1.0"] @@ -3383,9 +3383,9 @@ def test_write_empty( ) with self.roundtrip_dir( - ds, - store, - {"mode": "a", "append_dim": "Z", "write_empty_chunks": write_empty}, + ds, + store, + {"mode": "a", "append_dim": "Z", "write_empty_chunks": write_empty}, ) as a_ds: expected_ds = xr.concat([ds, ds], dim="Z") @@ -3514,7 +3514,7 @@ def create_store(self): @contextlib.contextmanager def roundtrip( - self, data, save_kwargs=None, open_kwargs=None, allow_cleanup_failure=False + self, data, save_kwargs=None, open_kwargs=None, allow_cleanup_failure=False ): if save_kwargs is None: save_kwargs = {} @@ -3582,7 +3582,7 @@ class TestNetCDF3ViaNetCDF4Data(CFEncodedBase, NetCDF3Only): def create_store(self): with create_tmp_file() as tmp_file: with backends.NetCDF4DataStore.open( - tmp_file, mode="w", format="NETCDF3_CLASSIC" + tmp_file, mode="w", format="NETCDF3_CLASSIC" ) as store: yield store @@ -3603,7 +3603,7 @@ class TestNetCDF4ClassicViaNetCDF4Data(CFEncodedBase, NetCDF3Only): def create_store(self): with create_tmp_file() as tmp_file: with backends.NetCDF4DataStore.open( - tmp_file, mode="w", format="NETCDF4_CLASSIC" + tmp_file, mode="w", format="NETCDF4_CLASSIC" ) as store: yield store @@ -3706,7 +3706,7 @@ def test_complex_error(self, invalid_netcdf) -> None: expected = Dataset({"x": ("y", np.ones(5) + 1j * np.ones(5))}) save_kwargs = {"invalid_netcdf": invalid_netcdf} with pytest.raises( - h5netcdf.CompatibilityError, match="are not a supported NetCDF feature" + h5netcdf.CompatibilityError, match="are not a supported NetCDF feature" ): with self.roundtrip(expected, save_kwargs=save_kwargs) as actual: assert_equal(expected, actual) @@ -3821,7 +3821,7 @@ def test_compression_check_encoding_h5py(self) -> None: # Incompatible encodings cause a crash with create_tmp_file() as tmp_file: with pytest.raises( - ValueError, match=r"'zlib' and 'compression' encodings mismatch" + ValueError, match=r"'zlib' and 'compression' encodings mismatch" ): data.to_netcdf( tmp_file, @@ -3831,8 +3831,8 @@ def test_compression_check_encoding_h5py(self) -> None: with create_tmp_file() as tmp_file: with pytest.raises( - ValueError, - match=r"'complevel' and 'compression_opts' encodings mismatch", + ValueError, + match=r"'complevel' and 'compression_opts' encodings mismatch", ): data.to_netcdf( tmp_file, @@ -3929,7 +3929,7 @@ def test_open_badbytes(self) -> None: with open_dataset(b"\211HDF\r\n\032\n", engine="h5netcdf"): # type: ignore[arg-type] pass with pytest.raises( - ValueError, match=r"match in any of xarray's 
currently installed IO" + ValueError, match=r"match in any of xarray's currently installed IO" ): with open_dataset(b"garbage"): # type: ignore[arg-type] pass @@ -3937,7 +3937,7 @@ def test_open_badbytes(self) -> None: with open_dataset(b"garbage", engine="netcdf4"): # type: ignore[arg-type] pass with pytest.raises( - ValueError, match=r"not the signature of a valid netCDF4 file" + ValueError, match=r"not the signature of a valid netCDF4 file" ): with open_dataset(BytesIO(b"garbage"), engine="h5netcdf"): pass @@ -3991,7 +3991,7 @@ def test_open_fileobj(self) -> None: class TestH5NetCDFViaDaskData(TestH5NetCDFData): @contextlib.contextmanager def roundtrip( - self, data, save_kwargs=None, open_kwargs=None, allow_cleanup_failure=False + self, data, save_kwargs=None, open_kwargs=None, allow_cleanup_failure=False ): if save_kwargs is None: save_kwargs = {} @@ -3999,7 +3999,7 @@ def roundtrip( open_kwargs = {} open_kwargs.setdefault("chunks", -1) with TestH5NetCDFData.roundtrip( - self, data, save_kwargs, open_kwargs, allow_cleanup_failure + self, data, save_kwargs, open_kwargs, allow_cleanup_failure ) as ds: yield ds @@ -4035,9 +4035,9 @@ class TestH5NetCDFDataRos3Driver(TestCommon): @pytest.mark.filterwarnings("ignore:Duplicate dimension names") def test_get_variable_list(self) -> None: with open_dataset( - self.test_remote_dataset, - engine="h5netcdf", - backend_kwargs={"driver": "ros3"}, + self.test_remote_dataset, + engine="h5netcdf", + backend_kwargs={"driver": "ros3"}, ) as actual: assert "Temperature" in list(actual) @@ -4050,7 +4050,7 @@ def test_get_variable_list_empty_driver_kwds(self) -> None: backend_kwargs = {"driver": "ros3", "driver_kwds": driver_kwds} with open_dataset( - self.test_remote_dataset, engine="h5netcdf", backend_kwargs=backend_kwargs + self.test_remote_dataset, engine="h5netcdf", backend_kwargs=backend_kwargs ) as actual: assert "Temperature" in list(actual) @@ -4114,7 +4114,7 @@ def skip_if_not_engine(engine): reason="Flaky test which can cause the worker to crash (so don't xfail). 
Very open to contributions fixing this" ) def test_open_mfdataset_manyfiles( - readengine, nfiles, parallel, chunks, file_cache_maxsize + readengine, nfiles, parallel, chunks, file_cache_maxsize ): # skip certain combinations skip_if_not_engine(readengine) @@ -4133,12 +4133,12 @@ def test_open_mfdataset_manyfiles( # check that calculation on opened datasets works properly with open_mfdataset( - tmpfiles, - combine="nested", - concat_dim="x", - engine=readengine, - parallel=parallel, - chunks=chunks if (not chunks and readengine != "zarr") else "auto", + tmpfiles, + combine="nested", + concat_dim="x", + engine=readengine, + parallel=parallel, + chunks=chunks if (not chunks and readengine != "zarr") else "auto", ) as actual: # check that using open_mfdataset returns dask arrays for variables assert isinstance(actual["foo"].data, dask_array_type) @@ -4175,7 +4175,7 @@ def test_open_mfdataset_list_attr() -> None: with open_dataset(nfiles[1]) as ds2: original = xr.concat([ds1, ds2], dim="x") with xr.open_mfdataset( - [nfiles[0], nfiles[1]], combine="nested", concat_dim="x" + [nfiles[0], nfiles[1]], combine="nested", concat_dim="x" ) as actual: assert_identical(actual, original) @@ -4230,13 +4230,13 @@ def gen_datasets_with_common_coord_and_time(self): @pytest.mark.parametrize("opt", ["all", "minimal", "different"]) @pytest.mark.parametrize("join", ["outer", "inner", "left", "right"]) def test_open_mfdataset_does_same_as_concat( - self, combine, concat_dim, opt, join + self, combine, concat_dim, opt, join ) -> None: with self.setup_files_and_datasets() as (files, [ds1, ds2]): if combine == "by_coords": files.reverse() with open_mfdataset( - files, data_vars=opt, combine=combine, concat_dim=concat_dim, join=join + files, data_vars=opt, combine=combine, concat_dim=concat_dim, join=join ) as ds: ds_expect = xr.concat([ds1, ds2], data_vars=opt, dim="t", join=join) assert_identical(ds, ds_expect) @@ -4244,31 +4244,31 @@ def test_open_mfdataset_does_same_as_concat( @pytest.mark.parametrize( ["combine_attrs", "attrs", "expected", "expect_error"], ( - pytest.param("drop", [{"a": 1}, {"a": 2}], {}, False, id="drop"), - pytest.param( - "override", [{"a": 1}, {"a": 2}], {"a": 1}, False, id="override" - ), - pytest.param( - "no_conflicts", [{"a": 1}, {"a": 2}], None, True, id="no_conflicts" - ), - pytest.param( - "identical", - [{"a": 1, "b": 2}, {"a": 1, "c": 3}], - None, - True, - id="identical", - ), - pytest.param( - "drop_conflicts", - [{"a": 1, "b": 2}, {"b": -1, "c": 3}], - {"a": 1, "c": 3}, - False, - id="drop_conflicts", - ), + pytest.param("drop", [{"a": 1}, {"a": 2}], {}, False, id="drop"), + pytest.param( + "override", [{"a": 1}, {"a": 2}], {"a": 1}, False, id="override" + ), + pytest.param( + "no_conflicts", [{"a": 1}, {"a": 2}], None, True, id="no_conflicts" + ), + pytest.param( + "identical", + [{"a": 1, "b": 2}, {"a": 1, "c": 3}], + None, + True, + id="identical", + ), + pytest.param( + "drop_conflicts", + [{"a": 1, "b": 2}, {"b": -1, "c": 3}], + {"a": 1, "c": 3}, + False, + id="drop_conflicts", + ), ), ) def test_open_mfdataset_dataset_combine_attrs( - self, combine_attrs, attrs, expected, expect_error + self, combine_attrs, attrs, expected, expect_error ): with self.setup_files_and_datasets() as (files, [ds1, ds2]): # Give the files an inconsistent attribute @@ -4288,10 +4288,10 @@ def test_open_mfdataset_dataset_combine_attrs( ) else: with xr.open_mfdataset( - files, - combine="nested", - concat_dim="t", - combine_attrs=combine_attrs, + files, + combine="nested", + concat_dim="t", + 
combine_attrs=combine_attrs, ) as ds: assert ds.attrs == expected @@ -4330,13 +4330,13 @@ def test_open_mfdataset_dataarray_attr_by_coords(self) -> None: ) @pytest.mark.parametrize("opt", ["all", "minimal", "different"]) def test_open_mfdataset_exact_join_raises_error( - self, combine, concat_dim, opt + self, combine, concat_dim, opt ) -> None: with self.setup_files_and_datasets(fuzz=0.1) as (files, [ds1, ds2]): if combine == "by_coords": files.reverse() with pytest.raises( - ValueError, match=r"cannot align objects.*join.*exact.*" + ValueError, match=r"cannot align objects.*join.*exact.*" ): open_mfdataset( files, @@ -4352,7 +4352,7 @@ def test_common_coord_when_datavars_all(self) -> None: with self.setup_files_and_datasets() as (files, [ds1, ds2]): # open the files with the data_var option with open_mfdataset( - files, data_vars=opt, combine="nested", concat_dim="t" + files, data_vars=opt, combine="nested", concat_dim="t" ) as ds: coord_shape = ds[self.coord_name].shape coord_shape1 = ds1[self.coord_name].shape @@ -4370,7 +4370,7 @@ def test_common_coord_when_datavars_minimal(self) -> None: with self.setup_files_and_datasets() as (files, [ds1, ds2]): # open the files using data_vars option with open_mfdataset( - files, data_vars=opt, combine="nested", concat_dim="t" + files, data_vars=opt, combine="nested", concat_dim="t" ) as ds: coord_shape = ds[self.coord_name].shape coord_shape1 = ds1[self.coord_name].shape @@ -4404,7 +4404,7 @@ def create_store(self): @contextlib.contextmanager def roundtrip( - self, data, save_kwargs=None, open_kwargs=None, allow_cleanup_failure=False + self, data, save_kwargs=None, open_kwargs=None, allow_cleanup_failure=False ): yield data.chunk() @@ -4460,13 +4460,13 @@ def test_open_mfdataset(self) -> None: original.isel(x=slice(5)).to_netcdf(tmp1) original.isel(x=slice(5, 10)).to_netcdf(tmp2) with open_mfdataset( - [tmp1, tmp2], concat_dim="x", combine="nested" + [tmp1, tmp2], concat_dim="x", combine="nested" ) as actual: assert isinstance(actual.foo.variable.data, da.Array) assert actual.foo.variable.data.chunks == ((5, 5),) assert_identical(original, actual) with open_mfdataset( - [tmp1, tmp2], concat_dim="x", combine="nested", chunks={"x": 3} + [tmp1, tmp2], concat_dim="x", combine="nested", chunks={"x": 3} ) as actual: assert actual.foo.variable.data.chunks == ((3, 2, 3, 2),) @@ -4494,18 +4494,18 @@ def test_open_mfdataset_2d(self) -> None: original.isel(x=slice(5), y=slice(4, 8)).to_netcdf(tmp3) original.isel(x=slice(5, 10), y=slice(4, 8)).to_netcdf(tmp4) with open_mfdataset( - [[tmp1, tmp2], [tmp3, tmp4]], - combine="nested", - concat_dim=["y", "x"], + [[tmp1, tmp2], [tmp3, tmp4]], + combine="nested", + concat_dim=["y", "x"], ) as actual: assert isinstance(actual.foo.variable.data, da.Array) assert actual.foo.variable.data.chunks == ((5, 5), (4, 4)) assert_identical(original, actual) with open_mfdataset( - [[tmp1, tmp2], [tmp3, tmp4]], - combine="nested", - concat_dim=["y", "x"], - chunks={"x": 3, "y": 2}, + [[tmp1, tmp2], [tmp3, tmp4]], + combine="nested", + concat_dim=["y", "x"], + chunks={"x": 3, "y": 2}, ) as actual: assert actual.foo.variable.data.chunks == ( (3, 2, 3, 2), @@ -4521,7 +4521,7 @@ def test_open_mfdataset_pathlib(self) -> None: original.isel(x=slice(5)).to_netcdf(tmp1) original.isel(x=slice(5, 10)).to_netcdf(tmp2) with open_mfdataset( - [tmp1, tmp2], concat_dim="x", combine="nested" + [tmp1, tmp2], concat_dim="x", combine="nested" ) as actual: assert_identical(original, actual) @@ -4540,9 +4540,9 @@ def 
test_open_mfdataset_2d_pathlib(self) -> None: original.isel(x=slice(5), y=slice(4, 8)).to_netcdf(tmp3) original.isel(x=slice(5, 10), y=slice(4, 8)).to_netcdf(tmp4) with open_mfdataset( - [[tmp1, tmp2], [tmp3, tmp4]], - combine="nested", - concat_dim=["y", "x"], + [[tmp1, tmp2], [tmp3, tmp4]], + combine="nested", + concat_dim=["y", "x"], ) as actual: assert_identical(original, actual) @@ -4554,7 +4554,7 @@ def test_open_mfdataset_2(self) -> None: original.isel(x=slice(5, 10)).to_netcdf(tmp2) with open_mfdataset( - [tmp1, tmp2], concat_dim="x", combine="nested" + [tmp1, tmp2], concat_dim="x", combine="nested" ) as actual: assert_identical(original, actual) @@ -4569,7 +4569,7 @@ def test_attrs_mfdataset(self) -> None: ds1.to_netcdf(tmp1) ds2.to_netcdf(tmp2) with open_mfdataset( - [tmp1, tmp2], concat_dim="x", combine="nested" + [tmp1, tmp2], concat_dim="x", combine="nested" ) as actual: # presumes that attributes inherited from # first dataset loaded @@ -4588,7 +4588,7 @@ def test_open_mfdataset_attrs_file(self) -> None: ds1.to_netcdf(tmp1) ds2.to_netcdf(tmp2) with open_mfdataset( - [tmp1, tmp2], concat_dim="x", combine="nested", attrs_file=tmp2 + [tmp1, tmp2], concat_dim="x", combine="nested", attrs_file=tmp2 ) as actual: # attributes are inherited from the master file assert actual.attrs["test2"] == ds2.attrs["test2"] @@ -4607,7 +4607,7 @@ def test_open_mfdataset_attrs_file_path(self) -> None: ds1.to_netcdf(tmp1) ds2.to_netcdf(tmp2) with open_mfdataset( - [tmp1, tmp2], concat_dim="x", combine="nested", attrs_file=tmp2 + [tmp1, tmp2], concat_dim="x", combine="nested", attrs_file=tmp2 ) as actual: # attributes are inherited from the master file assert actual.attrs["test2"] == ds2.attrs["test2"] @@ -4666,7 +4666,7 @@ def preprocess(ds): expected = preprocess(original) with open_mfdataset( - tmp, preprocess=preprocess, combine="by_coords" + tmp, preprocess=preprocess, combine="by_coords" ) as actual: assert_identical(expected, actual) @@ -4677,7 +4677,7 @@ def test_save_mfdataset_roundtrip(self) -> None: with create_tmp_file() as tmp2: save_mfdataset(datasets, [tmp1, tmp2]) with open_mfdataset( - [tmp1, tmp2], concat_dim="x", combine="nested" + [tmp1, tmp2], concat_dim="x", combine="nested" ) as actual: assert_identical(actual, original) @@ -4703,7 +4703,7 @@ def test_save_mfdataset_pathlib_roundtrip(self) -> None: tmp2 = Path(tmps2) save_mfdataset(datasets, [tmp1, tmp2]) with open_mfdataset( - [tmp1, tmp2], concat_dim="x", combine="nested" + [tmp1, tmp2], concat_dim="x", combine="nested" ) as actual: assert_identical(actual, original) @@ -4745,7 +4745,7 @@ def test_open_mfdataset_concat_dim_none(self) -> None: data.to_netcdf(tmp1) Dataset({"x": np.nan}).to_netcdf(tmp2) with open_mfdataset( - [tmp1, tmp2], concat_dim=None, combine="nested" + [tmp1, tmp2], concat_dim=None, combine="nested" ) as actual: assert_identical(data, actual) @@ -4807,7 +4807,7 @@ def test_open_multi_dataset(self) -> None: original.to_netcdf(tmp1) original.to_netcdf(tmp2) with open_mfdataset( - [tmp1, tmp2], concat_dim=dim, combine="nested" + [tmp1, tmp2], concat_dim=dim, combine="nested" ) as actual: assert_identical(expected, actual) @@ -4861,7 +4861,7 @@ def test_save_mfdataset_compute_false_roundtrip(self) -> None: assert isinstance(delayed_obj, Delayed) delayed_obj.compute() with open_mfdataset( - [tmp1, tmp2], combine="nested", concat_dim="x" + [tmp1, tmp2], combine="nested", concat_dim="x" ) as actual: assert_identical(actual, original) @@ -5339,7 +5339,7 @@ def 
test_use_cftime_standard_calendar_default_in_range(calendar) -> None: @pytest.mark.parametrize("calendar", _STANDARD_CALENDARS) @pytest.mark.parametrize("units_year", [1500, 2500]) def test_use_cftime_standard_calendar_default_out_of_range( - calendar, units_year + calendar, units_year ) -> None: import cftime @@ -5525,7 +5525,7 @@ def test_open_fsspec() -> None: import zarr if not hasattr(zarr.storage, "FSStore") or not hasattr( - zarr.storage.FSStore, "getitems" + zarr.storage.FSStore, "getitems" ): pytest.skip("zarr too old") @@ -5608,7 +5608,7 @@ def test_open_dataset_chunking_zarr(chunks, tmp_path: Path) -> None: with dask.config.set({"array.chunk-size": "1MiB"}): expected = ds.chunk(chunks) with open_dataset( - tmp_path / "test.zarr", engine="zarr", chunks=chunks + tmp_path / "test.zarr", engine="zarr", chunks=chunks ) as actual: xr.testing.assert_chunks_equal(actual, expected) @@ -5639,7 +5639,7 @@ def test_chunking_consintency(chunks, tmp_path: Path) -> None: with dask.config.set({"array.chunk-size": "1MiB"}): expected = ds.chunk(chunks) with xr.open_dataset( - tmp_path / "test.zarr", engine="zarr", chunks=chunks + tmp_path / "test.zarr", engine="zarr", chunks=chunks ) as actual: xr.testing.assert_chunks_equal(actual, expected) @@ -5733,7 +5733,7 @@ def test_h5netcdf_entrypoint(tmp_path: Path) -> None: @requires_netCDF4 @pytest.mark.parametrize("str_type", (str, np.str_)) def test_write_file_from_np_str( - str_type: type[str] | type[np.str_], tmpdir: str + str_type: type[str] | type[np.str_], tmpdir: str ) -> None: # https://github.com/pydata/xarray/pull/5264 scenarios = [str_type(v) for v in ["scenario_a", "scenario_b", "scenario_c"]] @@ -5799,7 +5799,7 @@ def test_raise_writing_to_nczarr(self, mode) -> None: with create_tmp_file(suffix=".zarr") as tmp: ds = self._create_nczarr(tmp) with pytest.raises( - KeyError, match="missing the attribute `_ARRAY_DIMENSIONS`," + KeyError, match="missing the attribute `_ARRAY_DIMENSIONS`," ): ds.to_zarr(tmp, mode=mode) @@ -5948,10 +5948,10 @@ def test_zarr_region_index_write(self, tmp_path): region: Mapping[str, slice] | Literal["auto"] for region in [region_slice, "auto"]: # type: ignore[assignment] with patch.object( - ZarrStore, - "set_variables", - side_effect=ZarrStore.set_variables, - autospec=True, + ZarrStore, + "set_variables", + side_effect=ZarrStore.set_variables, + autospec=True, ) as mock: ds_region.to_zarr(tmp_path / "test.zarr", region=region, mode="r+")
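As a rough illustration of the region-write behaviour the zarr tests above exercise (this sketch is not part of the patch; the store path, array sizes and chunking below are invented for the example), the user-facing API looks roughly like this:

    import numpy as np
    import xarray as xr

    store = "example.zarr"  # hypothetical path, chosen only for the sketch

    # Create a store whose zarr chunks are 3 elements long.
    base = xr.Dataset({"var": ("x", np.zeros(10))}).chunk({"x": 3})
    base.to_zarr(store, mode="w", encoding={"var": {"chunks": (3,)}})

    # Overwrite a region that starts inside a zarr chunk. With the default
    # safe_chunks=True such a write is expected to be validated (and rejected
    # when the dask chunks of the region straddle zarr chunk borders);
    # passing safe_chunks=False opts out of that validation at the cost of
    # possible data corruption under parallel writes.
    update = xr.Dataset({"var": ("x", np.ones(5))}).chunk({"x": 5})
    update.to_zarr(store, region={"x": slice(2, 7)}, safe_chunks=False)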