Skip to content

deprecate encoding setters #7708

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions doc/user-guide/io.rst
Original file line number Diff line number Diff line change
Expand Up @@ -519,8 +519,9 @@ difference between values, with a reference time of the first time value.
Coordinates
...........

You can control the ``coordinates`` attribute written to disk by specifying ``DataArray.encoding["coordinates"]``.
If not specified, xarray automatically sets ``DataArray.encoding["coordinates"]`` to a space-delimited list
You can control the ``coordinates`` attribute written to disk by specifying the ``coordinates`` key in the
output encoding keyword argument to `to_netcdf` or `to_zarr`. If not specified, xarray automatically sets
the ``coordinates`` attribute for each variable to a space-delimited list
of names of coordinate variables that share dimensions with the ``DataArray`` being written.
This allows perfect roundtripping of xarray datasets but may not be desirable.
When an xarray ``Dataset`` contains non-dimensional coordinates that do not share dimensions with any of
Expand Down
4 changes: 3 additions & 1 deletion doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,9 @@ Breaking changes

Deprecations
~~~~~~~~~~~~

- Setting encoding on Xarray objects directly via the ``encoding`` attribute is deprecated in favor of using the
``encoding`` kwarg in :py:meth:`Dataset.to_netcdf` / :py:meth:`Dataset.to_zarr`. (:issue:`6323`, :pull:`7708`).
By `Joe Hamman <https://github.com/jhamman>`_.

Bug fixes
~~~~~~~~~
Expand Down
2 changes: 1 addition & 1 deletion xarray/backends/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -1254,7 +1254,7 @@ def dump_to_store(
for k, enc in encoding.items():
# no need to shallow copy the variable again; that already happened
# in encode_dataset_coordinates
variables[k].encoding = enc
variables[k]._set_encoding_internal(enc)
check_encoding.add(k)

if encoder:
Expand Down
2 changes: 1 addition & 1 deletion xarray/backends/store.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def open_dataset( # type: ignore[override] # allow LSP violation, not supporti
ds = Dataset(vars, attrs=attrs)
ds = ds.set_coords(coord_names.intersection(vars))
ds.set_close(filename_or_obj.close)
ds.encoding = encoding
ds._set_encoding_internal(encoding)

return ds

Expand Down
6 changes: 4 additions & 2 deletions xarray/backends/zarr.py
Original file line number Diff line number Diff line change
Expand Up @@ -594,7 +594,9 @@ def store(
vars_with_encoding = {}
for vn in existing_variable_names:
vars_with_encoding[vn] = variables[vn].copy(deep=False)
vars_with_encoding[vn].encoding = existing_vars[vn].encoding
vars_with_encoding[vn]._set_encoding_internal(
existing_vars[vn].encoding
)
vars_with_encoding, _ = self.encode(vars_with_encoding, {})
variables_encoded.update(vars_with_encoding)

Expand Down Expand Up @@ -650,7 +652,7 @@ def set_variables(self, variables, check_encoding_set, writer, unlimited_dims=No

fill_value = attrs.pop("_FillValue", None)
if v.encoding == {"_FillValue": None} and fill_value is None:
v.encoding = {}
v._encoding = {}

if name in self.zarr_group:
# existing variable
Expand Down
2 changes: 1 addition & 1 deletion xarray/conventions.py
Original file line number Diff line number Diff line change
Expand Up @@ -694,7 +694,7 @@ def decode_cf(
ds = Dataset(vars, attrs=attrs)
ds = ds.set_coords(coord_names.union(extra_coords).intersection(vars))
ds.set_close(close)
ds.encoding = encoding
ds._set_encoding_internal(encoding)

return ds

Expand Down
2 changes: 1 addition & 1 deletion xarray/core/alignment.py
Original file line number Diff line number Diff line change
Expand Up @@ -548,7 +548,7 @@ def _reindex_one(
self.exclude_dims,
self.exclude_vars,
)
new_obj.encoding = obj.encoding
new_obj._set_encoding_internal(obj.encoding)
return new_obj

def reindex_all(self) -> None:
Expand Down
2 changes: 1 addition & 1 deletion xarray/core/concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -643,7 +643,7 @@ def get_indexes(name):
f"Variables {absent_coord_names!r} are coordinates in some datasets but not others."
)
result = result.set_coords(coord_names)
result.encoding = result_encoding
result._set_encoding_internal(result_encoding)

result = result.drop_vars(unlabeled_dims, errors="ignore")

Expand Down
12 changes: 11 additions & 1 deletion xarray/core/dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -870,13 +870,23 @@ def attrs(self, value: Mapping[Any, Any]) -> None:
@property
def encoding(self) -> dict[Any, Any]:
"""Dictionary of format-specific settings for how this array should be
serialized."""
serialized.

.. warning::
Setting encoding directly using the encoding property is deprecated.
Use the encoding kwarg in to_netcdf/to_zarr to set output encoding.
"""
return self.variable.encoding

@encoding.setter
def encoding(self, value: Mapping[Any, Any]) -> None:
# deprecated (variable setter will warn)
self.variable.encoding = dict(value)

def _set_encoding_internal(self, value: Mapping[Any, Any]) -> None:
"""temporary method to set encoding without issuing a FutureWarning"""
self.variable._set_encoding_internal(value)
Comment on lines 881 to +888
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In the future when @encoding.setter is gone, we will likely still wan/need a way to insert encoding attributes onto objects (coding and backends currently require this). I'm open to new names but this method is intended to be that code path.


def reset_encoding(self: T_DataArray) -> T_DataArray:
"""Return a new DataArray without encoding on the array or any attached
coords."""
Expand Down
24 changes: 21 additions & 3 deletions xarray/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -657,14 +657,32 @@ def attrs(self, value: Mapping[Any, Any]) -> None:

@property
def encoding(self) -> dict[Any, Any]:
"""Dictionary of global encoding attributes on this dataset"""
"""Dictionary of global encoding attributes on this dataset

.. warning::
Setting encoding directly using the encoding property is deprecated.
Use the encoding kwarg in to_netcdf/to_zarr to set output encoding.
"""
if self._encoding is None:
self._encoding = {}
return self._encoding

@encoding.setter
def encoding(self, value: Mapping[Any, Any]) -> None:
self._encoding = dict(value)
warnings.warn(
"Setting encoding directly using the encoding property is "
"deprecated. Use the encoding kwarg in to_netcdf/to_zarr to "
"set output encoding.",
category=FutureWarning,
)
self._set_encoding_internal(value)

def _set_encoding_internal(self, value: Mapping[Any, Any]) -> None:
"""temporary method to set encoding without issuing a FutureWarning"""
try:
self._encoding = dict(value)
except ValueError:
raise ValueError("encoding must be castable to a dictionary")

def reset_encoding(self: T_Dataset) -> T_Dataset:
"""Return a new Dataset without encoding on the dataset or any of its
Expand Down Expand Up @@ -2821,7 +2839,7 @@ def _reindex_callback(
var = self._variables.get(name)
if var is not None:
new_var.attrs = var.attrs
new_var.encoding = var.encoding
new_var._set_encoding_internal(var.encoding)

# pass through indexes from excluded dimensions
# no extra check needed for multi-coordinate indexes, potential conflicts
Expand Down
4 changes: 2 additions & 2 deletions xarray/core/parallel.py
Original file line number Diff line number Diff line change
Expand Up @@ -553,7 +553,7 @@ def subset_dataset_to_block(

for index in result._indexes:
result[index].attrs = template[index].attrs
result[index].encoding = template[index].encoding
result[index]._set_encoding_internal(template[index].encoding)

for name, gname_l in var_key_map.items():
dims = template[name].dims
Expand All @@ -571,7 +571,7 @@ def subset_dataset_to_block(
hlg, name=gname_l, chunks=var_chunks, dtype=template[name].dtype
)
result[name] = (dims, data, template[name].attrs)
result[name].encoding = template[name].encoding
result[name]._set_encoding_internal(template[name].encoding)

result = result.set_coords(template._coord_names)

Expand Down
21 changes: 18 additions & 3 deletions xarray/core/variable.py
Original file line number Diff line number Diff line change
Expand Up @@ -365,7 +365,7 @@ def __init__(self, dims, data, attrs=None, encoding=None, fastpath=False):
if attrs is not None:
self.attrs = attrs
if encoding is not None:
self.encoding = encoding
self._set_encoding_internal(encoding)

@property
def dtype(self):
Expand Down Expand Up @@ -969,13 +969,28 @@ def attrs(self, value: Mapping[Any, Any]) -> None:

@property
def encoding(self) -> dict[Any, Any]:
"""Dictionary of encodings on this variable."""
"""Dictionary of encodings on this variable.

.. warning::
Setting encoding directly using the encoding property is deprecated.
Use the encoding kwarg in to_netcdf/to_zarr to set output encoding.
"""
if self._encoding is None:
self._encoding = {}
return self._encoding

@encoding.setter
def encoding(self, value):
def encoding(self, value: Mapping[Any, Any]) -> None:
warnings.warn(
"Setting encoding directly using the encoding property is "
"deprecated. Use the encoding kwarg in to_netcdf/to_zarr to "
"set output encoding.",
category=FutureWarning,
)
self._set_encoding_internal(value)

def _set_encoding_internal(self, value: Mapping[Any, Any]) -> None:
"""temporary method to set encoding without issuing a FutureWarning"""
try:
self._encoding = dict(value)
except ValueError:
Expand Down
2 changes: 1 addition & 1 deletion xarray/tests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,7 @@ def create_test_data(seed: int | None = None, add_attrs: bool = True) -> Dataset
"dim3",
np.array([0, 1, 2, 0, 0, 1, 1, 2, 2, 3], dtype="int64"),
)
obj.encoding = {"foo": "bar"}
obj._encoding = {"foo": "bar"}
assert all(obj.data.flags.writeable for obj in obj.variables.values())
return obj

Expand Down
6 changes: 6 additions & 0 deletions xarray/tests/test_backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -512,6 +512,7 @@ def test_roundtrip_string_data(self) -> None:
with self.roundtrip(expected) as actual:
assert_identical(expected, actual)

@pytest.mark.filterwarnings("ignore:Setting encoding directly.*:FutureWarning")
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've explicitly marked all tests that use the encoding setter with this filter. When we remove the setter, we'll need to come back through and rework these tests.

def test_roundtrip_string_encoded_characters(self) -> None:
expected = Dataset({"x": ("t", ["ab", "cdef"])})
expected["x"].encoding["dtype"] = "S1"
Expand Down Expand Up @@ -1504,6 +1505,7 @@ def test_encoding_kwarg_compression(self) -> None:

assert ds.x.encoding == {}

@pytest.mark.filterwarnings("ignore:Setting encoding directly.*:FutureWarning")
def test_keep_chunksizes_if_no_original_shape(self) -> None:
ds = Dataset({"x": [1, 2, 3]})
chunksizes = (2,)
Expand All @@ -1515,6 +1517,7 @@ def test_keep_chunksizes_if_no_original_shape(self) -> None:
ds["x"].encoding["chunksizes"], actual["x"].encoding["chunksizes"]
)

@pytest.mark.filterwarnings("ignore:Setting encoding directly.*:FutureWarning")
def test_encoding_chunksizes_unlimited(self) -> None:
# regression test for GH1225
ds = Dataset({"x": [1, 2, 3], "y": ("x", [2, 3, 4])})
Expand Down Expand Up @@ -1581,6 +1584,7 @@ def test_read_variable_len_strings(self) -> None:
with open_dataset(tmp_file, **cast(dict, kwargs)) as actual:
assert_identical(expected, actual)

@pytest.mark.filterwarnings("ignore:Setting encoding directly.*:FutureWarning")
def test_encoding_unlimited_dims(self) -> None:
ds = Dataset({"x": ("y", np.arange(10.0))})
with self.roundtrip(ds, save_kwargs=dict(unlimited_dims=["y"])) as actual:
Expand Down Expand Up @@ -2855,6 +2859,7 @@ def test_cross_engine_read_write_netcdf3(self) -> None:
for k in data.variables
]

@pytest.mark.filterwarnings("ignore:Setting encoding directly.*:FutureWarning")
def test_encoding_unlimited_dims(self) -> None:
ds = Dataset({"x": ("y", np.arange(10.0))})
with self.roundtrip(ds, save_kwargs=dict(unlimited_dims=["y"])) as actual:
Expand Down Expand Up @@ -2938,6 +2943,7 @@ def test_read_byte_attrs_as_unicode(self) -> None:
expected = Dataset(attrs={"foo": "bar"})
assert_identical(expected, actual)

@pytest.mark.filterwarnings("ignore:Setting encoding directly.*:FutureWarning")
def test_encoding_unlimited_dims(self) -> None:
ds = Dataset({"x": ("y", np.arange(10.0))})
with self.roundtrip(ds, save_kwargs=dict(unlimited_dims=["y"])) as actual:
Expand Down
1 change: 1 addition & 0 deletions xarray/tests/test_coding.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ def test_CFMaskCoder_encode_missing_fill_values_conflict(data, encoding) -> None
assert_identical(roundtripped, original)


@pytest.mark.filterwarnings("ignore:Setting encoding directly.*:FutureWarning")
def test_CFMaskCoder_missing_value() -> None:
expected = xr.DataArray(
np.array([[26915, 27755, -9999, 27705], [25595, -9999, 28315, -9999]]),
Expand Down
1 change: 1 addition & 0 deletions xarray/tests/test_coding_times.py
Original file line number Diff line number Diff line change
Expand Up @@ -712,6 +712,7 @@ def test_decode_cf_time_bounds() -> None:


@requires_cftime
@pytest.mark.filterwarnings("ignore:Setting encoding directly.*:FutureWarning")
def test_encode_time_bounds() -> None:
time = pd.date_range("2000-01-16", periods=1)
time_bounds = pd.date_range("2000-01-01", periods=2, freq="MS")
Expand Down
1 change: 1 addition & 0 deletions xarray/tests/test_concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -1015,6 +1015,7 @@ def test_concat(self) -> None:
with pytest.raises(ValueError, match=r"not a valid argument"):
concat([foo, bar], dim="w", data_vars="minimal")

@pytest.mark.filterwarnings("ignore:Setting encoding directly.*:FutureWarning")
def test_concat_encoding(self) -> None:
# Regression test for GH1297
ds = Dataset(
Expand Down
1 change: 1 addition & 0 deletions xarray/tests/test_conventions.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,7 @@ def test_var_with_coord_attr(self) -> None:
# Should not have any global coordinates.
assert "coordinates" not in attrs

@pytest.mark.filterwarnings("ignore:Setting encoding directly.*:FutureWarning")
def test_do_not_overwrite_user_coordinates(self) -> None:
orig = Dataset(
coords={"x": [0, 1, 2], "y": ("x", [5, 6, 7]), "z": ("x", [8, 9, 10])},
Expand Down
12 changes: 12 additions & 0 deletions xarray/tests/test_dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,7 @@ def test_sizes(self) -> None:
with pytest.raises(TypeError):
array.sizes["foo"] = 5 # type: ignore

@pytest.mark.filterwarnings("ignore:Setting encoding directly.*:FutureWarning")
def test_encoding(self) -> None:
expected = {"foo": "bar"}
self.dv.encoding["foo"] = "bar"
Expand All @@ -278,6 +279,7 @@ def test_encoding(self) -> None:
self.dv.encoding = expected2
assert expected2 is not self.dv.encoding

@pytest.mark.filterwarnings("ignore:Setting encoding directly.*:FutureWarning")
def test_reset_encoding(self) -> None:
array = self.mda
encoding = {"scale_factor": 10}
Expand Down Expand Up @@ -3324,6 +3326,7 @@ def test_series_categorical_index(self) -> None:
arr = DataArray(s)
assert "'a'" in repr(arr) # should not error

@pytest.mark.filterwarnings("ignore:Setting encoding directly.*:FutureWarning")
@pytest.mark.parametrize("encoding", [True, False])
def test_to_and_from_dict(self, encoding) -> None:
array = DataArray(
Expand Down Expand Up @@ -6735,3 +6738,12 @@ def test_error_on_ellipsis_without_list(self) -> None:
da = DataArray([[1, 2], [1, 2]], dims=("x", "y"))
with pytest.raises(ValueError):
da.stack(flat=...) # type: ignore


def test_setting_encoding_property_warns_deprecated():
da = DataArray([0, 1, 2])
with pytest.warns(
FutureWarning,
match=r"Setting encoding directly using the encoding property is deprecated.*",
):
da.encoding = {"dtype": "f4"}
17 changes: 17 additions & 0 deletions xarray/tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -2873,6 +2873,7 @@ def test_copy_with_data_errors(self) -> None:
with pytest.raises(ValueError, match=r"contain all variables in original"):
orig.copy(data={"var1": new_var1})

@pytest.mark.filterwarnings("ignore:Setting encoding directly.*:FutureWarning")
def test_reset_encoding(self) -> None:
orig = create_test_data()
vencoding = {"scale_factor": 10}
Expand Down Expand Up @@ -3037,6 +3038,7 @@ def test_rename_multiindex(self) -> None:
with pytest.warns(UserWarning, match="does not create an index anymore"):
original.rename({"a": "b"})

@pytest.mark.filterwarnings("ignore:Setting encoding directly.*:FutureWarning")
def test_rename_perserve_attrs_encoding(self) -> None:
# test propagate attrs/encoding to new variable(s) created from Index object
original = Dataset(coords={"x": ("x", [0, 1, 2])})
Expand Down Expand Up @@ -7024,3 +7026,18 @@ def test_transpose_error() -> None:
),
):
ds.transpose(["y", "x"]) # type: ignore


def test_setting_encoding_property_warns_deprecated():
ds = xr.Dataset({"foo": (("x", "y"), [[21]]), "bar": (("x", "y"), [[12]])})
with pytest.warns(
FutureWarning,
match=r"Setting encoding directly using the encoding property is deprecated.*",
):
ds.encoding = {"unlimited_dims": {"x"}}

with pytest.warns(
FutureWarning,
match=r"Setting encoding directly using the encoding property is deprecated.*",
):
ds["foo"].encoding = {"dtype": "f4"}
9 changes: 9 additions & 0 deletions xarray/tests/test_variable.py
Original file line number Diff line number Diff line change
Expand Up @@ -2963,3 +2963,12 @@ def test_pandas_two_only_timedelta_conversion_warning() -> None:
var = Variable(["time"], data)

assert var.dtype == np.dtype("timedelta64[ns]")


def test_setting_encoding_property_warns_deprecated():
v = Variable("x", [1, 2, 3])
with pytest.warns(
FutureWarning,
match=r"Setting encoding directly using the encoding property is deprecated.*",
):
v.encoding = {"dtype": "f4"}