Feature/to dict encoding (#6635)

Joe Hamman · Illviljan · web-flow · commit 0ea1e3554120 · 2022-05-26T15:17:35.000-04:00
* add encoding option to dataset/dataarray/variable to_dict methods

* Update xarray/core/dataset.py

Co-authored-by: Illviljan <14371165+Illviljan@users.noreply.github.com>

* Update xarray/tests/test_dataarray.py

Co-authored-by: Illviljan <14371165+Illviljan@users.noreply.github.com>

* type dict in to_dict

* add whats new

Co-authored-by: Illviljan <14371165+Illviljan@users.noreply.github.com>
diff --git a/doc/whats-new.rst b/doc/whats-new.rst
@@ -48,6 +48,9 @@ New Features
   is faster and requires less memory. (:pull:`6548`)
   By `Michael Niklas <https://github.com/headtr1ck>`_.
 - Improved overall typing.
+- :py:meth:`Dataset.to_dict` and :py:meth:`DataArray.to_dict` may now optionally include encoding
+  attributes. (:pull:`6635`)
+  By `Joe Hamman <https://github.com/jhamman>`_.
 
 Breaking changes
 ~~~~~~~~~~~~~~~~
diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py
@@ -3067,7 +3067,7 @@ def to_netcdf(
             invalid_netcdf=invalid_netcdf,
         )
 
-    def to_dict(self, data: bool = True) -> dict:
+    def to_dict(self, data: bool = True, encoding: bool = False) -> dict:
         """
         Convert this xarray.DataArray into a dictionary following xarray
         naming conventions.
@@ -3081,15 +3081,20 @@ def to_dict(self, data: bool = True) -> dict:
         data : bool, optional
             Whether to include the actual data in the dictionary. When set to
             False, returns just the schema.
+        encoding : bool, optional
+            Whether to include the DataArray's encoding in the dictionary.
 
         See Also
         --------
         DataArray.from_dict
+        Dataset.to_dict
         """
         d = self.variable.to_dict(data=data)
         d.update({"coords": {}, "name": self.name})
         for k in self.coords:
             d["coords"][k] = self.coords[k].variable.to_dict(data=data)
+        if encoding:
+            d["encoding"] = dict(self.encoding)
         return d
 
     @classmethod
@@ -3155,6 +3160,9 @@ def from_dict(cls, d: dict) -> DataArray:
             raise ValueError("cannot convert dict without the key 'data''")
         else:
             obj = cls(data, coords, d.get("dims"), d.get("name"), d.get("attrs"))
+
+        obj.encoding.update(d.get("encoding", {}))
+
         return obj
 
     @classmethod
diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
@@ -5944,7 +5944,7 @@ def to_dask_dataframe(self, dim_order=None, set_index=False):
 
         return df
 
-    def to_dict(self, data=True):
+    def to_dict(self, data: bool = True, encoding: bool = False) -> dict:
         """
         Convert this dataset to a dictionary following xarray naming
         conventions.
@@ -5958,21 +5958,34 @@ def to_dict(self, data=True):
         data : bool, optional
             Whether to include the actual data in the dictionary. When set to
             False, returns just the schema.
+        encoding : bool, optional
+            Whether to include the Dataset's encoding in the dictionary.
+
+        Returns
+        -------
+        d : dict
 
         See Also
         --------
         Dataset.from_dict
+        DataArray.to_dict
         """
-        d = {
+        d: dict = {
             "coords": {},
             "attrs": decode_numpy_dict_values(self.attrs),
             "dims": dict(self.dims),
             "data_vars": {},
         }
         for k in self.coords:
-            d["coords"].update({k: self[k].variable.to_dict(data=data)})
+            d["coords"].update(
+                {k: self[k].variable.to_dict(data=data, encoding=encoding)}
+            )
         for k in self.data_vars:
-            d["data_vars"].update({k: self[k].variable.to_dict(data=data)})
+            d["data_vars"].update(
+                {k: self[k].variable.to_dict(data=data, encoding=encoding)}
+            )
+        if encoding:
+            d["encoding"] = dict(self.encoding)
         return d
 
     @classmethod
@@ -6061,6 +6074,7 @@ def from_dict(cls, d):
         obj = obj.set_coords(coords)
 
         obj.attrs.update(d.get("attrs", {}))
+        obj.encoding.update(d.get("encoding", {}))
 
         return obj
 
diff --git a/xarray/core/variable.py b/xarray/core/variable.py
@@ -533,13 +533,17 @@ def to_index(self):
         """Convert this variable to a pandas.Index"""
         return self.to_index_variable().to_index()
 
-    def to_dict(self, data=True):
+    def to_dict(self, data: bool = True, encoding: bool = False) -> dict:
         """Dictionary representation of variable."""
         item = {"dims": self.dims, "attrs": decode_numpy_dict_values(self.attrs)}
         if data:
             item["data"] = ensure_us_time_resolution(self.values).tolist()
         else:
             item.update({"dtype": str(self.dtype), "shape": self.shape})
+
+        if encoding:
+            item["encoding"] = dict(self.encoding)
+
         return item
 
     @property
diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py
@@ -3140,18 +3140,22 @@ def test_series_categorical_index(self):
         arr = DataArray(s)
         assert "'a'" in repr(arr)  # should not error
 
-    def test_to_and_from_dict(self):
+    @pytest.mark.parametrize("encoding", [True, False])
+    def test_to_and_from_dict(self, encoding) -> None:
         array = DataArray(
             np.random.randn(2, 3), {"x": ["a", "b"]}, ["x", "y"], name="foo"
         )
+        array.encoding = {"bar": "spam"}
         expected = {
             "name": "foo",
             "dims": ("x", "y"),
             "data": array.values.tolist(),
             "attrs": {},
             "coords": {"x": {"dims": ("x",), "data": ["a", "b"], "attrs": {}}},
         }
-        actual = array.to_dict()
+        if encoding:
+            expected["encoding"] = {"bar": "spam"}
+        actual = array.to_dict(encoding=encoding)
 
         # check that they are identical
         assert expected == actual
@@ -3198,7 +3202,7 @@ def test_to_and_from_dict(self):
         endiantype = "<U1" if sys.byteorder == "little" else ">U1"
         expected_no_data["coords"]["x"].update({"dtype": endiantype, "shape": (2,)})
         expected_no_data.update({"dtype": "float64", "shape": (2, 3)})
-        actual_no_data = array.to_dict(data=False)
+        actual_no_data = array.to_dict(data=False, encoding=encoding)
         assert expected_no_data == actual_no_data
 
     def test_to_and_from_dict_with_time_dim(self):