Skip to content

Commit 0ea1e35

Browse files
Joe Hamman and Illviljan authored
Feature/to dict encoding (#6635)
* add encoding option to dataset/dataarray/variable to_dict methods
* Update xarray/core/dataset.py
  Co-authored-by: Illviljan <[email protected]>
* Update xarray/tests/test_dataarray.py
  Co-authored-by: Illviljan <[email protected]>
* type dict in to_dict
* add whats new
Co-authored-by: Illviljan <[email protected]>
1 parent 3b242a1 commit 0ea1e35

File tree

5 files changed

+42
-9
lines changed

5 files changed

+42
-9
lines changed

doc/whats-new.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,9 @@ New Features
4848
is faster and requires less memory. (:pull:`6548`)
4949
By `Michael Niklas <https://github.com/headtr1ck>`_.
5050
- Improved overall typing.
51+
- :py:meth:`Dataset.to_dict` and :py:meth:`DataArray.to_dict` may now optionally include encoding
52+
attributes. (:pull:`6635`)
53+
By `Joe Hamman <https://github.com/jhamman>`_.
5154

5255
Breaking changes
5356
~~~~~~~~~~~~~~~~

xarray/core/dataarray.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3067,7 +3067,7 @@ def to_netcdf(
30673067
invalid_netcdf=invalid_netcdf,
30683068
)
30693069

3070-
def to_dict(self, data: bool = True) -> dict:
3070+
def to_dict(self, data: bool = True, encoding: bool = False) -> dict:
30713071
"""
30723072
Convert this xarray.DataArray into a dictionary following xarray
30733073
naming conventions.
@@ -3081,15 +3081,20 @@ def to_dict(self, data: bool = True) -> dict:
30813081
data : bool, optional
30823082
Whether to include the actual data in the dictionary. When set to
30833083
False, returns just the schema.
3084+
encoding : bool, optional
3085+
Whether to include the DataArray's encoding in the dictionary.
30843086
30853087
See Also
30863088
--------
30873089
DataArray.from_dict
3090+
Dataset.to_dict
30883091
"""
30893092
d = self.variable.to_dict(data=data)
30903093
d.update({"coords": {}, "name": self.name})
30913094
for k in self.coords:
30923095
d["coords"][k] = self.coords[k].variable.to_dict(data=data)
3096+
if encoding:
3097+
d["encoding"] = dict(self.encoding)
30933098
return d
30943099

30953100
@classmethod
@@ -3155,6 +3160,9 @@ def from_dict(cls, d: dict) -> DataArray:
31553160
raise ValueError("cannot convert dict without the key 'data''")
31563161
else:
31573162
obj = cls(data, coords, d.get("dims"), d.get("name"), d.get("attrs"))
3163+
3164+
obj.encoding.update(d.get("encoding", {}))
3165+
31583166
return obj
31593167

31603168
@classmethod

xarray/core/dataset.py

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5944,7 +5944,7 @@ def to_dask_dataframe(self, dim_order=None, set_index=False):
59445944

59455945
return df
59465946

5947-
def to_dict(self, data=True):
5947+
def to_dict(self, data: bool = True, encoding: bool = False) -> dict:
59485948
"""
59495949
Convert this dataset to a dictionary following xarray naming
59505950
conventions.
@@ -5958,21 +5958,34 @@ def to_dict(self, data=True):
59585958
data : bool, optional
59595959
Whether to include the actual data in the dictionary. When set to
59605960
False, returns just the schema.
5961+
encoding : bool, optional
5962+
Whether to include the Dataset's encoding in the dictionary.
5963+
5964+
Returns
5965+
-------
5966+
d : dict
59615967
59625968
See Also
59635969
--------
59645970
Dataset.from_dict
5971+
DataArray.to_dict
59655972
"""
5966-
d = {
5973+
d: dict = {
59675974
"coords": {},
59685975
"attrs": decode_numpy_dict_values(self.attrs),
59695976
"dims": dict(self.dims),
59705977
"data_vars": {},
59715978
}
59725979
for k in self.coords:
5973-
d["coords"].update({k: self[k].variable.to_dict(data=data)})
5980+
d["coords"].update(
5981+
{k: self[k].variable.to_dict(data=data, encoding=encoding)}
5982+
)
59745983
for k in self.data_vars:
5975-
d["data_vars"].update({k: self[k].variable.to_dict(data=data)})
5984+
d["data_vars"].update(
5985+
{k: self[k].variable.to_dict(data=data, encoding=encoding)}
5986+
)
5987+
if encoding:
5988+
d["encoding"] = dict(self.encoding)
59765989
return d
59775990

59785991
@classmethod
@@ -6061,6 +6074,7 @@ def from_dict(cls, d):
60616074
obj = obj.set_coords(coords)
60626075

60636076
obj.attrs.update(d.get("attrs", {}))
6077+
obj.encoding.update(d.get("encoding", {}))
60646078

60656079
return obj
60666080

xarray/core/variable.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -533,13 +533,17 @@ def to_index(self):
533533
"""Convert this variable to a pandas.Index"""
534534
return self.to_index_variable().to_index()
535535

536-
def to_dict(self, data=True):
536+
def to_dict(self, data: bool = True, encoding: bool = False) -> dict:
537537
"""Dictionary representation of variable."""
538538
item = {"dims": self.dims, "attrs": decode_numpy_dict_values(self.attrs)}
539539
if data:
540540
item["data"] = ensure_us_time_resolution(self.values).tolist()
541541
else:
542542
item.update({"dtype": str(self.dtype), "shape": self.shape})
543+
544+
if encoding:
545+
item["encoding"] = dict(self.encoding)
546+
543547
return item
544548

545549
@property

xarray/tests/test_dataarray.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3140,18 +3140,22 @@ def test_series_categorical_index(self):
31403140
arr = DataArray(s)
31413141
assert "'a'" in repr(arr) # should not error
31423142

3143-
def test_to_and_from_dict(self):
3143+
@pytest.mark.parametrize("encoding", [True, False])
3144+
def test_to_and_from_dict(self, encoding) -> None:
31443145
array = DataArray(
31453146
np.random.randn(2, 3), {"x": ["a", "b"]}, ["x", "y"], name="foo"
31463147
)
3148+
array.encoding = {"bar": "spam"}
31473149
expected = {
31483150
"name": "foo",
31493151
"dims": ("x", "y"),
31503152
"data": array.values.tolist(),
31513153
"attrs": {},
31523154
"coords": {"x": {"dims": ("x",), "data": ["a", "b"], "attrs": {}}},
31533155
}
3154-
actual = array.to_dict()
3156+
if encoding:
3157+
expected["encoding"] = {"bar": "spam"}
3158+
actual = array.to_dict(encoding=encoding)
31553159

31563160
# check that they are identical
31573161
assert expected == actual
@@ -3198,7 +3202,7 @@ def test_to_and_from_dict(self):
31983202
endiantype = "<U1" if sys.byteorder == "little" else ">U1"
31993203
expected_no_data["coords"]["x"].update({"dtype": endiantype, "shape": (2,)})
32003204
expected_no_data.update({"dtype": "float64", "shape": (2, 3)})
3201-
actual_no_data = array.to_dict(data=False)
3205+
actual_no_data = array.to_dict(data=False, encoding=encoding)
32023206
assert expected_no_data == actual_no_data
32033207

32043208
def test_to_and_from_dict_with_time_dim(self):

0 commit comments

Comments
 (0)