Skip to content

Commit a8ed7ed

Browse files
authored
FIX: h5py>=3 string decoding (#4893)
* FIX: set `decode_strings=True` for h5netcdf backend, convert object string to byte string if necessary, unpin h5py
* Update strings.py
* Update h5netcdf_.py
* fix style
* FIX: change decode_strings -> decode_vlen_strings, add whats-new.rst entry
* FIX: change missed decode_strings -> decode_vlen_strings
* FIX: set `decode_vlen_strings=True` in `open` classmethod, call remaining tests with `decode_vlen_strings=True`
* FIX: cover tests for h5py=2
1 parent cdf7761 commit a8ed7ed

File tree

6 files changed

+27
-6
lines changed

6 files changed

+27
-6
lines changed

ci/requirements/environment-windows.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ dependencies:
1111
- dask
1212
- distributed
1313
- h5netcdf
14-
- h5py=2
14+
- h5py
1515
- hdf5
1616
- hypothesis
1717
- iris

ci/requirements/environment.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ dependencies:
1313
- dask
1414
- distributed
1515
- h5netcdf
16-
- h5py=2
16+
- h5py
1717
- hdf5
1818
- hypothesis
1919
- iris

ci/requirements/py38-all-but-dask.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ dependencies:
1414
- cftime
1515
- coveralls
1616
- h5netcdf
17-
- h5py=2
17+
- h5py
1818
- hdf5
1919
- hypothesis
2020
- lxml # Optional dep of pydap

doc/whats-new.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,8 @@ Bug fixes
118118
By `Leif Denby <https://github.com/leifdenby>`_.
119119
- Fix time encoding bug associated with using cftime versions greater than
120120
1.4.0 with xarray (:issue:`4870`, :pull:`4871`). By `Spencer Clark <https://github.com/spencerkclark>`_.
121+
- Fix decoding of variable-length (vlen) strings when using h5py version 3.0.0 or later with the h5netcdf backend (:issue:`4570`, :pull:`4893`).
122+
By `Kai Mühlbauer <https://github.com/kmuehlbauer>`_.
121123

122124
Documentation
123125
~~~~~~~~~~~~~

xarray/backends/h5netcdf_.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,7 @@ def open(
131131
autoclose=False,
132132
invalid_netcdf=None,
133133
phony_dims=None,
134+
decode_vlen_strings=True,
134135
):
135136

136137
if isinstance(filename, bytes):
@@ -157,6 +158,10 @@ def open(
157158
"h5netcdf backend keyword argument 'phony_dims' needs "
158159
"h5netcdf >= 0.8.0."
159160
)
161+
if LooseVersion(h5netcdf.__version__) >= LooseVersion(
162+
"0.10.0"
163+
) and LooseVersion(h5netcdf.core.h5py.__version__) >= LooseVersion("3.0.0"):
164+
kwargs["decode_vlen_strings"] = decode_vlen_strings
160165

161166
if lock is None:
162167
if mode == "r":
@@ -358,6 +363,7 @@ def open_dataset(
358363
lock=None,
359364
invalid_netcdf=None,
360365
phony_dims=None,
366+
decode_vlen_strings=True,
361367
):
362368

363369
store = H5NetCDFStore.open(
@@ -367,6 +373,7 @@ def open_dataset(
367373
lock=lock,
368374
invalid_netcdf=invalid_netcdf,
369375
phony_dims=phony_dims,
376+
decode_vlen_strings=decode_vlen_strings,
370377
)
371378

372379
store_entrypoint = StoreBackendEntrypoint()

xarray/tests/test_backends.py

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2579,13 +2579,19 @@ def test_open_dataset_group(self):
25792579
v = group.createVariable("x", "int")
25802580
v[...] = 42
25812581

2582-
h5 = h5netcdf.File(tmp_file, mode="r")
2582+
kwargs = {}
2583+
if LooseVersion(h5netcdf.__version__) >= LooseVersion(
2584+
"0.10.0"
2585+
) and LooseVersion(h5netcdf.core.h5py.__version__) >= LooseVersion("3.0.0"):
2586+
kwargs = dict(decode_vlen_strings=True)
2587+
2588+
h5 = h5netcdf.File(tmp_file, mode="r", **kwargs)
25832589
store = backends.H5NetCDFStore(h5["g"])
25842590
with open_dataset(store) as ds:
25852591
expected = Dataset({"x": ((), 42)})
25862592
assert_identical(expected, ds)
25872593

2588-
h5 = h5netcdf.File(tmp_file, mode="r")
2594+
h5 = h5netcdf.File(tmp_file, mode="r", **kwargs)
25892595
store = backends.H5NetCDFStore(h5, group="g")
25902596
with open_dataset(store) as ds:
25912597
expected = Dataset({"x": ((), 42)})
@@ -2600,7 +2606,13 @@ def test_deepcopy(self):
26002606
v = nc.createVariable("y", np.int32, ("x",))
26012607
v[:] = np.arange(10)
26022608

2603-
h5 = h5netcdf.File(tmp_file, mode="r")
2609+
kwargs = {}
2610+
if LooseVersion(h5netcdf.__version__) >= LooseVersion(
2611+
"0.10.0"
2612+
) and LooseVersion(h5netcdf.core.h5py.__version__) >= LooseVersion("3.0.0"):
2613+
kwargs = dict(decode_vlen_strings=True)
2614+
2615+
h5 = h5netcdf.File(tmp_file, mode="r", **kwargs)
26042616
store = backends.H5NetCDFStore(h5)
26052617
with open_dataset(store) as ds:
26062618
copied = ds.copy(deep=True)

0 commit comments

Comments
 (0)