From 231ad510fcd36593f7af45677a6592f8c72b2cb0 Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Wed, 4 Oct 2017 14:01:07 -0700 Subject: [PATCH 01/15] fix to_netcdf append bug (GH1215) --- doc/whats-new.rst | 8 ++++++-- xarray/backends/common.py | 2 ++ xarray/tests/test_backends.py | 18 ++++++++++++++++++ 3 files changed, 26 insertions(+), 2 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 7025e2dd7d9..8cf07636c62 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -54,9 +54,9 @@ Breaking changes [...] Note that both versions are currently supported, but using the old syntax will - produce a warning encouraging users to adopt the new syntax. + produce a warning encouraging users to adopt the new syntax. By `Daniel Rothenberg `_. - + - ``repr`` and the Jupyter Notebook won't automatically compute dask variables. Datasets loaded with ``open_dataset`` won't automatically read coords from disk when calling ``repr`` (:issue:`1522`). @@ -212,6 +212,10 @@ Bug fixes the first argument was a numpy variable (:issue:`1588`). By `Guido Imperiale `_. +- Fix bug in :py:meth:`~xarray.Dataset.to_netcdf` when writing in append mode + (:issue:`1215`). + By `Joe Hamman `_. + .. _whats-new.0.9.6: v0.9.6 (8 June 2017) diff --git a/xarray/backends/common.py b/xarray/backends/common.py index cec55d22589..235c5daf5b1 100644 --- a/xarray/backends/common.py +++ b/xarray/backends/common.py @@ -221,6 +221,8 @@ def set_attributes(self, attributes): def set_variables(self, variables, check_encoding_set, unlimited_dims=None): for vn, v in iteritems(variables): + if self._mode == 'a' and vn in self.variables: + continue name = _encode_variable_name(vn) check = vn in check_encoding_set target, source = self.prepare_variable( diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index a977868c7e6..f6b75f6f079 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -822,6 +822,24 @@ def roundtrip(self, data, save_kwargs={}, open_kwargs={}, autoclose=self.autoclose, **open_kwargs) as ds: yield ds + @contextlib.contextmanager + def roundtrip_append(self, data, save_kwargs={}, open_kwargs={}, + allow_cleanup_failure=False): + with create_tmp_file( + allow_cleanup_failure=allow_cleanup_failure) as tmp_file: + for i, key in enumerate(data.variables): + mode = 'a' if i > 0 else 'w' + data[[key]].to_netcdf(tmp_file, mode=mode, **save_kwargs) + with open_dataset(tmp_file, + autoclose=self.autoclose, **open_kwargs) as ds: + yield ds + + def test_append_write(self): + # regression for GH1215 + data = create_test_data() + with self.roundtrip_append(data) as actual: + self.assertDatasetIdentical(data, actual) + def test_variable_order(self): # doesn't work with scipy or h5py :( ds = Dataset() From 7f5e96f47379f32e7e850263cda4557451f0fc39 Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Mon, 9 Oct 2017 16:08:48 -0700 Subject: [PATCH 02/15] fix for inmemorystore --- xarray/backends/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/backends/common.py b/xarray/backends/common.py index 235c5daf5b1..c405718f3f2 100644 --- a/xarray/backends/common.py +++ b/xarray/backends/common.py @@ -221,7 +221,7 @@ def set_attributes(self, attributes): def set_variables(self, variables, check_encoding_set, unlimited_dims=None): for vn, v in iteritems(variables): - if self._mode == 'a' and vn in self.variables: + if getattr(self, '_mode', False) == 'a' and vn in self.variables: continue name = _encode_variable_name(vn) check = vn in check_encoding_set From 3625035f740807836b5ff734392130aa1ae6db9b Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Wed, 18 Oct 2017 16:15:41 -0700 Subject: [PATCH 03/15] overwrite existing vars --- xarray/backends/common.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/xarray/backends/common.py b/xarray/backends/common.py index b8defadd7fe..822808a44d1 100644 --- a/xarray/backends/common.py +++ b/xarray/backends/common.py @@ -221,12 +221,14 @@ def set_attributes(self, attributes): def set_variables(self, variables, check_encoding_set, unlimited_dims=None): for vn, v in iteritems(variables): - if getattr(self, '_mode', False) == 'a' and vn in self.variables: - continue name = _encode_variable_name(vn) check = vn in check_encoding_set - target, source = self.prepare_variable( - name, v, check, unlimited_dims=unlimited_dims) + if (vn not in self.variables or (getattr(self, '_mode', False) != 'a')): + target, source = self.prepare_variable( + name, v, check, unlimited_dims=unlimited_dims) + else: + target, source = self.ds[name], v.data + self.writer.add(source, target) def set_necessary_dimensions(self, variable, unlimited_dims=None): From 1782108972712394a94835c92dbdc06e1871fc84 Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Fri, 20 Oct 2017 21:29:24 -0700 Subject: [PATCH 04/15] more tests and docs --- doc/io.rst | 8 +++- xarray/backends/common.py | 5 ++- xarray/core/dataset.py | 3 +- xarray/tests/test_backends.py | 77 ++++++++++++++++++++++++++++++++++- 4 files changed, 87 insertions(+), 6 deletions(-) diff --git a/doc/io.rst b/doc/io.rst index 192890e112a..7cd5a5c80f3 100644 --- a/doc/io.rst +++ b/doc/io.rst @@ -176,6 +176,10 @@ for dealing with datasets too big to fit into memory. Instead, xarray integrates with dask.array (see :ref:`dask`), which provides a fully featured engine for streaming computation. +It is possible to append or overwrite netCDF variables using the ``mode='a'`` +argument. When using this option, all variables in the dataset will be written +to the netCDF file, regardless if they exist in the original dataset. + .. _io.encoding: Reading encoded data @@ -390,7 +394,7 @@ over the network until we look at particular values: Some servers require authentication before we can access the data. For this purpose we can explicitly create a :py:class:`~xarray.backends.PydapDataStore` -and pass in a `Requests`__ session object. For example for +and pass in a `Requests`__ session object. For example for HTTP Basic authentication:: import xarray as xr @@ -403,7 +407,7 @@ HTTP Basic authentication:: session=session) ds = xr.open_dataset(store) -`Pydap's cas module`__ has functions that generate custom sessions for +`Pydap's cas module`__ has functions that generate custom sessions for servers that use CAS single sign-on. For example, to connect to servers that require NASA's URS authentication:: diff --git a/xarray/backends/common.py b/xarray/backends/common.py index 822808a44d1..a6038cfc4a1 100644 --- a/xarray/backends/common.py +++ b/xarray/backends/common.py @@ -223,11 +223,12 @@ def set_variables(self, variables, check_encoding_set, for vn, v in iteritems(variables): name = _encode_variable_name(vn) check = vn in check_encoding_set - if (vn not in self.variables or (getattr(self, '_mode', False) != 'a')): + if (vn not in self.variables or + (getattr(self, '_mode', False) != 'a')): target, source = self.prepare_variable( name, v, check, unlimited_dims=unlimited_dims) else: - target, source = self.ds[name], v.data + target, source = self.ds.variables[name], v.data self.writer.add(source, target) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 8691e7e8198..8a1ca86b2c3 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -963,7 +963,8 @@ def to_netcdf(self, path=None, mode='w', format=None, group=None, default format becomes NETCDF3_64BIT). mode : {'w', 'a'}, optional Write ('w') or append ('a') mode. If mode='w', any existing file at - this location will be overwritten. + this location will be overwritten. If mode='a', exisitng variables + will be over written. format : {'NETCDF4', 'NETCDF4_CLASSIC', 'NETCDF3_64BIT', 'NETCDF3_CLASSIC'}, optional File format for the resulting netCDF file: diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 59cbde551cc..a279da3cadf 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -830,7 +830,8 @@ def roundtrip_append(self, data, save_kwargs={}, open_kwargs={}, allow_cleanup_failure=allow_cleanup_failure) as tmp_file: for i, key in enumerate(data.variables): mode = 'a' if i > 0 else 'w' - data[[key]].to_netcdf(tmp_file, mode=mode, **save_kwargs) + data[[key]].to_netcdf(tmp_file, mode=mode, engine='netcdf4', + **save_kwargs) with open_dataset(tmp_file, autoclose=self.autoclose, **open_kwargs) as ds: yield ds @@ -841,6 +842,18 @@ def test_append_write(self): with self.roundtrip_append(data) as actual: self.assertDatasetIdentical(data, actual) + def test_append_overwrite_values(self): + data = create_test_data() + with create_tmp_file(allow_cleanup_failure=False) as tmp_file: + data.to_netcdf(tmp_file, mode='w', engine='netcdf4') + data['var2'][:] = -999 + data['var9'] = data['var2'] * 3 + data[['var2', 'var9']].to_netcdf(tmp_file, mode='a', + engine='netcdf4') + actual = open_dataset(tmp_file, autoclose=self.autoclose, + engine='netcdf4') + assert_identical(data, actual) + def test_variable_order(self): # doesn't work with scipy or h5py :( ds = Dataset() @@ -975,6 +988,37 @@ def roundtrip(self, data, save_kwargs={}, open_kwargs={}, autoclose=self.autoclose, **open_kwargs) as ds: yield ds + @contextlib.contextmanager + def roundtrip_append(self, data, save_kwargs={}, open_kwargs={}, + allow_cleanup_failure=False): + with create_tmp_file( + allow_cleanup_failure=allow_cleanup_failure) as tmp_file: + for i, key in enumerate(data.variables): + mode = 'a' if i > 0 else 'w' + data[[key]].to_netcdf(tmp_file, mode=mode, engine='scipy', + **save_kwargs) + data.close() + with open_dataset(tmp_file, + engine='scipy', **open_kwargs) as ds: + yield ds + + def test_append_write(self): + # regression for GH1215 + data = create_test_data() + with self.roundtrip_append(data) as actual: + self.assertDatasetIdentical(data, actual) + + def test_append_overwrite_values(self): + data = create_test_data() + with create_tmp_file(allow_cleanup_failure=False) as tmp_file: + data.to_netcdf(tmp_file, mode='w', engine='scipy') + data['var2'][:] = -999 + data['var9'] = data['var2'] * 3 + data[['var2', 'var9']].to_netcdf(tmp_file, mode='a', + engine='scipy') + actual = open_dataset(tmp_file, engine='scipy') + assert_identical(data, actual) + def test_array_attrs(self): ds = Dataset(attrs={'foo': [[1, 2], [3, 4]]}) with self.assertRaisesRegexp(ValueError, 'must be 1-dimensional'): @@ -1155,6 +1199,37 @@ def roundtrip(self, data, save_kwargs={}, open_kwargs={}, autoclose=self.autoclose, **open_kwargs) as ds: yield ds + @contextlib.contextmanager + def roundtrip_append(self, data, save_kwargs={}, open_kwargs={}, + allow_cleanup_failure=False): + with create_tmp_file( + allow_cleanup_failure=allow_cleanup_failure) as tmp_file: + for i, key in enumerate(data.variables): + mode = 'a' if i > 0 else 'w' + data[[key]].to_netcdf(tmp_file, mode=mode, engine='h5netcdf', + **save_kwargs) + with open_dataset(tmp_file, + autoclose=self.autoclose, **open_kwargs) as ds: + yield ds + + def test_append_write(self): + # regression for GH1215 + data = create_test_data() + with self.roundtrip_append(data) as actual: + self.assertDatasetIdentical(data, actual) + + def test_append_overwrite_values(self): + data = create_test_data() + with create_tmp_file(allow_cleanup_failure=False) as tmp_file: + data.to_netcdf(tmp_file, mode='w', engine='h5netcdf') + data['var2'][:] = -999 + data['var9'] = data['var2'] * 3 + data[['var2', 'var9']].to_netcdf(tmp_file, mode='a', + engine='h5netcdf') + actual = open_dataset(tmp_file, autoclose=self.autoclose, + engine='h5netcdf') + assert_identical(data, actual) + def test_orthogonal_indexing(self): # doesn't work for h5py (without using dask as an intermediate layer) pass From 0418264df2be7dbe98c43cafaac9acbf9e933c88 Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Fri, 20 Oct 2017 23:13:59 -0700 Subject: [PATCH 05/15] assert_allclose for scipy backend --- xarray/tests/test_backends.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index a279da3cadf..de30c4d22c0 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -1006,7 +1006,7 @@ def test_append_write(self): # regression for GH1215 data = create_test_data() with self.roundtrip_append(data) as actual: - self.assertDatasetIdentical(data, actual) + assert_allclose(data, actual) def test_append_overwrite_values(self): data = create_test_data() @@ -1017,7 +1017,7 @@ def test_append_overwrite_values(self): data[['var2', 'var9']].to_netcdf(tmp_file, mode='a', engine='scipy') actual = open_dataset(tmp_file, engine='scipy') - assert_identical(data, actual) + assert_allclose(data, actual) def test_array_attrs(self): ds = Dataset(attrs={'foo': [[1, 2], [3, 4]]}) From d3e2b97cacd7023972e703c47c9dba505541dea1 Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Sun, 22 Oct 2017 12:00:16 -0700 Subject: [PATCH 06/15] doc fixes --- doc/io.rst | 2 +- xarray/core/dataset.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/io.rst b/doc/io.rst index 7cd5a5c80f3..fa14a491658 100644 --- a/doc/io.rst +++ b/doc/io.rst @@ -178,7 +178,7 @@ streaming computation. It is possible to append or overwrite netCDF variables using the ``mode='a'`` argument. When using this option, all variables in the dataset will be written -to the netCDF file, regardless if they exist in the original dataset. +to the original netCDF file, regardless if they exist in the original dataset. .. _io.encoding: diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 8a1ca86b2c3..0d19fd63782 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -964,7 +964,7 @@ def to_netcdf(self, path=None, mode='w', format=None, group=None, mode : {'w', 'a'}, optional Write ('w') or append ('a') mode. If mode='w', any existing file at this location will be overwritten. If mode='a', exisitng variables - will be over written. + will be overwritten. format : {'NETCDF4', 'NETCDF4_CLASSIC', 'NETCDF3_64BIT', 'NETCDF3_CLASSIC'}, optional File format for the resulting netCDF file: From faa50987e74dc7eb4f1f9ce05c8239cee906ffa0 Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Mon, 23 Oct 2017 22:36:43 -0700 Subject: [PATCH 07/15] remove check for append mode --- xarray/backends/common.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/xarray/backends/common.py b/xarray/backends/common.py index a6038cfc4a1..6a7c5e4beb4 100644 --- a/xarray/backends/common.py +++ b/xarray/backends/common.py @@ -223,8 +223,7 @@ def set_variables(self, variables, check_encoding_set, for vn, v in iteritems(variables): name = _encode_variable_name(vn) check = vn in check_encoding_set - if (vn not in self.variables or - (getattr(self, '_mode', False) != 'a')): + if vn not in self.variables: target, source = self.prepare_variable( name, v, check, unlimited_dims=unlimited_dims) else: From 6b4a30d0c04d4c4dcb0a057d5d9f55373c2aa085 Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Mon, 23 Oct 2017 22:49:20 -0700 Subject: [PATCH 08/15] cleanup tests part 1 --- xarray/tests/test_backends.py | 122 ++++++++-------------------------- 1 file changed, 29 insertions(+), 93 deletions(-) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index de30c4d22c0..05366473b86 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -563,6 +563,35 @@ def test_encoding_same_dtype(self): self.assertEqual(actual.x.encoding['dtype'], 'f4') self.assertEqual(ds.x.encoding, {}) + @contextlib.contextmanager + def roundtrip_append(self, data, save_kwargs={}, open_kwargs={}, + allow_cleanup_failure=False): + with create_tmp_file( + allow_cleanup_failure=allow_cleanup_failure) as tmp_file: + for i, key in enumerate(data.variables): + mode = 'a' if i > 0 else 'w' + data[[key]].to_netcdf(tmp_file, mode=mode, **save_kwargs) + with open_dataset(tmp_file, + autoclose=self.autoclose, **open_kwargs) as ds: + yield ds + + def test_append_write(self): + # regression for GH1215 + data = create_test_data() + with self.roundtrip_append(data) as actual: + self.assertDatasetIdentical(data, actual) + + def test_append_overwrite_values(self): + # regression for GH1215 + data = create_test_data() + with create_tmp_file(allow_cleanup_failure=False) as tmp_file: + data.to_netcdf(tmp_file, mode='w') + data['var2'][:] = -999 + data['var9'] = data['var2'] * 3 + data[['var2', 'var9']].to_netcdf(tmp_file, mode='a') + with open_dataset(tmp_file, autoclose=self.autoclose) as actual: + assert_identical(data, actual) + _counter = itertools.count() @@ -823,37 +852,6 @@ def roundtrip(self, data, save_kwargs={}, open_kwargs={}, autoclose=self.autoclose, **open_kwargs) as ds: yield ds - @contextlib.contextmanager - def roundtrip_append(self, data, save_kwargs={}, open_kwargs={}, - allow_cleanup_failure=False): - with create_tmp_file( - allow_cleanup_failure=allow_cleanup_failure) as tmp_file: - for i, key in enumerate(data.variables): - mode = 'a' if i > 0 else 'w' - data[[key]].to_netcdf(tmp_file, mode=mode, engine='netcdf4', - **save_kwargs) - with open_dataset(tmp_file, - autoclose=self.autoclose, **open_kwargs) as ds: - yield ds - - def test_append_write(self): - # regression for GH1215 - data = create_test_data() - with self.roundtrip_append(data) as actual: - self.assertDatasetIdentical(data, actual) - - def test_append_overwrite_values(self): - data = create_test_data() - with create_tmp_file(allow_cleanup_failure=False) as tmp_file: - data.to_netcdf(tmp_file, mode='w', engine='netcdf4') - data['var2'][:] = -999 - data['var9'] = data['var2'] * 3 - data[['var2', 'var9']].to_netcdf(tmp_file, mode='a', - engine='netcdf4') - actual = open_dataset(tmp_file, autoclose=self.autoclose, - engine='netcdf4') - assert_identical(data, actual) - def test_variable_order(self): # doesn't work with scipy or h5py :( ds = Dataset() @@ -988,37 +986,6 @@ def roundtrip(self, data, save_kwargs={}, open_kwargs={}, autoclose=self.autoclose, **open_kwargs) as ds: yield ds - @contextlib.contextmanager - def roundtrip_append(self, data, save_kwargs={}, open_kwargs={}, - allow_cleanup_failure=False): - with create_tmp_file( - allow_cleanup_failure=allow_cleanup_failure) as tmp_file: - for i, key in enumerate(data.variables): - mode = 'a' if i > 0 else 'w' - data[[key]].to_netcdf(tmp_file, mode=mode, engine='scipy', - **save_kwargs) - data.close() - with open_dataset(tmp_file, - engine='scipy', **open_kwargs) as ds: - yield ds - - def test_append_write(self): - # regression for GH1215 - data = create_test_data() - with self.roundtrip_append(data) as actual: - assert_allclose(data, actual) - - def test_append_overwrite_values(self): - data = create_test_data() - with create_tmp_file(allow_cleanup_failure=False) as tmp_file: - data.to_netcdf(tmp_file, mode='w', engine='scipy') - data['var2'][:] = -999 - data['var9'] = data['var2'] * 3 - data[['var2', 'var9']].to_netcdf(tmp_file, mode='a', - engine='scipy') - actual = open_dataset(tmp_file, engine='scipy') - assert_allclose(data, actual) - def test_array_attrs(self): ds = Dataset(attrs={'foo': [[1, 2], [3, 4]]}) with self.assertRaisesRegexp(ValueError, 'must be 1-dimensional'): @@ -1199,37 +1166,6 @@ def roundtrip(self, data, save_kwargs={}, open_kwargs={}, autoclose=self.autoclose, **open_kwargs) as ds: yield ds - @contextlib.contextmanager - def roundtrip_append(self, data, save_kwargs={}, open_kwargs={}, - allow_cleanup_failure=False): - with create_tmp_file( - allow_cleanup_failure=allow_cleanup_failure) as tmp_file: - for i, key in enumerate(data.variables): - mode = 'a' if i > 0 else 'w' - data[[key]].to_netcdf(tmp_file, mode=mode, engine='h5netcdf', - **save_kwargs) - with open_dataset(tmp_file, - autoclose=self.autoclose, **open_kwargs) as ds: - yield ds - - def test_append_write(self): - # regression for GH1215 - data = create_test_data() - with self.roundtrip_append(data) as actual: - self.assertDatasetIdentical(data, actual) - - def test_append_overwrite_values(self): - data = create_test_data() - with create_tmp_file(allow_cleanup_failure=False) as tmp_file: - data.to_netcdf(tmp_file, mode='w', engine='h5netcdf') - data['var2'][:] = -999 - data['var9'] = data['var2'] * 3 - data[['var2', 'var9']].to_netcdf(tmp_file, mode='a', - engine='h5netcdf') - actual = open_dataset(tmp_file, autoclose=self.autoclose, - engine='h5netcdf') - assert_identical(data, actual) - def test_orthogonal_indexing(self): # doesn't work for h5py (without using dask as an intermediate layer) pass From d8019409bea645223628645131efae64821410a0 Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Tue, 24 Oct 2017 10:17:03 -0700 Subject: [PATCH 09/15] add engine attribute to test cases --- xarray/core/dataset.py | 2 +- xarray/tests/test_backends.py | 22 +++++++++++++++++----- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index fead5c7fcd2..49f14ffccc5 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -974,7 +974,7 @@ def to_netcdf(self, path=None, mode='w', format=None, group=None, default format becomes NETCDF3_64BIT). mode : {'w', 'a'}, optional Write ('w') or append ('a') mode. If mode='w', any existing file at - this location will be overwritten. If mode='a', exisitng variables + this location will be overwritten. If mode='a', existing variables will be overwritten. format : {'NETCDF4', 'NETCDF4_CLASSIC', 'NETCDF3_64BIT', 'NETCDF3_CLASSIC'}, optional File format for the resulting netCDF file: diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 05366473b86..6c5171ffaca 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -128,6 +128,7 @@ class Only32BitTypes(object): class DatasetIOTestCases(object): autoclose = False + engine = None def create_store(self): raise NotImplementedError @@ -579,18 +580,20 @@ def test_append_write(self): # regression for GH1215 data = create_test_data() with self.roundtrip_append(data) as actual: - self.assertDatasetIdentical(data, actual) + assert_allclose(data, actual) def test_append_overwrite_values(self): # regression for GH1215 data = create_test_data() with create_tmp_file(allow_cleanup_failure=False) as tmp_file: - data.to_netcdf(tmp_file, mode='w') + data.to_netcdf(tmp_file, mode='w', engine=self.engine) data['var2'][:] = -999 data['var9'] = data['var2'] * 3 - data[['var2', 'var9']].to_netcdf(tmp_file, mode='a') - with open_dataset(tmp_file, autoclose=self.autoclose) as actual: - assert_identical(data, actual) + data[['var2', 'var9']].to_netcdf(tmp_file, mode='a', + engine=self.engine) + with open_dataset(tmp_file, autoclose=self.autoclose, + engine=self.engine) as actual: + assert_allclose(data, actual) _counter = itertools.count() @@ -621,6 +624,7 @@ def create_tmp_files(nfiles, suffix='.nc', allow_cleanup_failure=False): @requires_netCDF4 class BaseNetCDF4Test(CFEncodedDataTest): + engine = 'netcdf4' def test_open_group(self): # Create a netCDF file with a dataset stored within a group with create_tmp_file() as tmp_file: @@ -912,6 +916,8 @@ class NetCDF4ViaDaskDataTestAutocloseTrue(NetCDF4ViaDaskDataTest): @requires_scipy class ScipyInMemoryDataTest(CFEncodedDataTest, Only32BitTypes, TestCase): + engine = 'scipy' + @contextlib.contextmanager def create_store(self): fobj = BytesIO() @@ -1024,6 +1030,8 @@ class ScipyFilePathTestAutocloseTrue(ScipyFilePathTest): @requires_netCDF4 class NetCDF3ViaNetCDF4DataTest(CFEncodedDataTest, Only32BitTypes, TestCase): + engine = 'netcdf4' + @contextlib.contextmanager def create_store(self): with create_tmp_file() as tmp_file: @@ -1050,6 +1058,8 @@ class NetCDF3ViaNetCDF4DataTestAutocloseTrue(NetCDF3ViaNetCDF4DataTest): @requires_netCDF4 class NetCDF4ClassicViaNetCDF4DataTest(CFEncodedDataTest, Only32BitTypes, TestCase): + engine = 'netcdf4' + @contextlib.contextmanager def create_store(self): with create_tmp_file() as tmp_file: @@ -1151,6 +1161,8 @@ class GenericNetCDFDataTestAutocloseTrue(GenericNetCDFDataTest): @requires_h5netcdf @requires_netCDF4 class H5NetCDFDataTest(BaseNetCDF4Test, TestCase): + engine = 'h5netcdf' + @contextlib.contextmanager def create_store(self): with create_tmp_file() as tmp_file: From 6347cadd943d2f64047683b1e9707b7d6b711b0e Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Tue, 24 Oct 2017 10:25:22 -0700 Subject: [PATCH 10/15] one more fix --- xarray/tests/test_backends.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 6c5171ffaca..dde3d80f7c2 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -571,7 +571,8 @@ def roundtrip_append(self, data, save_kwargs={}, open_kwargs={}, allow_cleanup_failure=allow_cleanup_failure) as tmp_file: for i, key in enumerate(data.variables): mode = 'a' if i > 0 else 'w' - data[[key]].to_netcdf(tmp_file, mode=mode, **save_kwargs) + data[[key]].to_netcdf(tmp_file, mode=mode, + **save_kwargs) with open_dataset(tmp_file, autoclose=self.autoclose, **open_kwargs) as ds: yield ds @@ -579,7 +580,9 @@ def roundtrip_append(self, data, save_kwargs={}, open_kwargs={}, def test_append_write(self): # regression for GH1215 data = create_test_data() - with self.roundtrip_append(data) as actual: + kwargs = dict(save_kwargs={'engine': self.engine}, + open_kwargs={'engine': self.engine}) + with self.roundtrip_append(data, **kwargs) as actual: assert_allclose(data, actual) def test_append_overwrite_values(self): From 6ea50425f9a38bc382b39b1884ef2ec0d897dde2 Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Tue, 24 Oct 2017 10:31:02 -0700 Subject: [PATCH 11/15] pep8 --- xarray/tests/test_backends.py | 1 + 1 file changed, 1 insertion(+) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index dde3d80f7c2..64902d2fe53 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -628,6 +628,7 @@ def create_tmp_files(nfiles, suffix='.nc', allow_cleanup_failure=False): @requires_netCDF4 class BaseNetCDF4Test(CFEncodedDataTest): engine = 'netcdf4' + def test_open_group(self): # Create a netCDF file with a dataset stored within a group with create_tmp_file() as tmp_file: From 5a68bc59e3341be7ea638be2592ef3fba34bdd3c Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Tue, 24 Oct 2017 15:06:10 -0700 Subject: [PATCH 12/15] refactor roundtrip tests --- xarray/tests/test_backends.py | 152 ++++++++++++---------------------- 1 file changed, 52 insertions(+), 100 deletions(-) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 64902d2fe53..c60b5e3bc32 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -129,12 +129,45 @@ class Only32BitTypes(object): class DatasetIOTestCases(object): autoclose = False engine = None + file_format = None def create_store(self): raise NotImplementedError - def roundtrip(self, data, **kwargs): - raise NotImplementedError + @contextlib.contextmanager + def roundtrip(self, data, save_kwargs={}, open_kwargs={}, + allow_cleanup_failure=False): + with create_tmp_file( + allow_cleanup_failure=allow_cleanup_failure) as path: + self.save(data, path, **save_kwargs) + with self.open(path, **open_kwargs) as ds: + yield ds + + @contextlib.contextmanager + def roundtrip_append(self, data, save_kwargs={}, open_kwargs={}, + allow_cleanup_failure=False): + with create_tmp_file( + allow_cleanup_failure=allow_cleanup_failure) as path: + for i, key in enumerate(data.variables): + mode = 'a' if i > 0 else 'w' + self.save(data[[key]], path, mode=mode, **save_kwargs) + with self.open(path, **open_kwargs) as ds: + yield ds + + # The save/open methods may be overwritten below + def save(self, dataset, path, **kwargs): + if 'engine' not in kwargs: + kwargs['engine'] = self.engine + if 'format' not in kwargs: + kwargs['format'] = self.file_format + dataset.to_netcdf(path, **kwargs) + + @contextlib.contextmanager + def open(self, path, **kwargs): + if 'engine' not in kwargs: + kwargs['engine'] = self.engine + with open_dataset(path, autoclose=self.autoclose, **kwargs) as ds: + yield ds def test_zero_dimensional_variable(self): expected = create_test_data() @@ -564,19 +597,6 @@ def test_encoding_same_dtype(self): self.assertEqual(actual.x.encoding['dtype'], 'f4') self.assertEqual(ds.x.encoding, {}) - @contextlib.contextmanager - def roundtrip_append(self, data, save_kwargs={}, open_kwargs={}, - allow_cleanup_failure=False): - with create_tmp_file( - allow_cleanup_failure=allow_cleanup_failure) as tmp_file: - for i, key in enumerate(data.variables): - mode = 'a' if i > 0 else 'w' - data[[key]].to_netcdf(tmp_file, mode=mode, - **save_kwargs) - with open_dataset(tmp_file, - autoclose=self.autoclose, **open_kwargs) as ds: - yield ds - def test_append_write(self): # regression for GH1215 data = create_test_data() @@ -589,13 +609,11 @@ def test_append_overwrite_values(self): # regression for GH1215 data = create_test_data() with create_tmp_file(allow_cleanup_failure=False) as tmp_file: - data.to_netcdf(tmp_file, mode='w', engine=self.engine) + self.save(data, tmp_file, mode='w') data['var2'][:] = -999 data['var9'] = data['var2'] * 3 - data[['var2', 'var9']].to_netcdf(tmp_file, mode='a', - engine=self.engine) - with open_dataset(tmp_file, autoclose=self.autoclose, - engine=self.engine) as actual: + self.save(data[['var2', 'var9']], tmp_file, mode='a') + with self.open(tmp_file) as actual: assert_allclose(data, actual) @@ -627,6 +645,7 @@ def create_tmp_files(nfiles, suffix='.nc', allow_cleanup_failure=False): @requires_netCDF4 class BaseNetCDF4Test(CFEncodedDataTest): + engine = 'netcdf4' def test_open_group(self): @@ -850,16 +869,6 @@ def create_store(self): with backends.NetCDF4DataStore.open(tmp_file, mode='w') as store: yield store - @contextlib.contextmanager - def roundtrip(self, data, save_kwargs={}, open_kwargs={}, - allow_cleanup_failure=False): - with create_tmp_file( - allow_cleanup_failure=allow_cleanup_failure) as tmp_file: - data.to_netcdf(tmp_file, **save_kwargs) - with open_dataset(tmp_file, - autoclose=self.autoclose, **open_kwargs) as ds: - yield ds - def test_variable_order(self): # doesn't work with scipy or h5py :( ds = Dataset() @@ -927,14 +936,6 @@ def create_store(self): fobj = BytesIO() yield backends.ScipyDataStore(fobj, 'w') - @contextlib.contextmanager - def roundtrip(self, data, save_kwargs={}, open_kwargs={}, - allow_cleanup_failure=False): - serialized = data.to_netcdf(**save_kwargs) - with open_dataset(serialized, engine='scipy', - autoclose=self.autoclose, **open_kwargs) as ds: - yield ds - def test_to_netcdf_explicit_engine(self): # regression test for GH1321 Dataset({'foo': 42}).to_netcdf(engine='scipy') @@ -954,6 +955,8 @@ class ScipyInMemoryDataTestAutocloseTrue(ScipyInMemoryDataTest): @requires_scipy class ScipyFileObjectTest(CFEncodedDataTest, Only32BitTypes, TestCase): + engine = 'scipy' + @contextlib.contextmanager def create_store(self): fobj = BytesIO() @@ -964,9 +967,9 @@ def roundtrip(self, data, save_kwargs={}, open_kwargs={}, allow_cleanup_failure=False): with create_tmp_file() as tmp_file: with open(tmp_file, 'wb') as f: - data.to_netcdf(f, **save_kwargs) + self.save(data, f, **save_kwargs) with open(tmp_file, 'rb') as f: - with open_dataset(f, engine='scipy', **open_kwargs) as ds: + with self.open(f, **open_kwargs) as ds: yield ds @pytest.mark.skip(reason='cannot pickle file objects') @@ -980,22 +983,14 @@ def test_pickle_dataarray(self): @requires_scipy class ScipyFilePathTest(CFEncodedDataTest, Only32BitTypes, TestCase): + engine = 'scipy' + @contextlib.contextmanager def create_store(self): with create_tmp_file() as tmp_file: with backends.ScipyDataStore(tmp_file, mode='w') as store: yield store - @contextlib.contextmanager - def roundtrip(self, data, save_kwargs={}, open_kwargs={}, - allow_cleanup_failure=False): - with create_tmp_file( - allow_cleanup_failure=allow_cleanup_failure) as tmp_file: - data.to_netcdf(tmp_file, engine='scipy', **save_kwargs) - with open_dataset(tmp_file, engine='scipy', - autoclose=self.autoclose, **open_kwargs) as ds: - yield ds - def test_array_attrs(self): ds = Dataset(attrs={'foo': [[1, 2], [3, 4]]}) with self.assertRaisesRegexp(ValueError, 'must be 1-dimensional'): @@ -1035,6 +1030,7 @@ class ScipyFilePathTestAutocloseTrue(ScipyFilePathTest): @requires_netCDF4 class NetCDF3ViaNetCDF4DataTest(CFEncodedDataTest, Only32BitTypes, TestCase): engine = 'netcdf4' + file_format = 'NETCDF3_CLASSIC' @contextlib.contextmanager def create_store(self): @@ -1043,17 +1039,6 @@ def create_store(self): tmp_file, mode='w', format='NETCDF3_CLASSIC') as store: yield store - @contextlib.contextmanager - def roundtrip(self, data, save_kwargs={}, open_kwargs={}, - allow_cleanup_failure=False): - with create_tmp_file( - allow_cleanup_failure=allow_cleanup_failure) as tmp_file: - data.to_netcdf(tmp_file, format='NETCDF3_CLASSIC', - engine='netcdf4', **save_kwargs) - with open_dataset(tmp_file, engine='netcdf4', - autoclose=self.autoclose, **open_kwargs) as ds: - yield ds - class NetCDF3ViaNetCDF4DataTestAutocloseTrue(NetCDF3ViaNetCDF4DataTest): autoclose = True @@ -1063,6 +1048,7 @@ class NetCDF3ViaNetCDF4DataTestAutocloseTrue(NetCDF3ViaNetCDF4DataTest): class NetCDF4ClassicViaNetCDF4DataTest(CFEncodedDataTest, Only32BitTypes, TestCase): engine = 'netcdf4' + file_format = 'NETCDF4_CLASSIC' @contextlib.contextmanager def create_store(self): @@ -1071,17 +1057,6 @@ def create_store(self): tmp_file, mode='w', format='NETCDF4_CLASSIC') as store: yield store - @contextlib.contextmanager - def roundtrip(self, data, save_kwargs={}, open_kwargs={}, - allow_cleanup_failure=False): - with create_tmp_file( - allow_cleanup_failure=allow_cleanup_failure) as tmp_file: - data.to_netcdf(tmp_file, format='NETCDF4_CLASSIC', - engine='netcdf4', **save_kwargs) - with open_dataset(tmp_file, engine='netcdf4', - autoclose=self.autoclose, **open_kwargs) as ds: - yield ds - class NetCDF4ClassicViaNetCDF4DataTestAutocloseTrue( NetCDF4ClassicViaNetCDF4DataTest): @@ -1092,21 +1067,12 @@ class NetCDF4ClassicViaNetCDF4DataTestAutocloseTrue( class GenericNetCDFDataTest(CFEncodedDataTest, Only32BitTypes, TestCase): # verify that we can read and write netCDF3 files as long as we have scipy # or netCDF4-python installed + file_format = 'netcdf3_64bit' def test_write_store(self): # there's no specific store to test here pass - @contextlib.contextmanager - def roundtrip(self, data, save_kwargs={}, open_kwargs={}, - allow_cleanup_failure=False): - with create_tmp_file( - allow_cleanup_failure=allow_cleanup_failure) as tmp_file: - data.to_netcdf(tmp_file, format='netcdf3_64bit', **save_kwargs) - with open_dataset(tmp_file, - autoclose=self.autoclose, **open_kwargs) as ds: - yield ds - def test_engine(self): data = create_test_data() with self.assertRaisesRegexp(ValueError, 'unrecognized engine'): @@ -1172,16 +1138,6 @@ def create_store(self): with create_tmp_file() as tmp_file: yield backends.H5NetCDFStore(tmp_file, 'w') - @contextlib.contextmanager - def roundtrip(self, data, save_kwargs={}, open_kwargs={}, - allow_cleanup_failure=False): - with create_tmp_file( - allow_cleanup_failure=allow_cleanup_failure) as tmp_file: - data.to_netcdf(tmp_file, engine='h5netcdf', **save_kwargs) - with open_dataset(tmp_file, engine='h5netcdf', - autoclose=self.autoclose, **open_kwargs) as ds: - yield ds - def test_orthogonal_indexing(self): # doesn't work for h5py (without using dask as an intermediate layer) pass @@ -1691,14 +1647,10 @@ def test_orthogonal_indexing(self): pass @contextlib.contextmanager - def roundtrip(self, data, save_kwargs={}, open_kwargs={}, - allow_cleanup_failure=False): - with create_tmp_file( - allow_cleanup_failure=allow_cleanup_failure) as tmp_file: - data.to_netcdf(tmp_file, engine='scipy', **save_kwargs) - with open_dataset(tmp_file, engine='pynio', - autoclose=self.autoclose, **open_kwargs) as ds: - yield ds + def open(self, path, **kwargs): + with open_dataset(path, autoclose=self.autoclose, engine='pynio', + **kwargs) as ds: + yield ds def test_weakrefs(self): example = Dataset({'foo': ('x', np.arange(5.0))}) From 92c49a89ac926f35723fdbc0af5054dbc0f2f87d Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Tue, 24 Oct 2017 15:27:20 -0700 Subject: [PATCH 13/15] mods for pynio --- xarray/tests/test_backends.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index c60b5e3bc32..ae7e1e64f43 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -1652,6 +1652,9 @@ def open(self, path, **kwargs): **kwargs) as ds: yield ds + def save(self, dataset, path, **kwargs): + dataset.to_netcdf(path, engine='scipy') + def test_weakrefs(self): example = Dataset({'foo': ('x', np.arange(5.0))}) expected = example.rename({'foo': 'bar', 'x': 'y'}) From 0b1efd22f03510fd33c762f35d32a508f96a6ece Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Tue, 24 Oct 2017 15:48:19 -0700 Subject: [PATCH 14/15] fix multiple kwargs --- xarray/tests/test_backends.py | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index ae7e1e64f43..10a93c2ae32 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -156,17 +156,13 @@ def roundtrip_append(self, data, save_kwargs={}, open_kwargs={}, # The save/open methods may be overwritten below def save(self, dataset, path, **kwargs): - if 'engine' not in kwargs: - kwargs['engine'] = self.engine - if 'format' not in kwargs: - kwargs['format'] = self.file_format - dataset.to_netcdf(path, **kwargs) + dataset.to_netcdf(path, engine=self.engine, format=self.file_format, + **kwargs) @contextlib.contextmanager def open(self, path, **kwargs): - if 'engine' not in kwargs: - kwargs['engine'] = self.engine - with open_dataset(path, autoclose=self.autoclose, **kwargs) as ds: + with open_dataset(path, engine=self.engine, autoclose=self.autoclose, + **kwargs) as ds: yield ds def test_zero_dimensional_variable(self): @@ -600,9 +596,7 @@ def test_encoding_same_dtype(self): def test_append_write(self): # regression for GH1215 data = create_test_data() - kwargs = dict(save_kwargs={'engine': self.engine}, - open_kwargs={'engine': self.engine}) - with self.roundtrip_append(data, **kwargs) as actual: + with self.roundtrip_append(data) as actual: assert_allclose(data, actual) def test_append_overwrite_values(self): @@ -1648,12 +1642,11 @@ def test_orthogonal_indexing(self): @contextlib.contextmanager def open(self, path, **kwargs): - with open_dataset(path, autoclose=self.autoclose, engine='pynio', - **kwargs) as ds: + with open_dataset(path, autoclose=self.autoclose, **kwargs) as ds: yield ds def save(self, dataset, path, **kwargs): - dataset.to_netcdf(path, engine='scipy') + dataset.to_netcdf(path, engine='scipy', **kwargs) def test_weakrefs(self): example = Dataset({'foo': ('x', np.arange(5.0))}) From 09101d63db0ece7e66b0fcee8082a13783733708 Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Tue, 24 Oct 2017 19:54:23 -0700 Subject: [PATCH 15/15] fix for pynio --- xarray/tests/test_backends.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 10a93c2ae32..3370e5aae26 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -1642,7 +1642,8 @@ def test_orthogonal_indexing(self): @contextlib.contextmanager def open(self, path, **kwargs): - with open_dataset(path, autoclose=self.autoclose, **kwargs) as ds: + with open_dataset(path, engine='pynio', autoclose=self.autoclose, + **kwargs) as ds: yield ds def save(self, dataset, path, **kwargs):