From eeeb3a59075cdc8abeced664d8f6a6415f31dfc4 Mon Sep 17 00:00:00 2001 From: Barron Henderson Date: Fri, 27 Oct 2017 12:34:46 -0400 Subject: [PATCH 01/46] Added PNC backend to xarray PNC is used for GEOS-Chem, CAMx, CMAQ and other atmospheric data formats that have their own file formats and meta-data conventions. It can provide a CF compliant netCDF-like interface. --- xarray/backends/__init__.py | 1 + xarray/backends/api.py | 5 ++- xarray/backends/pnc_.py | 85 +++++++++++++++++++++++++++++++++++++ 3 files changed, 90 insertions(+), 1 deletion(-) create mode 100644 xarray/backends/pnc_.py diff --git a/xarray/backends/__init__.py b/xarray/backends/__init__.py index a082bd53e5e..3983dc26a03 100644 --- a/xarray/backends/__init__.py +++ b/xarray/backends/__init__.py @@ -10,3 +10,4 @@ from .pynio_ import NioDataStore from .scipy_ import ScipyDataStore from .h5netcdf_ import H5NetCDFStore +from .pnc_ import PncDataStore diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 36c686e7a91..a2dbc73b19f 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -174,7 +174,7 @@ def open_dataset(filename_or_obj, group=None, decode_cf=True, decode_coords : bool, optional If True, decode the 'coordinates' attribute to identify coordinates in the resulting dataset. - engine : {'netcdf4', 'scipy', 'pydap', 'h5netcdf', 'pynio'}, optional + engine : {'netcdf4', 'scipy', 'pydap', 'h5netcdf', 'pynio', 'pnc'}, optional Engine to use when reading files. If not provided, the default engine is chosen based on available dependencies, with a preference for 'netcdf4'. @@ -295,6 +295,9 @@ def maybe_decode_store(store, lock=False): elif engine == 'pynio': store = backends.NioDataStore(filename_or_obj, autoclose=autoclose) + elif engine == 'pnc': + store = backends.PncDataStore(filename_or_obj, + autoclose=autoclose) else: raise ValueError('unrecognized engine for open_dataset: %r' % engine) diff --git a/xarray/backends/pnc_.py b/xarray/backends/pnc_.py new file mode 100644 index 00000000000..306d97bbaa2 --- /dev/null +++ b/xarray/backends/pnc_.py @@ -0,0 +1,85 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import functools + +import numpy as np + +from .. import Variable +from ..core.utils import (FrozenOrderedDict, Frozen, + NdimSizeLenMixin, DunderArrayMixin) +from ..core import indexing + +from .common import AbstractDataStore, DataStorePickleMixin + + +class PncArrayWrapper(NdimSizeLenMixin, DunderArrayMixin): + + def __init__(self, variable_name, datastore): + self.datastore = datastore + self.variable_name = variable_name + array = self.get_array() + self.shape = array.shape + self.dtype = np.dtype(array.dtype) + + def get_array(self): + self.datastore.assert_open() + return self.datastore.ds.variables[self.variable_name] + + def __getitem__(self, key): + key = indexing.to_tuple(key) + with self.datastore.ensure_open(autoclose=True): + array = self.get_array() + if key == () and self.ndim == 0: + return array[...] 
+ return array[key] + + +class PncDataStore(AbstractDataStore, DataStorePickleMixin): + """Store for accessing datasets via PseudoNetCDF + """ + def __init__(self, filename, mode='r', autoclose=False): + from PseudoNetCDF import pncopen + try: + opener = functools.partial(pncopen, filename, mode=mode, format = 'netcdf') + self.ds = opener() + except: + try: + opener = functools.partial(pncopen, filename, mode=mode) + self.ds = opener() + except: + opener = functools.partial(pncopen, filename) + self.ds = opener() + self._autoclose = autoclose + self._isopen = True + self._opener = opener + self._mode = mode + + def open_store_variable(self, name, var): + data = indexing.LazilyIndexedArray(PncArrayWrapper(name, self)) + return Variable(var.dimensions, data, dict([(k, getattr(var, k)) for k in var.ncattrs()])) + + def get_variables(self): + with self.ensure_open(autoclose=False): + return FrozenOrderedDict((k, self.open_store_variable(k, v)) + for k, v in self.ds.variables.items()) + + def get_attrs(self): + with self.ensure_open(autoclose=True): + return Frozen(dict([(k, getattr(self.ds, k)) for k in self.ds.ncattrs()])) + + def get_dimensions(self): + with self.ensure_open(autoclose=True): + return Frozen(self.ds.dimensions) + + def get_encoding(self): + encoding = {} + encoding['unlimited_dims'] = set( + [k for k in self.ds.dimensions if self.ds.dimensions[k].isunlimited()]) + return encoding + + def close(self): + if self._isopen: + self.ds.close() + self._isopen = False \ No newline at end of file From 5ac4b6f9b3e05d992d7e2cbd2e770f4eda1b98f2 Mon Sep 17 00:00:00 2001 From: barronh Date: Mon, 12 Feb 2018 18:29:09 -0500 Subject: [PATCH 02/46] Added whats-new documentation --- doc/whats-new.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 115cd9bb54f..5f45952a5f3 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -43,6 +43,8 @@ Documentation Enhancements ~~~~~~~~~~~~ +- added backend for many Atmospheric data formats ranging including GEOS-Chem, + CAMx, NOAA arlpacked bit and many others. - reduce methods such as :py:func:`DataArray.sum()` now accepts ``dtype`` arguments. (:issue:`1838`) By `Keisuke Fujii `_. From f73436dc746adff62046aed98670e0b6b1b34742 Mon Sep 17 00:00:00 2001 From: Barron Henderson Date: Mon, 12 Feb 2018 21:38:39 -0500 Subject: [PATCH 03/46] Updating pnc_ to remove DunderArrayMixin dependency --- xarray/backends/pnc_.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/xarray/backends/pnc_.py b/xarray/backends/pnc_.py index 306d97bbaa2..f117bb4fb60 100644 --- a/xarray/backends/pnc_.py +++ b/xarray/backends/pnc_.py @@ -7,14 +7,13 @@ import numpy as np from .. 
import Variable -from ..core.utils import (FrozenOrderedDict, Frozen, - NdimSizeLenMixin, DunderArrayMixin) +from ..core.utils import (FrozenOrderedDict, Frozen) from ..core import indexing -from .common import AbstractDataStore, DataStorePickleMixin +from .common import AbstractDataStore, DataStorePickleMixin, BackendArray -class PncArrayWrapper(NdimSizeLenMixin, DunderArrayMixin): +class PncArrayWrapper(BackendArray): def __init__(self, variable_name, datastore): self.datastore = datastore @@ -28,7 +27,9 @@ def get_array(self): return self.datastore.ds.variables[self.variable_name] def __getitem__(self, key): - key = indexing.to_tuple(key) + key = indexing.unwrap_explicit_indexer( + key, target=self, allow=indexing.BasicIndexer) + with self.datastore.ensure_open(autoclose=True): array = self.get_array() if key == () and self.ndim == 0: @@ -39,6 +40,7 @@ def __getitem__(self, key): class PncDataStore(AbstractDataStore, DataStorePickleMixin): """Store for accessing datasets via PseudoNetCDF """ + def __init__(self, filename, mode='r', autoclose=False): from PseudoNetCDF import pncopen try: From 9507303fc8395c2258e73a3da1a3e27be5368c78 Mon Sep 17 00:00:00 2001 From: Barron Henderson Date: Mon, 12 Feb 2018 22:18:56 -0500 Subject: [PATCH 04/46] Adding basic tests for pnc Right now, pnc is simply being tested as a reader for NetCDF3 files --- xarray/tests/__init__.py | 1 + xarray/tests/test_backends.py | 35 ++++++++++++++++++++++++++++++++++- 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py index dadcdeff640..6fdc3ef997a 100644 --- a/xarray/tests/__init__.py +++ b/xarray/tests/__init__.py @@ -68,6 +68,7 @@ def _importorskip(modname, minversion=None): has_netCDF4, requires_netCDF4 = _importorskip('netCDF4') has_h5netcdf, requires_h5netcdf = _importorskip('h5netcdf') has_pynio, requires_pynio = _importorskip('Nio') +has_pnc, requires_pnc = _importorskip('PseudoNetCDF') has_dask, requires_dask = _importorskip('dask') has_bottleneck, requires_bottleneck = _importorskip('bottleneck') has_rasterio, requires_rasterio = _importorskip('rasterio') diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 85b6bdea346..8079176370b 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -28,7 +28,7 @@ from . import (TestCase, requires_scipy, requires_netCDF4, requires_pydap, requires_scipy_or_netCDF4, requires_dask, requires_h5netcdf, - requires_pynio, requires_pathlib, requires_zarr, + requires_pynio, requires_pnc, requires_pathlib, requires_zarr, requires_rasterio, has_netCDF4, has_scipy, assert_allclose, flaky, network, assert_identical, raises_regex, assert_equal, assert_array_equal) @@ -1679,6 +1679,7 @@ def test_3_autoclose_pynio(self): def test_4_autoclose_h5netcdf(self): self.validate_open_mfdataset_autoclose(engine=['h5netcdf']) + # These tests below are marked as flaky (and skipped by default) because # they fail sometimes on Travis-CI, for no clear reason. 
@@ -2175,6 +2176,38 @@ class TestPyNioAutocloseTrue(TestPyNio): autoclose = True +@requires_scipy +@requires_pnc +class TestPnc(NetCDF3Only, TestCase): + def test_write_store(self): + # pnc is read-only for now + pass + + @contextlib.contextmanager + def open(self, path, **kwargs): + with open_dataset(path, engine='pnc', autoclose=self.autoclose, + **kwargs) as ds: + yield ds + + def save(self, dataset, path, **kwargs): + dataset.to_netcdf(path, engine='scipy', **kwargs) + + def test_weakrefs(self): + example = Dataset({'foo': ('x', np.arange(5.0))}) + expected = example.rename({'foo': 'bar', 'x': 'y'}) + + with create_tmp_file() as tmp_file: + example.to_netcdf(tmp_file, engine='scipy') + on_disk = open_dataset(tmp_file, engine='pnc') + actual = on_disk.rename({'foo': 'bar', 'x': 'y'}) + del on_disk # trigger garbage collection + assert_identical(actual, expected) + + +class TestPncAutocloseTrue(TestPnc): + autoclose = True + + @requires_rasterio @contextlib.contextmanager def create_tmp_geotiff(nx=4, ny=3, nz=3, From ef22872566ee5d26f6676d43d1c0ccc6d0af7685 Mon Sep 17 00:00:00 2001 From: Barron Henderson Date: Tue, 13 Feb 2018 21:00:26 -0500 Subject: [PATCH 05/46] Updating for flake8 compliance --- xarray/backends/pnc_.py | 18 +++++++++++------- xarray/tests/test_backends.py | 2 -- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/xarray/backends/pnc_.py b/xarray/backends/pnc_.py index f117bb4fb60..381edcfa2ee 100644 --- a/xarray/backends/pnc_.py +++ b/xarray/backends/pnc_.py @@ -44,13 +44,14 @@ class PncDataStore(AbstractDataStore, DataStorePickleMixin): def __init__(self, filename, mode='r', autoclose=False): from PseudoNetCDF import pncopen try: - opener = functools.partial(pncopen, filename, mode=mode, format = 'netcdf') + opener = functools.partial(pncopen, filename, + mode=mode, format='netcdf') self.ds = opener() - except: + except Exception as e: try: opener = functools.partial(pncopen, filename, mode=mode) self.ds = opener() - except: + except Exception as e: opener = functools.partial(pncopen, filename) self.ds = opener() self._autoclose = autoclose @@ -60,7 +61,8 @@ def __init__(self, filename, mode='r', autoclose=False): def open_store_variable(self, name, var): data = indexing.LazilyIndexedArray(PncArrayWrapper(name, self)) - return Variable(var.dimensions, data, dict([(k, getattr(var, k)) for k in var.ncattrs()])) + return Variable(var.dimensions, data, dict([(k, getattr(var, k)) + for k in var.ncattrs()])) def get_variables(self): with self.ensure_open(autoclose=False): @@ -69,7 +71,8 @@ def get_variables(self): def get_attrs(self): with self.ensure_open(autoclose=True): - return Frozen(dict([(k, getattr(self.ds, k)) for k in self.ds.ncattrs()])) + return Frozen(dict([(k, getattr(self.ds, k)) + for k in self.ds.ncattrs()])) def get_dimensions(self): with self.ensure_open(autoclose=True): @@ -78,10 +81,11 @@ def get_dimensions(self): def get_encoding(self): encoding = {} encoding['unlimited_dims'] = set( - [k for k in self.ds.dimensions if self.ds.dimensions[k].isunlimited()]) + [k for k in self.ds.dimensions + if self.ds.dimensions[k].isunlimited()]) return encoding def close(self): if self._isopen: self.ds.close() - self._isopen = False \ No newline at end of file + self._isopen = False diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 8079176370b..e599fca98ff 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -1679,10 +1679,8 @@ def test_3_autoclose_pynio(self): def 
test_4_autoclose_h5netcdf(self): self.validate_open_mfdataset_autoclose(engine=['h5netcdf']) - # These tests below are marked as flaky (and skipped by default) because # they fail sometimes on Travis-CI, for no clear reason. - @requires_dask @requires_netCDF4 @flaky From 56f087c65a561aa7dcb3dab3693d6bc6caed5268 Mon Sep 17 00:00:00 2001 From: Barron Henderson Date: Sat, 17 Feb 2018 07:56:34 -0500 Subject: [PATCH 06/46] flake does not like unused e --- xarray/backends/pnc_.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/backends/pnc_.py b/xarray/backends/pnc_.py index 381edcfa2ee..82fe5a4591b 100644 --- a/xarray/backends/pnc_.py +++ b/xarray/backends/pnc_.py @@ -47,11 +47,11 @@ def __init__(self, filename, mode='r', autoclose=False): opener = functools.partial(pncopen, filename, mode=mode, format='netcdf') self.ds = opener() - except Exception as e: + except Exception: try: opener = functools.partial(pncopen, filename, mode=mode) self.ds = opener() - except Exception as e: + except Exception: opener = functools.partial(pncopen, filename) self.ds = opener() self._autoclose = autoclose From 5a3c62d6f559754bbc5426f75811b818072b031e Mon Sep 17 00:00:00 2001 From: Barron Henderson Date: Tue, 6 Mar 2018 21:21:32 -0500 Subject: [PATCH 07/46] Updating pnc to PseudoNetCDF --- xarray/backends/api.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 734701b507b..92c525a1acd 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -174,7 +174,7 @@ def open_dataset(filename_or_obj, group=None, decode_cf=True, decode_coords : bool, optional If True, decode the 'coordinates' attribute to identify coordinates in the resulting dataset. - engine : {'netcdf4', 'scipy', 'pydap', 'h5netcdf', 'pynio', 'pnc'}, optional + engine : {'netcdf4', 'scipy', 'pydap', 'h5netcdf', 'pynio', 'pseudonetcdf'}, optional Engine to use when reading files. If not provided, the default engine is chosen based on available dependencies, with a preference for 'netcdf4'. 
@@ -295,7 +295,7 @@ def maybe_decode_store(store, lock=False): elif engine == 'pynio': store = backends.NioDataStore(filename_or_obj, autoclose=autoclose) - elif engine == 'pnc': + elif engine == 'pseudonetcdf': store = backends.PncDataStore(filename_or_obj, autoclose=autoclose) else: From 8eb427d66a8b55bb0185875763ac71eea4db99a3 Mon Sep 17 00:00:00 2001 From: Barron Henderson Date: Tue, 6 Mar 2018 21:22:21 -0500 Subject: [PATCH 08/46] Remove outer except --- xarray/backends/pnc_.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/xarray/backends/pnc_.py b/xarray/backends/pnc_.py index 82fe5a4591b..cd7cfe42d2e 100644 --- a/xarray/backends/pnc_.py +++ b/xarray/backends/pnc_.py @@ -44,16 +44,11 @@ class PncDataStore(AbstractDataStore, DataStorePickleMixin): def __init__(self, filename, mode='r', autoclose=False): from PseudoNetCDF import pncopen try: - opener = functools.partial(pncopen, filename, - mode=mode, format='netcdf') + opener = functools.partial(pncopen, filename, mode=mode) self.ds = opener() except Exception: - try: - opener = functools.partial(pncopen, filename, mode=mode) - self.ds = opener() - except Exception: - opener = functools.partial(pncopen, filename) - self.ds = opener() + opener = functools.partial(pncopen, filename) + self.ds = opener() self._autoclose = autoclose self._isopen = True self._opener = opener From ca75c76ae5053dedbc1ba26b1f3301fe84d74c3a Mon Sep 17 00:00:00 2001 From: Barron Henderson Date: Tue, 6 Mar 2018 21:23:19 -0500 Subject: [PATCH 09/46] Updating pnc to PseudoNetCDF --- xarray/tests/test_backends.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 917e2f424fb..1b0ab97b474 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -2203,7 +2203,7 @@ def test_write_store(self): @contextlib.contextmanager def open(self, path, **kwargs): - with open_dataset(path, engine='pnc', autoclose=self.autoclose, + with open_dataset(path, engine='pseudonetcdf', autoclose=self.autoclose, **kwargs) as ds: yield ds From 196c03fd375ab013f4dd07547343da4ced6eb331 Mon Sep 17 00:00:00 2001 From: Barron Henderson Date: Sat, 17 Mar 2018 10:48:46 -0400 Subject: [PATCH 10/46] Added open and updated init Based on shoyer review --- xarray/backends/api.py | 8 +++++--- xarray/backends/pnc_.py | 26 +++++++++++++++++--------- xarray/tests/test_backends.py | 3 ++- 3 files changed, 24 insertions(+), 13 deletions(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 92c525a1acd..f718d284da6 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -134,7 +134,8 @@ def _protect_dataset_variables_inplace(dataset, cache): def open_dataset(filename_or_obj, group=None, decode_cf=True, mask_and_scale=True, decode_times=True, autoclose=False, concat_characters=True, decode_coords=True, engine=None, - chunks=None, lock=None, cache=None, drop_variables=None): + chunks=None, lock=None, cache=None, drop_variables=None, + backend_kwargs=None): """Load and decode a dataset from a file or file-like object. 
Parameters @@ -296,8 +297,9 @@ def maybe_decode_store(store, lock=False): store = backends.NioDataStore(filename_or_obj, autoclose=autoclose) elif engine == 'pseudonetcdf': - store = backends.PncDataStore(filename_or_obj, - autoclose=autoclose) + store = backends.PncDataStore.open(filename_or_obj, + autoclose=autoclose, + **backend_kwargs) else: raise ValueError('unrecognized engine for open_dataset: %r' % engine) diff --git a/xarray/backends/pnc_.py b/xarray/backends/pnc_.py index cd7cfe42d2e..c229db2820c 100644 --- a/xarray/backends/pnc_.py +++ b/xarray/backends/pnc_.py @@ -40,19 +40,27 @@ def __getitem__(self, key): class PncDataStore(AbstractDataStore, DataStorePickleMixin): """Store for accessing datasets via PseudoNetCDF """ - - def __init__(self, filename, mode='r', autoclose=False): + @classmethod + def open(cls, filename, format=None, writer=None, + autoclose=False, **format_kwds): from PseudoNetCDF import pncopen - try: - opener = functools.partial(pncopen, filename, mode=mode) - self.ds = opener() - except Exception: - opener = functools.partial(pncopen, filename) - self.ds = opener() + opener = functools.partial(pncopen, filename, **format_kwds) + ds = opener() + mode = format_kwds.get('mode', 'r') + return cls(ds, mode=mode, writer=writer, opener=opener, + autoclose=autoclose) + + def __init__(self, pnc_dataset, mode='r', writer=None, opener=None, + autoclose=False): + + if autoclose and opener is None: + raise ValueError('autoclose requires an opener') + + self.ds = pnc_dataset self._autoclose = autoclose self._isopen = True self._opener = opener - self._mode = mode + super(PncDataStore, self).__init__() def open_store_variable(self, name, var): data = indexing.LazilyIndexedArray(PncArrayWrapper(name, self)) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 1b0ab97b474..b26a51ec010 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -2203,7 +2203,8 @@ def test_write_store(self): @contextlib.contextmanager def open(self, path, **kwargs): - with open_dataset(path, engine='pseudonetcdf', autoclose=self.autoclose, + with open_dataset(path, engine='pseudonetcdf', + autoclose=self.autoclose, **kwargs) as ds: yield ds From 282408f899295ca3ca166dd1da123bceb07caedd Mon Sep 17 00:00:00 2001 From: Barron Henderson Date: Sat, 17 Mar 2018 11:03:59 -0400 Subject: [PATCH 11/46] Updated indexing and test fix Indexing supports #1899 --- xarray/backends/pnc_.py | 18 +++++++++--------- xarray/tests/test_backends.py | 2 +- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/xarray/backends/pnc_.py b/xarray/backends/pnc_.py index c229db2820c..0fbae6e344c 100644 --- a/xarray/backends/pnc_.py +++ b/xarray/backends/pnc_.py @@ -27,14 +27,14 @@ def get_array(self): return self.datastore.ds.variables[self.variable_name] def __getitem__(self, key): - key = indexing.unwrap_explicit_indexer( - key, target=self, allow=indexing.BasicIndexer) + key, np_inds = indexing.decompose_indexer( + key, self.shape, indexing.IndexingSupport.OUTER_1VECTOR) - with self.datastore.ensure_open(autoclose=True): - array = self.get_array() - if key == () and self.ndim == 0: - return array[...] 
-            return array[key]
+        array = self.get_array()[key.tuple]  # index backend array
+        if len(np_inds.tuple) > 0:
+            # index the loaded np.ndarray
+            array = indexing.NumpyIndexingAdapter(array)[np_inds]
+        return array
 
 
 class PncDataStore(AbstractDataStore, DataStorePickleMixin):
@@ -56,14 +56,14 @@ def __init__(self, pnc_dataset, mode='r', writer=None, opener=None,
         if autoclose and opener is None:
             raise ValueError('autoclose requires an opener')
 
-        self.ds = pnc_dataset
+        self._ds = pnc_dataset
         self._autoclose = autoclose
         self._isopen = True
         self._opener = opener
         super(PncDataStore, self).__init__()
 
     def open_store_variable(self, name, var):
-        data = indexing.LazilyIndexedArray(PncArrayWrapper(name, self))
+        data = indexing.LazilyOuterIndexedArray(PncArrayWrapper(name, self))
         return Variable(var.dimensions, data, dict([(k, getattr(var, k))
                                                     for k in var.ncattrs()]))
 
diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py
index a4eb96ada42..b55e48818cc 100644
--- a/xarray/tests/test_backends.py
+++ b/xarray/tests/test_backends.py
@@ -32,7 +32,7 @@
     assert_identical, flaky, has_netCDF4, has_scipy, network, raises_regex,
     requires_dask, requires_h5netcdf, requires_netCDF4, requires_pathlib,
     requires_pydap, requires_pynio, requires_rasterio, requires_scipy,
-    requires_scipy_or_netCDF4, requires_zarr)
+    requires_scipy_or_netCDF4, requires_zarr, requires_pnc)
 from .test_dataset import create_test_data
 
 try:

From b1890b13171a9b17ae26cc6d1f726b408a03bac3 Mon Sep 17 00:00:00 2001
From: Barron Henderson 
Date: Mon, 19 Mar 2018 20:55:28 -0400
Subject: [PATCH 12/46] Added PseudoNetCDF to doc/io.rst

---
 doc/io.rst | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/doc/io.rst b/doc/io.rst
index c14e1516b38..5aff61ca7dc 100644
--- a/doc/io.rst
+++ b/doc/io.rst
@@ -661,6 +661,33 @@ exporting your objects to pandas and using its broad range of `IO tools`_.
 
 .. _IO tools: http://pandas.pydata.org/pandas-docs/stable/io.html
 
+.. _io.PseudoNetCDF:
+
+
+Formats supported by PseudoNetCDF
+---------------------------------
+
+xarray can also read CAMx, BPCH, ARL PACKED BIT, and many other file
+formats supported by PseudoNetCDF, if PseudoNetCDF is installed.
+PseudoNetCDF can also provide Climate Forecasting Conventions to
+CMAQ files. In addition, PseudoNetCDF can automatically register custom
+readers that subclass PseudoNetCDF.PseudoNetCDFFile. PseudoNetCDF can
+identify readers heuristically, or format can be specified via a key in
+`backend_kwargs`.
+
+To use PseudoNetCDF to read such files, supply
+``engine='pseudonetcdf'`` to :py:func:`~xarray.open_dataset`.
+
+Add ``backend_kwargs={'format': '<format name>'}`` where ``<format name>``
+options are listed on the PseudoNetCDF page.
+
+We recommend installing PseudoNetCDF via conda::
+
+    conda install -c conda-forge PseudoNetCDF
+
+.. _PseudoNetCDF: http://github.com/barronh/PseudoNetCDF
+
+.. 
_combining multiple files: Combining multiple files From eda629f252af4a3663d229c09adbf758bb4e8c01 Mon Sep 17 00:00:00 2001 From: Barron Henderson Date: Mon, 19 Mar 2018 21:46:31 -0400 Subject: [PATCH 13/46] Changing test subtype --- xarray/tests/test_backends.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index b55e48818cc..1a961e2e376 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -342,6 +342,7 @@ def test_roundtrip_string_encoded_characters(self): self.assertEqual(actual['x'].encoding['_Encoding'], 'ascii') def test_roundtrip_datetime_data(self): + import pdb; pdb.set_trace() times = pd.to_datetime(['2000-01-01', '2000-01-02', 'NaT']) expected = Dataset({'t': ('t', times), 't0': times[0]}) kwds = {'encoding': {'t0': {'units': 'days since 1950-01-01'}}} @@ -2199,7 +2200,7 @@ class TestPyNioAutocloseTrue(TestPyNio): @requires_scipy @requires_pnc -class TestPnc(NetCDF3Only, TestCase): +class TestPnc(CFEncodedDataTest, TestCase): def test_write_store(self): # pnc is read-only for now pass From 816c7da05130e9dfb4dcf26a113c56fb3262801e Mon Sep 17 00:00:00 2001 From: Barron Henderson Date: Mon, 19 Mar 2018 21:49:55 -0400 Subject: [PATCH 14/46] Changing test subtype removing pdb --- xarray/tests/test_backends.py | 1 - 1 file changed, 1 deletion(-) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 1a961e2e376..7629410e5c3 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -342,7 +342,6 @@ def test_roundtrip_string_encoded_characters(self): self.assertEqual(actual['x'].encoding['_Encoding'], 'ascii') def test_roundtrip_datetime_data(self): - import pdb; pdb.set_trace() times = pd.to_datetime(['2000-01-01', '2000-01-02', 'NaT']) expected = Dataset({'t': ('t', times), 't0': times[0]}) kwds = {'encoding': {'t0': {'units': 'days since 1950-01-01'}}} From c8b2ca376ea9fc2ddd0253caef3ed82deb691742 Mon Sep 17 00:00:00 2001 From: Barron Henderson Date: Mon, 19 Mar 2018 22:00:45 -0400 Subject: [PATCH 15/46] pnc test case requires netcdf3only For now, pnc is only supporting the classic data model --- xarray/tests/test_backends.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 7629410e5c3..b55e48818cc 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -2199,7 +2199,7 @@ class TestPyNioAutocloseTrue(TestPyNio): @requires_scipy @requires_pnc -class TestPnc(CFEncodedDataTest, TestCase): +class TestPnc(NetCDF3Only, TestCase): def test_write_store(self): # pnc is read-only for now pass From 85ac334116e95649df8385fbc346194c5a19407f Mon Sep 17 00:00:00 2001 From: Barron Henderson Date: Sat, 24 Mar 2018 17:27:36 -0400 Subject: [PATCH 16/46] adding backend_kwargs default as dict This ensures **mapping is possible. --- xarray/backends/api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 322f5e3c5be..e87f236d512 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -148,7 +148,7 @@ def open_dataset(filename_or_obj, group=None, decode_cf=True, mask_and_scale=True, decode_times=True, autoclose=False, concat_characters=True, decode_coords=True, engine=None, chunks=None, lock=None, cache=None, drop_variables=None, - backend_kwargs=None): + backend_kwargs={}): """Load and decode a dataset from a file or file-like object. 
Parameters

From c46caeb65ae0611549863dad49a28e70b2d24e55 Mon Sep 17 00:00:00 2001
From: Barron Henderson 
Date: Sat, 24 Mar 2018 17:29:19 -0400
Subject: [PATCH 17/46] Upgrading tests to CFEncodedDataTest

Some tests are bypassed. PseudoNetCDF string treatment is not currently
compatible with xarray. This will be addressed soon.
---
 xarray/tests/test_backends.py | 57 ++++++++++++++++++++++++++++++-----
 1 file changed, 49 insertions(+), 8 deletions(-)

diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py
index b55e48818cc..46ac005722c 100644
--- a/xarray/tests/test_backends.py
+++ b/xarray/tests/test_backends.py
@@ -2199,34 +2199,75 @@ class TestPyNioAutocloseTrue(TestPyNio):
 
 @requires_scipy
 @requires_pnc
-class TestPnc(NetCDF3Only, TestCase):
+class TestPseudoNetCDF(CFEncodedDataTest, NetCDF3Only, TestCase):
     def test_write_store(self):
-        # pnc is read-only for now
+        # pseudonetcdf is read-only for now
         pass
 
     @contextlib.contextmanager
     def open(self, path, **kwargs):
-        with open_dataset(path, engine='pseudonetcdf',
-                          autoclose=self.autoclose,
+        with open_dataset(path, engine='pseudonetcdf', autoclose=self.autoclose,
                           **kwargs) as ds:
             yield ds
 
     def save(self, dataset, path, **kwargs):
-        dataset.to_netcdf(path, engine='scipy', **kwargs)
+        dataset.to_netcdf(path, engine='netcdf4', **kwargs)
 
     def test_weakrefs(self):
         example = Dataset({'foo': ('x', np.arange(5.0))})
         expected = example.rename({'foo': 'bar', 'x': 'y'})
 
         with create_tmp_file() as tmp_file:
-            example.to_netcdf(tmp_file, engine='scipy')
-            on_disk = open_dataset(tmp_file, engine='pnc')
+            example.to_netcdf(tmp_file, engine='netcdf4')
+            on_disk = open_dataset(tmp_file, engine='pseudonetcdf')
             actual = on_disk.rename({'foo': 'bar', 'x': 'y'})
             del on_disk  # trigger garbage collection
             assert_identical(actual, expected)
 
+    def test_pickle(self):
+        # pnc does not support pickling
+        pass
+
+    def test_pickle_dataarray(self):
+        # pnc does not support pickling
+        pass
+
+    def test_pickle_dataarray(self):
+        # pnc does not support pickling
+        pass
+
+    def test_roundtrip_boolean_dtype(self):
+        # pnc does not support boolean dtype
+        pass
+
+    def test_roundtrip_mask_and_scale(self):
+        # pnc does not support auto masking and scaling
+        pass
+
+    def test_roundtrip_object_dtype(self):
+        # pnc does not support object types
+        pass
+
+    def test_roundtrip_string_encoded_characters(self):
+        # pnc string treatment does not meet xarray expectations
+        # working on the right approach
+        pass
+
+    def test_roundtrip_test_data(self):
+        expected = create_test_data()
+        with self.roundtrip(expected) as actual:
+            # dim3 sting type is 
+            # self.check_dtypes_roundtripped(expected, actual)
+            assert_identical(expected, actual)
+
+    def test_unsigned_roundtrip_mask_and_scale(self):
+        # pnc does not support object types
+        pass
 
 
-class TestPncAutocloseTrue(TestPnc):
+class TestPseudoNetCDFAutocloseTrue(TestPseudoNetCDF):
     autoclose = True

From: Barron Henderson 
Date: Sat, 24 Mar 2018 17:35:12 -0400
Subject: [PATCH 18/46] Not currently supporting autoclose

I do not fully understand the use case, so I have not implemented these
tests. 
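For reference, the use case autoclose targets is reading more files than
the OS open-file limit allows: with autoclose=True, xarray closes each
underlying file after every read and reopens it on demand. A minimal
sketch of that pattern (the glob path here is hypothetical):

    import xarray as xr

    # With autoclose=True the number of member files can exceed the
    # OS file-descriptor limit, at the cost of reopening per read.
    ds = xr.open_mfdataset('obs/*.nc', autoclose=True)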
--- xarray/tests/test_backends.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 46ac005722c..1fdda2295f3 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -2267,10 +2267,6 @@ def test_unsigned_roundtrip_mask_and_scale(self): pass -class TestPseudoNetCDFAutocloseTrue(TestPseudoNetCDF): - autoclose = True - - @requires_rasterio @contextlib.contextmanager def create_tmp_geotiff(nx=4, ny=3, nz=3, From c3b7c827d9cfba35b19cf9268f4d4df5949b3ded Mon Sep 17 00:00:00 2001 From: Barron Henderson Date: Sat, 24 Mar 2018 17:37:39 -0400 Subject: [PATCH 19/46] Minor updates for flake8 --- xarray/tests/test_backends.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 1fdda2295f3..05ad40150d2 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -2206,7 +2206,8 @@ def test_write_store(self): @contextlib.contextmanager def open(self, path, **kwargs): - with open_dataset(path, engine='pseudonetcdf', autoclose=self.autoclose, + with open_dataset(path, engine='pseudonetcdf', + autoclose=self.autoclose, **kwargs) as ds: yield ds @@ -2232,10 +2233,6 @@ def test_pickle_dataarray(self): # pnc does not support pickling pass - def test_pickle_dataarray(self): - # pnc does not support pickling - pass - def test_roundtrip_boolean_dtype(self): # pnc does not support boolean dtype pass @@ -2261,7 +2258,6 @@ def test_roundtrip_test_data(self): # self.check_dtypes_roundtripped(expected, actual) assert_identical(expected, actual) - def test_unsigned_roundtrip_mask_and_scale(self): # pnc does not support object types pass From 7906492a05e05321d04f47ae439d30a354a17986 Mon Sep 17 00:00:00 2001 From: Barron Henderson Date: Sun, 25 Mar 2018 08:38:32 -0400 Subject: [PATCH 20/46] Explicit skipping Using pytest.mark.skip to skip unsupported tests --- xarray/tests/test_backends.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 05ad40150d2..db4f966924e 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -2197,7 +2197,7 @@ class TestPyNioAutocloseTrue(TestPyNio): autoclose = True -@requires_scipy +@requires_netCDF4 @requires_pnc class TestPseudoNetCDF(CFEncodedDataTest, NetCDF3Only, TestCase): def test_write_store(self): @@ -2225,26 +2225,30 @@ def test_weakrefs(self): del on_disk # trigger garbage collection assert_identical(actual, expected) + @pytest.mark.skip(reason='cannot pickle file objects') def test_pickle(self): - # pnc does not support pickling pass + @pytest.mark.skip(reason='cannot pickle file objects') def test_pickle_dataarray(self): - # pnc does not support pickling pass + @pytest.mark.skip(reason='does not support boolean dtype') def test_roundtrip_boolean_dtype(self): # pnc does not support boolean dtype pass + @pytest.mark.skip(reason='does not support auto masking and scaling') def test_roundtrip_mask_and_scale(self): # pnc does not support auto masking and scaling pass + @pytest.mark.skip(reason='does not support object types') def test_roundtrip_object_dtype(self): # pnc does not support object types pass + @pytest.mark.skip(reason='encoding types are not consistent with xarray') def test_roundtrip_string_encoded_characters(self): # pnc string treatment does not meet xarray expectations # working on the right approach @@ -2258,6 +2262,7 @@ def 
test_roundtrip_test_data(self):
         # self.check_dtypes_roundtripped(expected, actual)
         assert_identical(expected, actual)
 
+    @pytest.mark.skip(reason='does not support auto masking and scaling')
     def test_unsigned_roundtrip_mask_and_scale(self):
         # pnc does not support object types
         pass

From 4df9fba06ac880989d2a23e9b964c6d5219f977a Mon Sep 17 00:00:00 2001
From: Barron Henderson 
Date: Mon, 26 Mar 2018 21:24:20 -0400
Subject: [PATCH 21/46] removing trailing whitespace from pytest skip

---
 xarray/tests/test_backends.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py
index db4f966924e..55e4fde58d3 100644
--- a/xarray/tests/test_backends.py
+++ b/xarray/tests/test_backends.py
@@ -2225,25 +2225,25 @@ def test_weakrefs(self):
             del on_disk  # trigger garbage collection
             assert_identical(actual, expected)
 
-    @pytest.mark.skip(reason='cannot pickle file objects') 
+    @pytest.mark.skip(reason='cannot pickle file objects')
     def test_pickle(self):
         pass
 
-    @pytest.mark.skip(reason='cannot pickle file objects') 
+    @pytest.mark.skip(reason='cannot pickle file objects')
     def test_pickle_dataarray(self):
         pass
 
-    @pytest.mark.skip(reason='does not support boolean dtype') 
+    @pytest.mark.skip(reason='does not support boolean dtype')
     def test_roundtrip_boolean_dtype(self):
         # pnc does not support boolean dtype
         pass
 
-    @pytest.mark.skip(reason='does not support auto masking and scaling') 
+    @pytest.mark.skip(reason='does not support auto masking and scaling')
    def test_roundtrip_mask_and_scale(self):
         # pnc does not support auto masking and scaling
         pass
 
-    @pytest.mark.skip(reason='does not support object types') 
+    @pytest.mark.skip(reason='does not support object types')
     def test_roundtrip_object_dtype(self):
         # pnc does not support object types
         pass
@@ -2262,7 +2262,7 @@ def test_roundtrip_test_data(self):
         # self.check_dtypes_roundtripped(expected, actual)
         assert_identical(expected, actual)
 
-    @pytest.mark.skip(reason='does not support auto masking and scaling') 
+    @pytest.mark.skip(reason='does not support auto masking and scaling')
     def test_unsigned_roundtrip_mask_and_scale(self):
         # pnc does not support object types
         pass

From ec95a3ad5cf2a6c7184f9db601ee39369e137e74 Mon Sep 17 00:00:00 2001
From: Barron Henderson 
Date: Mon, 2 Apr 2018 20:49:17 -0400
Subject: [PATCH 22/46] Adding pip support

---
 ci/requirements-py36.yml | 1 +
 doc/io.rst               | 3 ++-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/ci/requirements-py36.yml b/ci/requirements-py36.yml
index 2788221ee74..07fb75462df 100644
--- a/ci/requirements-py36.yml
+++ b/ci/requirements-py36.yml
@@ -25,3 +25,4 @@ dependencies:
   - pytest-cov
   - pydap
   - lxml
+  - PseudoNetCDF
diff --git a/doc/io.rst b/doc/io.rst
index 5aff61ca7dc..a40850ace72 100644
--- a/doc/io.rst
+++ b/doc/io.rst
@@ -681,7 +681,8 @@ To use PseudoNetCDF to read such files, supply
 Add ``backend_kwargs={'format': '<format name>'}`` where ``<format name>``
 options are listed on the PseudoNetCDF page.
 
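+For example, a CAMx uamiv file could be opened with a sketch like the
+following (the file name is hypothetical; ``'uamiv'`` is one of the
+format options)::
+
+    import xarray as xr
+
+    camxds = xr.open_dataset('example.uamiv', engine='pseudonetcdf',
+                             backend_kwargs={'format': 'uamiv'})
+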
-We recommend installing PseudoNetCDF via conda::
+PseudoNetCDF is available via pip and has a recipe awaiting approval
+that will enable installing PseudoNetCDF via conda::
 
     conda install -c conda-forge PseudoNetCDF
 

From ad7b709f591fbc8ec4e3ceae816779e688ef291c Mon Sep 17 00:00:00 2001
From: Barron Henderson 
Date: Sat, 14 Apr 2018 12:13:34 -0400
Subject: [PATCH 23/46] Addressing comments

---
 doc/io.rst                    | 24 +++++++++++-------------
 xarray/backends/api.py        | 20 +++++++++++++++++---
 xarray/backends/pnc_.py       |  5 +++++
 xarray/tests/test_backends.py | 34 ----------------------------------
 4 files changed, 33 insertions(+), 50 deletions(-)

diff --git a/doc/io.rst b/doc/io.rst
index a40850ace72..2761bb76385 100644
--- a/doc/io.rst
+++ b/doc/io.rst
@@ -650,25 +650,13 @@ We recommend installing PyNIO via conda::
 
 .. _PyNIO: https://www.pyngl.ucar.edu/Nio.shtml
 
-.. _combining multiple files:
-
-
-Formats supported by Pandas
----------------------------
-
-For more options (tabular formats and CSV files in particular), consider
-exporting your objects to pandas and using its broad range of `IO tools`_.
-
-.. _IO tools: http://pandas.pydata.org/pandas-docs/stable/io.html
-
 .. _io.PseudoNetCDF:
 
-
 Formats supported by PseudoNetCDF
 ---------------------------------
 
 xarray can also read CAMx, BPCH, ARL PACKED BIT, and many other file
-formats supported by PseudoNetCDF, if PseudoNetCDF is installed.
+formats supported by PseudoNetCDF_, if PseudoNetCDF is installed.
 PseudoNetCDF can also provide Climate Forecasting Conventions to
 CMAQ files. In addition, PseudoNetCDF can automatically register custom
 readers that subclass PseudoNetCDF.PseudoNetCDFFile. PseudoNetCDF can
@@ -688,6 +676,16 @@ that will enable installing PseudoNetCDF via conda::
 
 .. _PseudoNetCDF: http://github.com/barronh/PseudoNetCDF
 
+
+Formats supported by Pandas
+---------------------------
+
+For more options (tabular formats and CSV files in particular), consider
+exporting your objects to pandas and using its broad range of `IO tools`_.
+
+.. _IO tools: http://pandas.pydata.org/pandas-docs/stable/io.html
+
+
+.. _combining multiple files:
 
 
diff --git a/xarray/backends/api.py b/xarray/backends/api.py
index e87f236d512..fb9bc7bf2a5 100644
--- a/xarray/backends/api.py
+++ b/xarray/backends/api.py
@@ -148,7 +148,7 @@ def open_dataset(filename_or_obj, group=None, decode_cf=True,
                  mask_and_scale=True, decode_times=True, autoclose=False,
                  concat_characters=True, decode_coords=True, engine=None,
                  chunks=None, lock=None, cache=None, drop_variables=None,
-                 backend_kwargs={}):
+                 backend_kwargs=None):
     """Load and decode a dataset from a file or file-like object.
 
     Parameters
@@ -213,6 +213,10 @@ def open_dataset(filename_or_obj, group=None, decode_cf=True,
         A variable or list of variables to exclude from being parsed from the
         dataset. This may be useful to drop variables with problems or
         inconsistent values.
+    backend_kwargs: dictionary, optional
+        A dictionary of keyword arguments to pass on to the backend. This
+        may be useful when backend options would improve performance or
+        allow user control of dataset processing.
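+        For example, ``backend_kwargs={'format': 'uamiv'}`` directs the
+        'pseudonetcdf' engine to read a CAMx uamiv file.
 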
Returns ------- @@ -232,6 +236,9 @@ def open_dataset(filename_or_obj, group=None, decode_cf=True, if cache is None: cache = chunks is None + if backend_kwargs is None: + backend_kwargs = {} + def maybe_decode_store(store, lock=False): ds = conventions.decode_cf( store, mask_and_scale=mask_and_scale, decode_times=decode_times, @@ -334,7 +341,8 @@ def maybe_decode_store(store, lock=False): def open_dataarray(filename_or_obj, group=None, decode_cf=True, mask_and_scale=True, decode_times=True, autoclose=False, concat_characters=True, decode_coords=True, engine=None, - chunks=None, lock=None, cache=None, drop_variables=None): + chunks=None, lock=None, cache=None, drop_variables=None, + backend_kwargs=None): """Open an DataArray from a netCDF file containing a single data variable. This is designed to read netCDF files with only one data variable. If @@ -401,6 +409,10 @@ def open_dataarray(filename_or_obj, group=None, decode_cf=True, A variable or list of variables to exclude from being parsed from the dataset. This may be useful to drop variables with problems or inconsistent values. + backend_kwargs: dictionary, optional + A dictionary of keyword arguments to pass on to the backend. This + may be useful when backend options would improve performance or + allow user control of dataset processing. Notes ----- @@ -415,13 +427,15 @@ def open_dataarray(filename_or_obj, group=None, decode_cf=True, -------- open_dataset """ + dataset = open_dataset(filename_or_obj, group=group, decode_cf=decode_cf, mask_and_scale=mask_and_scale, decode_times=decode_times, autoclose=autoclose, concat_characters=concat_characters, decode_coords=decode_coords, engine=engine, chunks=chunks, lock=lock, cache=cache, - drop_variables=drop_variables) + drop_variables=drop_variables, + backend_kwargs=backend_kwargs) if len(dataset.data_vars) != 1: raise ValueError('Given file dataset contains more than one data ' diff --git a/xarray/backends/pnc_.py b/xarray/backends/pnc_.py index 0fbae6e344c..ee2f7d5dd12 100644 --- a/xarray/backends/pnc_.py +++ b/xarray/backends/pnc_.py @@ -61,6 +61,11 @@ def __init__(self, pnc_dataset, mode='r', writer=None, opener=None, self._isopen = True self._opener = opener super(PncDataStore, self).__init__() + if not hasattr(self, '_mode'): + try: + self._mode = mode + except Exception: + pass def open_store_variable(self, name, var): data = indexing.LazilyOuterIndexedArray(PncArrayWrapper(name, self)) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 7186293b489..369f30ef05d 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -2229,40 +2229,6 @@ def open(self, path, **kwargs): def save(self, dataset, path, **kwargs): dataset.to_netcdf(path, engine='netcdf4', **kwargs) - def test_weakrefs(self): - example = Dataset({'foo': ('x', np.arange(5.0))}) - expected = example.rename({'foo': 'bar', 'x': 'y'}) - - with create_tmp_file() as tmp_file: - example.to_netcdf(tmp_file, engine='netcdf4') - on_disk = open_dataset(tmp_file, engine='pseudonetcdf') - actual = on_disk.rename({'foo': 'bar', 'x': 'y'}) - del on_disk # trigger garbage collection - assert_identical(actual, expected) - - @pytest.mark.skip(reason='cannot pickle file objects') - def test_pickle(self): - pass - - @pytest.mark.skip(reason='cannot pickle file objects') - def test_pickle_dataarray(self): - pass - - @pytest.mark.skip(reason='does not support boolean dtype') - def test_roundtrip_boolean_dtype(self): - # pnc does not support boolean dtype - pass - - 
@pytest.mark.skip(reason='does not support auto masking and scaling') - def test_roundtrip_mask_and_scale(self): - # pnc does not support auto masking and scaling - pass - - @pytest.mark.skip(reason='does not support object types') - def test_roundtrip_object_dtype(self): - # pnc does not support object types - pass - @pytest.mark.skip(reason='encoding types are not consistent with xarray') def test_roundtrip_string_encoded_characters(self): # pnc string treatment does not meet xarray expectations From 26dd0f974b57aa774fe5d295b6d0432257507765 Mon Sep 17 00:00:00 2001 From: Barron Henderson Date: Sun, 15 Apr 2018 09:14:49 -0400 Subject: [PATCH 24/46] Bypassing pickle, mask/scale, and object These tests cause errors that do not affect desired backend performance. --- xarray/tests/test_backends.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 369f30ef05d..b7a2daffdd1 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -2229,6 +2229,29 @@ def open(self, path, **kwargs): def save(self, dataset, path, **kwargs): dataset.to_netcdf(path, engine='netcdf4', **kwargs) + @pytest.mark.skip(reason='cannot pickle file objects') + def test_pickle(self): + pass + + @pytest.mark.skip(reason='cannot pickle file objects') + def test_pickle_dataarray(self): + pass + + @pytest.mark.skip(reason='does not support boolean dtype') + def test_roundtrip_boolean_dtype(self): + # pnc does not support boolean dtype + pass + + @pytest.mark.skip(reason='does not support auto masking and scaling') + def test_roundtrip_mask_and_scale(self): + # pnc does not support auto masking and scaling + pass + + @pytest.mark.skip(reason='does not support object types') + def test_roundtrip_object_dtype(self): + # pnc does not support object types + pass + @pytest.mark.skip(reason='encoding types are not consistent with xarray') def test_roundtrip_string_encoded_characters(self): # pnc string treatment does not meet xarray expectations From d999de1db1579a29977a70d8621bec947bda1080 Mon Sep 17 00:00:00 2001 From: Barron Henderson Date: Sun, 15 Apr 2018 16:21:22 -0400 Subject: [PATCH 25/46] Added uamiv test PseudoNetCDF reads other formats. 
This adds a test of uamiv to the standard test for a backend and skips mask/scale, object, and boolean tests --- xarray/tests/data/example.uamiv | Bin 0 -> 608 bytes xarray/tests/test_backends.py | 34 ++++++++++++++++++++++++-------- 2 files changed, 26 insertions(+), 8 deletions(-) create mode 100644 xarray/tests/data/example.uamiv diff --git a/xarray/tests/data/example.uamiv b/xarray/tests/data/example.uamiv new file mode 100644 index 0000000000000000000000000000000000000000..fcedcd53097122839b5b94d1fabd2cb70d7c003e GIT binary patch literal 608 zcmb8rv1$TA5XSL2h+tviBU~jm38#rx0dEtcl_(OdQiP~rL`g_OlET6=WlBpQ#a5rf zN6G)YTSY{W4E%29cK2pS&4S2zd|YF)5KZjoR;gEOedvC#K<_&avzwN`8~gXTIF xI-B-6oH6M=RsVnVGX1`ok76FN>IIhAm^lN}xe)vVE=C)Vc*P7q_{H25(?3@UPTv3k literal 0 HcmV?d00001 diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index b7a2daffdd1..e4ef74d6fde 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -2226,17 +2226,35 @@ def open(self, path, **kwargs): **kwargs) as ds: yield ds + def test_uamiv_format_read(self): + """ + Open a CAMx file and test data variables + """ + camxfile = open_example_dataset('example.uamiv', engine='pseudonetcdf', + autoclose=False, + backend_kwargs=dict(format='uamiv')) + data = np.arange(20, dtype='f').reshape(1, 1, 4, 5) + expected = xr.Variable(('TSTEP', 'LAY', 'ROW', 'COL'), data, + dict(units='ppm', long_name='O3'.ljust(16), + var_desc='O3'.ljust(80))) + actual = camxfile.variables['O3'] + assert_allclose(expected, actual) + + data = np.array( + ['2002-06-03T00:00:00.000000000'], + dtype='datetime64[ns]' + ) + expected = xr.Variable(('TSTEP',), data, + dict(bounds='time_bounds', + long_name=('synthesized time coordinate ' + + 'from SDATE, STIME, STEP ' + + 'global attributes'))) + actual = camxfile.variables['time'] + assert_allclose(expected, actual) + def save(self, dataset, path, **kwargs): dataset.to_netcdf(path, engine='netcdf4', **kwargs) - @pytest.mark.skip(reason='cannot pickle file objects') - def test_pickle(self): - pass - - @pytest.mark.skip(reason='cannot pickle file objects') - def test_pickle_dataarray(self): - pass - @pytest.mark.skip(reason='does not support boolean dtype') def test_roundtrip_boolean_dtype(self): # pnc does not support boolean dtype From 87e8612871033fc0d3d06471a32d7e83717dee05 Mon Sep 17 00:00:00 2001 From: Barron Henderson Date: Sun, 15 Apr 2018 16:22:36 -0400 Subject: [PATCH 26/46] Adding support for autoclose ensure open must be called before accessing variable data --- xarray/backends/pnc_.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/xarray/backends/pnc_.py b/xarray/backends/pnc_.py index ee2f7d5dd12..25cc3618da1 100644 --- a/xarray/backends/pnc_.py +++ b/xarray/backends/pnc_.py @@ -30,7 +30,9 @@ def __getitem__(self, key): key, np_inds = indexing.decompose_indexer( key, self.shape, indexing.IndexingSupport.OUTER_1VECTOR) - array = self.get_array()[key.tuple] # index backend array + with self.datastore.ensure_open(autoclose=True): + array = self.get_array()[key.tuple] # index backend array + if len(np_inds.tuple) > 0: # index the loaded np.ndarray array = indexing.NumpyIndexingAdapter(array)[np_inds] @@ -68,7 +70,10 @@ def __init__(self, pnc_dataset, mode='r', writer=None, opener=None, pass def open_store_variable(self, name, var): - data = indexing.LazilyOuterIndexedArray(PncArrayWrapper(name, self)) + with self.ensure_open(autoclose=False): + data = indexing.LazilyOuterIndexedArray( + PncArrayWrapper(name, self) + ) return 
Variable(var.dimensions, data, dict([(k, getattr(var, k))
                                                    for k in var.ncattrs()]))
 
@@ -96,4 +101,4 @@ def get_encoding(self):
     def close(self):
         if self._isopen:
             self.ds.close()
-            self._isopen = False
+            self._isopen = False

From dd94be58830c73012361b89433e67f4eb7e28dcf Mon Sep 17 00:00:00 2001
From: Barron Henderson 
Date: Sun, 15 Apr 2018 16:23:28 -0400
Subject: [PATCH 27/46] Adding backend_kwargs to all backends

Most backends currently take no keywords, so an empty dictionary is
appropriate.
---
 xarray/backends/api.py | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/xarray/backends/api.py b/xarray/backends/api.py
index fb9bc7bf2a5..2641079a950 100644
--- a/xarray/backends/api.py
+++ b/xarray/backends/api.py
@@ -304,18 +304,23 @@ def maybe_decode_store(store, lock=False):
 
         if engine == 'netcdf4':
             store = backends.NetCDF4DataStore.open(filename_or_obj,
                                                    group=group,
-                                                   autoclose=autoclose)
+                                                   autoclose=autoclose,
+                                                   **backend_kwargs)
         elif engine == 'scipy':
             store = backends.ScipyDataStore(filename_or_obj,
-                                            autoclose=autoclose)
+                                            autoclose=autoclose,
+                                            **backend_kwargs)
         elif engine == 'pydap':
-            store = backends.PydapDataStore.open(filename_or_obj)
+            store = backends.PydapDataStore.open(filename_or_obj,
+                                                 **backend_kwargs)
         elif engine == 'h5netcdf':
             store = backends.H5NetCDFStore(filename_or_obj, group=group,
-                                           autoclose=autoclose)
+                                           autoclose=autoclose,
+                                           **backend_kwargs)
         elif engine == 'pynio':
             store = backends.NioDataStore(filename_or_obj,
-                                          autoclose=autoclose)
+                                          autoclose=autoclose,
+                                          **backend_kwargs)
         elif engine == 'pseudonetcdf':
             store = backends.PncDataStore.open(filename_or_obj,
                                                autoclose=autoclose,

From 231170124a8ef4b19c80790d7ce413462d8c06a9 Mon Sep 17 00:00:00 2001
From: Stephan Hoyer 
Date: Sun, 15 Apr 2018 19:55:17 -0700
Subject: [PATCH 28/46] Small tweaks to PNC backend

---
 xarray/backends/__init__.py                   |  4 +--
 xarray/backends/api.py                        |  5 ++--
 xarray/backends/{pnc_.py => pseudonetcdf_.py} | 15 ++++------
 xarray/tests/__init__.py                      |  2 +-
 xarray/tests/test_backends.py                 | 30 +++++--------------
 5 files changed, 19 insertions(+), 37 deletions(-)
 rename xarray/backends/{pnc_.py => pseudonetcdf_.py} (88%)

diff --git a/xarray/backends/__init__.py b/xarray/backends/__init__.py
index 9799c45dac5..47a2011a3af 100644
--- a/xarray/backends/__init__.py
+++ b/xarray/backends/__init__.py
@@ -10,7 +10,7 @@
 from .pynio_ import NioDataStore
 from .scipy_ import ScipyDataStore
 from .h5netcdf_ import H5NetCDFStore
-from .pnc_ import PncDataStore
+from .pseudonetcdf_ import PseudoNetCDFDataStore
 from .zarr import ZarrStore
 
@@ -22,5 +22,5 @@
     'ScipyDataStore',
     'H5NetCDFStore',
     'ZarrStore',
-    'PncDataStore',
+    'PseudoNetCDFDataStore',
 ]

diff --git a/xarray/backends/api.py b/xarray/backends/api.py
index 2641079a950..f2e76a707bc 100644
--- a/xarray/backends/api.py
+++ b/xarray/backends/api.py
@@ -322,9 +322,8 @@ def maybe_decode_store(store, lock=False):
                                           autoclose=autoclose,
                                           **backend_kwargs)
         elif engine == 'pseudonetcdf':
-            store = backends.PncDataStore.open(filename_or_obj,
-                                               autoclose=autoclose,
-                                               **backend_kwargs)
+            store = backends.PseudoNetCDFDataStore.open(
+                filename_or_obj, autoclose=autoclose, **backend_kwargs)
         else:
             raise ValueError('unrecognized engine for open_dataset: %r'
                              % engine)
 
diff --git a/xarray/backends/pnc_.py b/xarray/backends/pseudonetcdf_.py
similarity index 88%
rename from xarray/backends/pnc_.py
rename to xarray/backends/pseudonetcdf_.py
index 25cc3618da1..c481bf848b9 100644
--- a/xarray/backends/pnc_.py
+++ 
b/xarray/backends/pseudonetcdf_.py @@ -7,6 +7,7 @@ import numpy as np from .. import Variable +from ..core.pycompat import OrderedDict from ..core.utils import (FrozenOrderedDict, Frozen) from ..core import indexing @@ -39,7 +40,7 @@ def __getitem__(self, key): return array -class PncDataStore(AbstractDataStore, DataStorePickleMixin): +class PseudoNetCDFDataStore(AbstractDataStore, DataStorePickleMixin): """Store for accessing datasets via PseudoNetCDF """ @classmethod @@ -62,20 +63,16 @@ def __init__(self, pnc_dataset, mode='r', writer=None, opener=None, self._autoclose = autoclose self._isopen = True self._opener = opener - super(PncDataStore, self).__init__() - if not hasattr(self, '_mode'): - try: - self._mode = mode - except Exception: - pass + self._mode = mode + super(PseudoNetCDFDataStore, self).__init__() def open_store_variable(self, name, var): with self.ensure_open(autoclose=False): data = indexing.LazilyOuterIndexedArray( PncArrayWrapper(name, self) ) - return Variable(var.dimensions, data, dict([(k, getattr(var, k)) - for k in var.ncattrs()])) + attrs = OrderedDict((k, getattr(var, k)) for k in var.ncattrs()) + return Variable(var.dimensions, data, attrs) def get_variables(self): with self.ensure_open(autoclose=False): diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py index 675061d7577..0925d46c932 100644 --- a/xarray/tests/__init__.py +++ b/xarray/tests/__init__.py @@ -68,7 +68,7 @@ def _importorskip(modname, minversion=None): has_netCDF4, requires_netCDF4 = _importorskip('netCDF4') has_h5netcdf, requires_h5netcdf = _importorskip('h5netcdf') has_pynio, requires_pynio = _importorskip('Nio') -has_pnc, requires_pnc = _importorskip('PseudoNetCDF') +has_pseudonetcdf, requires_pseudonetcdf = _importorskip('PseudoNetCDF') has_netcdftime, requires_netcdftime = _importorskip('netcdftime') has_dask, requires_dask = _importorskip('dask') has_bottleneck, requires_bottleneck = _importorskip('bottleneck') diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index e4ef74d6fde..3840c8f62ba 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -32,7 +32,7 @@ assert_identical, flaky, has_netCDF4, has_scipy, network, raises_regex, requires_dask, requires_h5netcdf, requires_netCDF4, requires_pathlib, requires_pydap, requires_pynio, requires_rasterio, requires_scipy, - requires_scipy_or_netCDF4, requires_zarr, requires_pnc) + requires_scipy_or_netCDF4, requires_zarr, requires_pseudonetcdf) from .test_dataset import create_test_data try: @@ -2213,8 +2213,8 @@ class PyNioTestAutocloseTrue(PyNioTest): @requires_netCDF4 -@requires_pnc -class TestPseudoNetCDF(CFEncodedDataTest, NetCDF3Only, TestCase): +@requires_pseudonetcdf +class PseudoNetCDFTest(CFEncodedDataTest, TestCase): def test_write_store(self): # pseudonetcdf is read-only for now pass @@ -2255,38 +2255,24 @@ def test_uamiv_format_read(self): def save(self, dataset, path, **kwargs): dataset.to_netcdf(path, engine='netcdf4', **kwargs) - @pytest.mark.skip(reason='does not support boolean dtype') + @pytest.mark.xfail(reason='https://github.com/pydata/xarray/issues/2061') def test_roundtrip_boolean_dtype(self): - # pnc does not support boolean dtype pass - @pytest.mark.skip(reason='does not support auto masking and scaling') + @pytest.mark.xfail(reason='https://github.com/pydata/xarray/issues/2061') def test_roundtrip_mask_and_scale(self): - # pnc does not support auto masking and scaling pass - @pytest.mark.skip(reason='does not support object types') + 
@pytest.mark.xfail(reason='https://github.com/pydata/xarray/issues/2061')
     def test_roundtrip_object_dtype(self):
         pass
 
-    @pytest.mark.skip(reason='encoding types are not consistent with xarray')
+    @pytest.mark.xfail(reason='https://github.com/pydata/xarray/issues/2061')
     def test_roundtrip_string_encoded_characters(self):
         pass
 
-    def test_roundtrip_test_data(self):
-        expected = create_test_data()
-        with self.roundtrip(expected) as actual:
-            # dim3 sting type is 
-            # self.check_dtypes_roundtripped(expected, actual)
-            assert_identical(expected, actual)
-

From: Barron Henderson 
Date: Tue, 17 Apr 2018 21:15:48 -0400
Subject: [PATCH 29/46] remove warning and update whats-new

---
 doc/whats-new.rst             | 5 +++--
 xarray/tests/test_backends.py | 5 ++++-
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 37f6ba3a964..8c6d1a074fa 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -147,8 +147,9 @@ Documentation
 Enhancements
 ~~~~~~~~~~~~
 
-- added backend for many Atmospheric data formats ranging including GEOS-Chem,
-  CAMx, NOAA arlpacked bit and many others.
+- added a PseudoNetCDF backend for many Atmospheric data formats including
+  GEOS-Chem, CAMx, NOAA arlpacked bit and many others.
+  By `Barron Henderson `_.
 - reduce methods such as :py:func:`DataArray.sum()` now accepts ``dtype``
 
diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py
index e4ef74d6fde..e0755e935bd 100644
--- a/xarray/tests/test_backends.py
+++ b/xarray/tests/test_backends.py
@@ -2230,7 +2230,10 @@ def test_uamiv_format_read(self):
         """
         Open a CAMx file and test data variables
         """
-        camxfile = open_example_dataset('example.uamiv', engine='pseudonetcdf',
+        with warnings.catch_warnings():
+            warnings.filterwarnings('ignore', category=UserWarning,
+                message='IOAPI_ISPH is assumed to be 6370000.; consistent with WRF')
+            camxfile = open_example_dataset('example.uamiv', engine='pseudonetcdf',
                                         autoclose=False,
                                         backend_kwargs=dict(format='uamiv'))
         data = np.arange(20, dtype='f').reshape(1, 1, 4, 5)

From 68997e00f6fd13ae796a5f26ae1257bd8b32d16a Mon Sep 17 00:00:00 2001
From: Barron Henderson 
Date: Tue, 17 Apr 2018 21:26:47 -0400
Subject: [PATCH 30/46] Separating install and io pnc doc and updating whats new

---
 doc/installing.rst |  7 +++++--
 doc/io.rst         |  5 -----
 doc/whats-new.rst  | 51 ++++++++++++----------------------------------
 3 files changed, 18 insertions(+), 45 deletions(-)

diff --git a/doc/installing.rst b/doc/installing.rst
index bb42129deea..33f01b8c770 100644
--- a/doc/installing.rst
+++ b/doc/installing.rst
@@ -28,6 +28,9 @@ For netCDF and IO
 - `cftime `__: recommended if you
   want to encode/decode datetimes for non-standard calendars or dates before
   year 1678 or after year 2262.
+- `PseudoNetCDF `__: recommended
+  for accessing CAMx, GEOS-Chem (bpch), NOAA ARL files, ICARTT files
+  (ffi1001) and many others.
 
 For accelerating xarray
 ~~~~~~~~~~~~~~~~~~~~~~~
@@ -65,9 +68,9 @@ with its recommended dependencies using the conda command line tool::
 
 .. 
_conda: http://conda.io/ -We recommend using the community maintained `conda-forge `__ channel if you need difficult\-to\-build dependencies such as cartopy or pynio:: +We recommend using the community maintained `conda-forge `__ channel if you need difficult\-to\-build dependencies such as cartopy, pynio or PseudoNetCDF:: - $ conda install -c conda-forge xarray cartopy pynio + $ conda install -c conda-forge xarray cartopy pynio pseudonetcdf New releases may also appear in conda-forge before being updated in the default channel. diff --git a/doc/io.rst b/doc/io.rst index 2761bb76385..43f4450b283 100644 --- a/doc/io.rst +++ b/doc/io.rst @@ -669,11 +669,6 @@ To use PseudoNetCDF to read such files, supply Add ``backend_kwargs={'format': ''}`` where `` options are listed on the PseudoNetCDF page. -PseudoNetCDF is available via pip and has a recipe awaiting approval -that will enable installing PseudoNetCDF via conda:: - - conda install -c conda-forge PseudoNetCDF - .. _PseuodoNetCDF: http://github.com/barronh/PseudoNetCDF diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 837d01b95e3..9207849f2c7 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -31,59 +31,38 @@ What's New v0.10.4 (unreleased) -------------------- +Documentation +~~~~~~~~~~~~~ + Enhancements ~~~~~~~~~~~~ -- Support writing lists of strings as netCDF attributes (:issue:`2044`). - By `Dan Nowacki `_. - -Bug fixes -~~~~~~~~~ +- added a PseudoNetCDF backend for many Atmospheric data formats including + GEOS-Chem, CAMx, NOAA arlpacked bit and many others. + By `Barron Henderson `_. .. _whats-new.0.10.3: -v0.10.3 (April 13, 2018) ------------------------- +v0.10.3 (unreleased) +-------------------- -The minor release includes a number of bug-fixes and backwards compatible enhancements. +Documentation +~~~~~~~~~~~~~ Enhancements ~~~~~~~~~~~~ -- :py:meth:`~xarray.DataArray.isin` and :py:meth:`~xarray.Dataset.isin` methods, - which test each value in the array for whether it is contained in the - supplied list, returning a bool array. See :ref:`selecting values with isin` - for full details. Similar to the ``np.isin`` function. - By `Maximilian Roos `_. - -- Some speed improvement to construct :py:class:`~xarray.DataArrayRolling` - object (:issue:`1993`) - By `Keisuke Fujii `_. - -- Handle variables with different values for ``missing_value`` and - ``_FillValue`` by masking values for both attributes; previously this - resulted in a ``ValueError``. (:issue:`2016`) - By `Ryan May `_. + - Some speed improvement to construct :py:class:`~xarray.DataArrayRolling` + object (:issue:`1993`) + By `Keisuke Fujii `_. Bug fixes ~~~~~~~~~ -- Fixed ``decode_cf`` function to operate lazily on dask arrays - (:issue:`1372`). By `Ryan Abernathey `_. - Fixed labeled indexing with slice bounds given by xarray objects with datetime64 or timedelta64 dtypes (:issue:`1240`). By `Stephan Hoyer `_. -- Attempting to convert an xarray.Dataset into a numpy array now raises an - informative error message. - By `Stephan Hoyer `_. -- Fixed a bug in decode_cf_datetime where ``int32`` arrays weren't parsed - correctly (:issue:`2002`). - By `Fabien Maussion `_. -- When calling `xr.auto_combine()` or `xr.open_mfdataset()` with a `concat_dim`, - the resulting dataset will have that one-element dimension (it was - silently dropped, previously) (:issue:`1988`). - By `Ben Root `_. .. 
_whats-new.0.10.2: @@ -184,10 +163,6 @@ Documentation Enhancements ~~~~~~~~~~~~ -- added a PseudoNetCDF backend for many Atmospheric data formats including - GEOS-Chem, CAMx, NOAA arlpacked bit and many others. - By `Barron Henderson `_. -- reduce methods such as :py:func:`DataArray.sum()` now accepts ``dtype`` **New functions and methods**: - Added :py:meth:`DataArray.to_iris` and From 70968ca3c01799cfcbe26c60a04529e9235b6531 Mon Sep 17 00:00:00 2001 From: Barron Henderson Date: Tue, 17 Apr 2018 21:32:49 -0400 Subject: [PATCH 31/46] fixing line length in test --- xarray/tests/test_backends.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index ee7753d6aaf..046abceb491 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -2266,10 +2266,12 @@ def test_uamiv_format_read(self): """ with warnings.catch_warnings(): warnings.filterwarnings('ignore', category=UserWarning, - message='IOAPI_ISPH is assumed to be 6370000.; consistent with WRF') - camxfile = open_example_dataset('example.uamiv', engine='pseudonetcdf', - autoclose=False, - backend_kwargs=dict(format='uamiv')) + message=('IOAPI_ISPH is assumed to be ' + + '6370000.; consistent with WRF')) + camxfile = open_example_dataset('example.uamiv', + engine='pseudonetcdf', + autoclose=False, + backend_kwargs={'format': 'uamiv'}) data = np.arange(20, dtype='f').reshape(1, 1, 4, 5) expected = xr.Variable(('TSTEP', 'LAY', 'ROW', 'COL'), data, dict(units='ppm', long_name='O3'.ljust(16), From 1f3287e70444375426a106ac33a8eda6e8bf26d2 Mon Sep 17 00:00:00 2001 From: Barron Henderson Date: Fri, 27 Apr 2018 23:36:17 -0400 Subject: [PATCH 32/46] Tests now use non-netcdf files --- xarray/tests/data/example.ict | 31 ++++++ xarray/tests/test_backends.py | 171 ++++++++++++++++++++++++++++------ 2 files changed, 176 insertions(+), 26 deletions(-) create mode 100644 xarray/tests/data/example.ict diff --git a/xarray/tests/data/example.ict b/xarray/tests/data/example.ict new file mode 100644 index 00000000000..bc04888fb80 --- /dev/null +++ b/xarray/tests/data/example.ict @@ -0,0 +1,31 @@ +27, 1001 +Henderson, Barron +U.S. EPA +Example file with artificial data +JUST_A_TEST +1, 1 +2018, 04, 27, 2018, 04, 27 +0 +Start_UTC +7 +1, 1, 1, 1, 1 +-9999, -9999, -9999, -9999, -9999 +lat, degrees_north +lon, degrees_east +elev, meters +TEST_ppbv, ppbv +TESTM_ppbv, ppbv +0 +8 +ULOD_FLAG: -7777 +ULOD_VALUE: N/A +LLOD_FLAG: -8888 +LLOD_VALUE: N/A, N/A, N/A, N/A, 0.025 +OTHER_COMMENTS: www-air.larc.nasa.gov/missions/etc/IcarttDataFormat.htm +REVISION: R0 +R0: No comments for this revision. 
+Start_UTC, lat, lon, elev, TEST_ppbv, TESTM_ppbv +43200, 41.00000, -71.00000, 5, 1.2345, 2.220 +46800, 42.00000, -72.00000, 15, 2.3456, -9999 +50400, 42.00000, -73.00000, 20, 3.4567, -7777 +50400, 42.00000, -74.00000, 25, 4.5678, -8888 \ No newline at end of file diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index c7b3631e67f..429a79e0a4e 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -2233,12 +2233,9 @@ class PyNioTestAutocloseTrue(PyNioTest): autoclose = True -@requires_netCDF4 @requires_pseudonetcdf -class PseudoNetCDFTest(CFEncodedDataTest, TestCase): - def test_write_store(self): - # pseudonetcdf is read-only for now - pass +class PseudoNetCDFFormatTest(TestCase): + autoclose = False @contextlib.contextmanager def open(self, path, **kwargs): @@ -2247,6 +2244,123 @@ def open(self, path, **kwargs): **kwargs) as ds: yield ds + @contextlib.contextmanager + def roundtrip(self, data, save_kwargs={}, open_kwargs={}, + allow_cleanup_failure=False): + with create_tmp_file( + allow_cleanup_failure=allow_cleanup_failure) as path: + self.save(data, path, **save_kwargs) + with self.open(path, **open_kwargs) as ds: + yield ds + + def test_ict_format(self): + """ + Open a CAMx file and test data variables + """ + ictfile = open_example_dataset('example.ict', + engine='pseudonetcdf', + autoclose=False, + backend_kwargs={'format': 'ffi1001'}) + stdattr = { + 'fill_value': -9999.0, + 'scale': 1, + 'llod_flag': -8888, + 'llod_value': 'N/A', + 'ulod_flag': -7777, + 'ulod_value': 'N/A' + } + + input = { + 'coords': {}, + 'attrs': { + 'fmt': '1001', 'n_header_lines': 27, + 'PI_NAME': 'Henderson, Barron', + 'ORGANIZATION_NAME': 'U.S. EPA', + 'SOURCE_DESCRIPTION': 'Example file with artificial data', + 'MISSION_NAME': 'JUST_A_TEST', + 'VOLUME_INFO': '1, 1', + 'SDATE': '2018, 04, 27', 'WDATE': '2018, 04, 27', + 'TIME_INTERVAL': '0', + 'INDEPENDENT_VARIABLE': 'Start_UTC', + 'ULOD_FLAG': '-7777', 'ULOD_VALUE': 'N/A', + 'LLOD_FLAG': '-8888', + 'LLOD_VALUE': ('N/A, N/A, N/A, N/A, 0.025'), + 'OTHER_COMMENTS': ('www-air.larc.nasa.gov/missions/etc/' + + 'IcarttDataFormat.htm'), + 'REVISION': 'R0', + 'R0': 'No comments for this revision.', + 'TFLAG': 'Start_UTC' + }, + 'dims': {'POINTS': 4}, + 'data_vars': { + 'Start_UTC': { + 'data': [43200.0, 46800.0, 50400.0, 50400.0], + 'dims': ('POINTS',), + 'attrs': { + 'units': 'Start_UTC', + 'standard_name': 'Start_UTC', + **stdattr + } + }, + 'lat': { + 'data': [41.0, 42.0, 42.0, 42.0], + 'dims': ('POINTS',), + 'attrs': { + 'units': 'degrees_north', + 'standard_name': 'lat', + **stdattr + } + }, + 'lon': { + 'data': [-71.0, -72.0, -73.0, -74.], + 'dims': ('POINTS',), + 'attrs': { + 'units': 'degrees_east', + 'standard_name': 'lon', + **stdattr + } + }, + 'elev': { + 'data': [5.0, 15.0, 20.0, 25.0], + 'dims': ('POINTS',), + 'attrs': { + 'units': 'meters', + 'standard_name': 'elev', + **stdattr + } + }, + 'TEST_ppbv': { + 'data': [1.2345, 2.3456, 3.4567, 4.5678], + 'dims': ('POINTS',), + 'attrs': { + 'units': 'ppbv', 'standard_name': 'TEST_ppbv', + **stdattr + } + }, + 'TESTM_ppbv': { + 'data': [2.22, np.nan, -7777.0, -8888.0], + 'dims': ('POINTS',), + 'attrs': { + **stdattr, + 'units': 'ppbv', 'standard_name': 'TESTM_ppbv', + 'llod_value': 0.025 + } + } + } + } + ictfile.to_dict()['data_vars']['TESTM_ppbv']; input['data_vars']['TESTM_ppbv'] + chkfile = Dataset.from_dict(input) + assert_identical(ictfile, chkfile) + + def test_ict_format_write(self): + expected = open_example_dataset('example.ict', + 
engine='pseudonetcdf', + autoclose=False, + backend_kwargs={'format': 'ffi1001'}) + with self.roundtrip(expected, save_kwargs=dict(format='ffi1001'), + open_kwargs=dict(decode_times=False)) as actual: + assert_identical(expected, actual) + def test_uamiv_format_read(self): """ Open a CAMx file and test data variables @@ -2278,28 +2392,33 @@ def test_uamiv_format_read(self): actual = camxfile.variables['time'] assert_allclose(expected, actual) - def save(self, dataset, path, **kwargs): - dataset.to_netcdf(path, engine='netcdf4', **kwargs) - - @pytest.mark.xfail(reason='https://github.com/pydata/xarray/issues/2061') - def test_roundtrip_boolean_dtype(self): - pass - - @pytest.mark.xfail(reason='https://github.com/pydata/xarray/issues/2061') - def test_roundtrip_mask_and_scale(self): - pass - - @pytest.mark.xfail(reason='https://github.com/pydata/xarray/issues/2061') - def test_roundtrip_object_dtype(self): - pass - - @pytest.mark.xfail(reason='https://github.com/pydata/xarray/issues/2061') - def test_roundtrip_string_encoded_characters(self): - pass + def test_uamiv_format_write(self): + with warnings.catch_warnings(): + warnings.filterwarnings('ignore', category=UserWarning, + message=('IOAPI_ISPH is assumed to be ' + + '6370000.; consistent with WRF')) + expected = open_example_dataset('example.uamiv', + engine='pseudonetcdf', + autoclose=False, + backend_kwargs={'format': 'uamiv'}) + with self.roundtrip(expected, + save_kwargs=dict(format='uamiv')) as actual: + assert_identical(expected, actual) - @pytest.mark.xfail(reason='https://github.com/pydata/xarray/issues/2061') - def test_unsigned_roundtrip_mask_and_scale(self): - pass + def save(self, dataset, path, **save_kwargs): + import PseudoNetCDF as pnc + pncf = pnc.PseudoNetCDFFile() + pncf.dimensions = {k: pnc.PseudoNetCDFDimension(pncf, k, v) + for k, v in dataset.dims.items()} + pncf.variables = {k: pnc.PseudoNetCDFVariable(pncf, k, v.dtype.char, + v.dims, + values=v.data[...], + **v.attrs) + for k, v in dataset.variables.items()} + for pk, pv in dataset.attrs.items(): + setattr(pncf, pk, pv) + + pnc.pncwrite(pncf, path, **save_kwargs) @requires_rasterio From abacc1db22c815ee44735c98c6296ed1b75df21f Mon Sep 17 00:00:00 2001 From: Barron Henderson Date: Sat, 28 Apr 2018 10:36:14 -0400 Subject: [PATCH 33/46] Removing unknown meta-data netcdf support. 
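The intent is that plain netCDF files with no recognized metadata go through
the netcdf4 engine rather than PseudoNetCDF. A minimal sketch of the expected
behavior after this patch (illustrative only; the file names follow the test
data used elsewhere in this series):

    import xarray as xr

    # A format PseudoNetCDF understands (CAMx uamiv) still opens normally:
    camx = xr.open_dataset('example.uamiv', engine='pseudonetcdf',
                           backend_kwargs={'format': 'uamiv'})

    # A generic netCDF file with unknown metadata should now raise
    # ValueError, steering users to engine='netcdf4' instead:
    xr.open_dataset('example_1.nc', engine='pseudonetcdf')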
--- xarray/backends/pseudonetcdf_.py | 22 +++++++++++++++++-- xarray/tests/test_backends.py | 36 +++++++++++++++++++++++--------- 2 files changed, 46 insertions(+), 12 deletions(-) diff --git a/xarray/backends/pseudonetcdf_.py b/xarray/backends/pseudonetcdf_.py index c481bf848b9..c54d8a28b54 100644 --- a/xarray/backends/pseudonetcdf_.py +++ b/xarray/backends/pseudonetcdf_.py @@ -39,6 +39,13 @@ def __getitem__(self, key): array = indexing.NumpyIndexingAdapter(array)[np_inds] return array +_genericncf = ('Dataset', 'netcdf', 'ncf', 'nc') + +class _notnetcdf: + def __eq__(self, lhs): + return not lhs in _genericncf + + class PseudoNetCDFDataStore(AbstractDataStore, DataStorePickleMixin): """Store for accessing datasets via PseudoNetCDF @@ -46,8 +53,19 @@ class PseudoNetCDFDataStore(AbstractDataStore, DataStorePickleMixin): @classmethod def open(cls, filename, format=None, writer=None, autoclose=False, **format_kwds): - from PseudoNetCDF import pncopen - opener = functools.partial(pncopen, filename, **format_kwds) + from PseudoNetCDF._getreader import getreader, getreaderdict + readerdict = getreaderdict() + reader = getreader(filename, format=format, **format_kwds) + _genreaders = tuple([readerdict[rn] for rn in _genericncf]) + if isinstance(reader, _genreaders): + raise ValueError(('In xarray, PseudoNetCDF should not be used ' + + 'to read netcdf files with unknown metadata. ' + + 'Instead, use netcdf4. If this is a known ' + + 'format, specify it using the format keyword ' + + '(or backend_kwargs={\'format\': } from ' + + 'open_dataset).')) + + opener = functools.partial(reader, filename, **format_kwds) ds = opener() mode = format_kwds.get('mode', 'r') return cls(ds, mode=mode, writer=writer, opener=opener, diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 429a79e0a4e..32811eb28aa 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -2237,12 +2237,25 @@ class PyNioTestAutocloseTrue(PyNioTest): class PseudoNetCDFFormatTest(TestCase): autoclose = False - @contextlib.contextmanager def open(self, path, **kwargs): - with open_dataset(path, engine='pseudonetcdf', - autoclose=self.autoclose, - **kwargs) as ds: - yield ds + return open_dataset(path, engine='pseudonetcdf', + autoclose=self.autoclose, + **kwargs) + + @pytest.mark.xfail(reason=("PseudoNetCDF should not be used with " + + "netcdf metaddata")) + def test_open_ncf(self): + ncffile = open_example_dataset('example_1.nc', + engine='pseudonetcdf', + autoclose=False, + backend_kwargs={'format': 'ncf'}) + + @pytest.mark.xfail(reason=("PseudoNetCDF should not be used with " + + "netcdf metaddata")) + def test_open_dyn_ncf(self): + ncffile = open_example_dataset('example_1.nc', + engine='pseudonetcdf', + autoclose=False) @contextlib.contextmanager def roundtrip(self, data, save_kwargs={}, open_kwargs={}, @@ -2353,12 +2366,13 @@ def test_ict_format(self): assert_identical(ictfile, chkfile) def test_ict_format_write(self): + fmtkw = {'format': 'ffi1001'} expected = open_example_dataset('example.ict', engine='pseudonetcdf', autoclose=False, - backend_kwargs={'format': 'ffi1001'}) - with self.roundtrip(expected, save_kwargs=dict(format='ffi1001'), - open_kwargs=dict(decode_times=False)) as actual: + backend_kwargs=fmtkw) + with self.roundtrip(expected, save_kwargs=fmtkw, + open_kwargs={'backend_kwargs': fmtkw}) as actual: assert_identical(expected, actual) def test_uamiv_format_read(self): @@ -2393,6 +2407,7 @@ def test_uamiv_format_read(self): assert_allclose(expected, actual) def 
test_uamiv_format_write(self): + fmtkw = {'format': 'uamiv'} with warnings.catch_warnings(): warnings.filterwarnings('ignore', category=UserWarning, message=('IOAPI_ISPH is assumed to be ' + @@ -2400,9 +2415,10 @@ def test_uamiv_format_write(self): expected = open_example_dataset('example.uamiv', engine='pseudonetcdf', autoclose=False, - backend_kwargs={'format': 'uamiv'}) + backend_kwargs=fmtkw) with self.roundtrip(expected, - save_kwargs=dict(format='uamiv')) as actual: + save_kwargs=fmtkw, + open_kwargs={'backend_kwargs': fmtkw}) as actual: assert_identical(expected, actual) def save(self, dataset, path, **save_kwargs): From 7d8a8eebec093eb749ea48ca28ca52cb4e6d56f2 Mon Sep 17 00:00:00 2001 From: Barron Henderson Date: Sat, 28 Apr 2018 10:40:46 -0400 Subject: [PATCH 34/46] flake8 cleanup --- xarray/backends/pseudonetcdf_.py | 5 +++-- xarray/tests/test_backends.py | 21 ++++++++++----------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/xarray/backends/pseudonetcdf_.py b/xarray/backends/pseudonetcdf_.py index c54d8a28b54..092ccca91c1 100644 --- a/xarray/backends/pseudonetcdf_.py +++ b/xarray/backends/pseudonetcdf_.py @@ -39,12 +39,13 @@ def __getitem__(self, key): array = indexing.NumpyIndexingAdapter(array)[np_inds] return array + _genericncf = ('Dataset', 'netcdf', 'ncf', 'nc') + class _notnetcdf: def __eq__(self, lhs): - return not lhs in _genericncf - + return lhs not in _genericncf class PseudoNetCDFDataStore(AbstractDataStore, DataStorePickleMixin): diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index eddd6ac23c5..42ff1fc9434 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -2244,21 +2244,21 @@ def open(self, path, **kwargs): return open_dataset(path, engine='pseudonetcdf', autoclose=self.autoclose, **kwargs) - - @pytest.mark.xfail(reason=("PseudoNetCDF should not be used with " + + + @pytest.mark.xfail(reason=("PseudoNetCDF should not be used with " + "netcdf metaddata")) def test_open_ncf(self): - ncffile = open_example_dataset('example_1.nc', - engine='pseudonetcdf', - autoclose=False, - backend_kwargs={'format': 'ncf'}) + open_example_dataset('example_1.nc', + engine='pseudonetcdf', + autoclose=False, + backend_kwargs={'format': 'ncf'}) - @pytest.mark.xfail(reason=("PseudoNetCDF should not be used with " + + @pytest.mark.xfail(reason=("PseudoNetCDF should not be used with " + "netcdf metaddata")) def test_open_dyn_ncf(self): - ncffile = open_example_dataset('example_1.nc', - engine='pseudonetcdf', - autoclose=False) + open_example_dataset('example_1.nc', + engine='pseudonetcdf', + autoclose=False) @contextlib.contextmanager def roundtrip(self, data, save_kwargs={}, open_kwargs={}, @@ -2364,7 +2364,6 @@ def test_ict_format(self): } } } - ictfile.to_dict()['data_vars']['TESTM_ppbv']; input['data_vars']['TESTM_ppbv'] chkfile = Dataset.from_dict(input) assert_identical(ictfile, chkfile) From 24c83767e12a84c28304cde9f4f1f5130a2dc244 Mon Sep 17 00:00:00 2001 From: Barron Henderson Date: Sat, 28 Apr 2018 10:48:04 -0400 Subject: [PATCH 35/46] Using python 2 and 3 compat testing --- xarray/tests/test_backends.py | 59 ++++++++++++++++++----------------- 1 file changed, 30 insertions(+), 29 deletions(-) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 42ff1fc9434..10f0f416ae6 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -2286,6 +2286,11 @@ def test_ict_format(self): 'ulod_value': 'N/A' } + def myatts(**attrs): + outattr = stdattr.copy() + 
outattr.update(attrs) + return outattr + input = { 'coords': {}, 'attrs': { @@ -2312,55 +2317,51 @@ def test_ict_format(self): 'Start_UTC': { 'data': [43200.0, 46800.0, 50400.0, 50400.0], 'dims': ('POINTS',), - 'attrs': { - 'units': 'Start_UTC', - 'standard_name': 'Start_UTC', - **stdattr - } + 'attrs': myatts( + units='Start_UTC', + standard_name='Start_UTC', + ) }, 'lat': { 'data': [41.0, 42.0, 42.0, 42.0], 'dims': ('POINTS',), - 'attrs': { - 'units': 'degrees_north', - 'standard_name': 'lat', - **stdattr - } + 'attrs': myatts( + units='degrees_north', + standard_name='lat', + ) }, 'lon': { 'data': [-71.0, -72.0, -73.0, -74.], 'dims': ('POINTS',), - 'attrs': { - 'units': 'degrees_east', - 'standard_name': 'lon', - **stdattr - } + 'attrs': myatts( + units='degrees_east', + standard_name='lon', + ) }, 'elev': { 'data': [5.0, 15.0, 20.0, 25.0], 'dims': ('POINTS',), - 'attrs': { - 'units': 'meters', - 'standard_name': 'elev', - **stdattr - } + 'attrs': myatts( + units='meters', + standard_name='elev', + ) }, 'TEST_ppbv': { 'data': [1.2345, 2.3456, 3.4567, 4.5678], 'dims': ('POINTS',), - 'attrs': { - 'units': 'ppbv', 'standard_name': 'TEST_ppbv', - **stdattr - } + 'attrs': myatts( + units='ppbv', + standard_name='TEST_ppbv', + ) }, 'TESTM_ppbv': { 'data': [2.22, np.nan, -7777.0, -8888.0], 'dims': ('POINTS',), - 'attrs': { - **stdattr, - 'units': 'ppbv', 'standard_name': 'TESTM_ppbv', - 'llod_value': 0.025 - } + 'attrs': myatts( + units='ppbv', + standard_name='TESTM_ppbv', + llod_value=0.025 + ) } } } From 214f51c073f3d0896f1e12a012ff167934985928 Mon Sep 17 00:00:00 2001 From: Barron Henderson Date: Sat, 28 Apr 2018 10:57:22 -0400 Subject: [PATCH 36/46] Disabling mask_and_scale by default prevents inadvertent double scaling in PNC formats --- xarray/backends/api.py | 9 +++++++-- xarray/tests/test_backends.py | 3 ++- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index fcab08a09ce..9a71385e723 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -145,7 +145,7 @@ def _get_lock(engine, scheduler, format, path_or_file): def open_dataset(filename_or_obj, group=None, decode_cf=True, - mask_and_scale=True, decode_times=True, autoclose=False, + mask_and_scale=None, decode_times=True, autoclose=False, concat_characters=True, decode_coords=True, engine=None, chunks=None, lock=None, cache=None, drop_variables=None, backend_kwargs=None): @@ -172,7 +172,8 @@ def open_dataset(filename_or_obj, group=None, decode_cf=True, taken from variable attributes (if they exist). If the `_FillValue` or `missing_value` attribute contains multiple values a warning will be issued and all array values matching one of the multiple values will - be replaced by NA. + be replaced by NA. mask_and_scale defaults to True except for the + pseudonetcdf backend. decode_times : bool, optional If True, decode times encoded in the standard NetCDF datetime format into datetime objects. Otherwise, leave them encoded as numbers. 
@@ -227,6 +228,10 @@ def open_dataset(filename_or_obj, group=None, decode_cf=True, -------- open_mfdataset """ + + if mask_and_scale is None: + mask_and_scale = not engine == 'pseudonetcdf' + if not decode_cf: mask_and_scale = False decode_times = False diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 10f0f416ae6..249a623690b 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -2279,6 +2279,7 @@ def test_ict_format(self): backend_kwargs={'format': 'ffi1001'}) stdattr = { 'fill_value': -9999.0, + 'missing_value': -9999, 'scale': 1, 'llod_flag': -8888, 'llod_value': 'N/A', @@ -2355,7 +2356,7 @@ def myatts(**attrs): ) }, 'TESTM_ppbv': { - 'data': [2.22, np.nan, -7777.0, -8888.0], + 'data': [2.22, -9999.0, -7777.0, -8888.0], 'dims': ('POINTS',), 'attrs': myatts( units='ppbv', From 5786291fdec3bf89311f2c5b7b067ef718975d94 Mon Sep 17 00:00:00 2001 From: Barron Henderson Date: Wed, 2 May 2018 07:42:43 -0400 Subject: [PATCH 37/46] consistent with 3.0.0 Updates in 3.0.1 will fix close in uamiv. --- xarray/backends/pseudonetcdf_.py | 4 ++-- xarray/tests/test_backends.py | 12 +++++------- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/xarray/backends/pseudonetcdf_.py b/xarray/backends/pseudonetcdf_.py index 092ccca91c1..f71859415be 100644 --- a/xarray/backends/pseudonetcdf_.py +++ b/xarray/backends/pseudonetcdf_.py @@ -40,7 +40,7 @@ def __getitem__(self, key): return array -_genericncf = ('Dataset', 'netcdf', 'ncf', 'nc') +_genericncf = ('Dataset', 'netcdf', 'ncf', 'nc', 'notreal') class _notnetcdf: @@ -57,7 +57,7 @@ def open(cls, filename, format=None, writer=None, from PseudoNetCDF._getreader import getreader, getreaderdict readerdict = getreaderdict() reader = getreader(filename, format=format, **format_kwds) - _genreaders = tuple([readerdict[rn] for rn in _genericncf]) + _genreaders = tuple([readerdict[rn] for rn in _genericncf if rn in readerdict]) if isinstance(reader, _genreaders): raise ValueError(('In xarray, PseudoNetCDF should not be used ' + 'to read netcdf files with unknown metadata. 
' + diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 249a623690b..f00a7847f0c 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -2260,14 +2260,12 @@ def test_open_dyn_ncf(self): engine='pseudonetcdf', autoclose=False) - @contextlib.contextmanager def roundtrip(self, data, save_kwargs={}, open_kwargs={}, allow_cleanup_failure=False): with create_tmp_file( allow_cleanup_failure=allow_cleanup_failure) as path: self.save(data, path, **save_kwargs) - with self.open(path, **open_kwargs) as ds: - yield ds + return self.open(path, **open_kwargs) def test_ict_format(self): """ @@ -2420,10 +2418,10 @@ def test_uamiv_format_write(self): engine='pseudonetcdf', autoclose=False, backend_kwargs=fmtkw) - with self.roundtrip(expected, - save_kwargs=fmtkw, - open_kwargs={'backend_kwargs': fmtkw}) as actual: - assert_identical(expected, actual) + actual = self.roundtrip(expected, + save_kwargs=fmtkw, + open_kwargs={'backend_kwargs': fmtkw}) + assert_identical(expected, actual) def save(self, dataset, path, **save_kwargs): import PseudoNetCDF as pnc From 066cdd529154dad15caf05a7e2744dec4e978453 Mon Sep 17 00:00:00 2001 From: Barron Henderson Date: Wed, 2 May 2018 07:45:01 -0400 Subject: [PATCH 38/46] Updating readers and line length --- xarray/backends/pseudonetcdf_.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/backends/pseudonetcdf_.py b/xarray/backends/pseudonetcdf_.py index f71859415be..a1d2869e497 100644 --- a/xarray/backends/pseudonetcdf_.py +++ b/xarray/backends/pseudonetcdf_.py @@ -40,7 +40,8 @@ def __getitem__(self, key): return array -_genericncf = ('Dataset', 'netcdf', 'ncf', 'nc', 'notreal') +_genericncf = ('Dataset', 'netcdf', 'ncf', 'nc') +_genreaders = tuple([readerdict[rn] for rn in _genericncf if rn in readerdict]) class _notnetcdf: @@ -57,7 +58,6 @@ def open(cls, filename, format=None, writer=None, from PseudoNetCDF._getreader import getreader, getreaderdict readerdict = getreaderdict() reader = getreader(filename, format=format, **format_kwds) - _genreaders = tuple([readerdict[rn] for rn in _genericncf if rn in readerdict]) if isinstance(reader, _genreaders): raise ValueError(('In xarray, PseudoNetCDF should not be used ' + 'to read netcdf files with unknown metadata. 
' + From 9231e3f9de4da2c36136f44c981a1aaad1593d74 Mon Sep 17 00:00:00 2001 From: Barron Henderson Date: Wed, 2 May 2018 07:46:16 -0400 Subject: [PATCH 39/46] Updating readers and line length --- xarray/backends/pseudonetcdf_.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/xarray/backends/pseudonetcdf_.py b/xarray/backends/pseudonetcdf_.py index a1d2869e497..0496e7160dc 100644 --- a/xarray/backends/pseudonetcdf_.py +++ b/xarray/backends/pseudonetcdf_.py @@ -41,7 +41,6 @@ def __getitem__(self, key): _genericncf = ('Dataset', 'netcdf', 'ncf', 'nc') -_genreaders = tuple([readerdict[rn] for rn in _genericncf if rn in readerdict]) class _notnetcdf: @@ -56,6 +55,8 @@ class PseudoNetCDFDataStore(AbstractDataStore, DataStorePickleMixin): def open(cls, filename, format=None, writer=None, autoclose=False, **format_kwds): from PseudoNetCDF._getreader import getreader, getreaderdict + _genreaders = tuple([readerdict[rn] for rn in _genericncf + if rn in readerdict]) readerdict = getreaderdict() reader = getreader(filename, format=format, **format_kwds) if isinstance(reader, _genreaders): From 80d03a70101803cb83034d869b26df747dbf0888 Mon Sep 17 00:00:00 2001 From: Barron Henderson Date: Wed, 2 May 2018 07:46:48 -0400 Subject: [PATCH 40/46] Updating readers and line length --- xarray/backends/pseudonetcdf_.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/backends/pseudonetcdf_.py b/xarray/backends/pseudonetcdf_.py index 0496e7160dc..196671b7590 100644 --- a/xarray/backends/pseudonetcdf_.py +++ b/xarray/backends/pseudonetcdf_.py @@ -55,9 +55,9 @@ class PseudoNetCDFDataStore(AbstractDataStore, DataStorePickleMixin): def open(cls, filename, format=None, writer=None, autoclose=False, **format_kwds): from PseudoNetCDF._getreader import getreader, getreaderdict + readerdict = getreaderdict() _genreaders = tuple([readerdict[rn] for rn in _genericncf if rn in readerdict]) - readerdict = getreaderdict() reader = getreader(filename, format=format, **format_kwds) if isinstance(reader, _genreaders): raise ValueError(('In xarray, PseudoNetCDF should not be used ' + From d2c01de2a18832f342daf9f75a87d529cf46869d Mon Sep 17 00:00:00 2001 From: Barron Henderson Date: Sun, 13 May 2018 10:28:48 -0400 Subject: [PATCH 41/46] Adding open_mfdataset test Testing by opening same file twice and stacking it. --- xarray/backends/api.py | 5 ++-- xarray/tests/test_backends.py | 46 ++++++++++++++++++++++++++++++++--- 2 files changed, 45 insertions(+), 6 deletions(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 9a71385e723..99b80b2f263 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -347,7 +347,7 @@ def maybe_decode_store(store, lock=False): return maybe_decode_store(store) -def open_dataarray(filename_or_obj, group=None, decode_cf=True, +def open_dataarray(filename_or_obj, group=None, decode_cf=None, mask_and_scale=True, decode_times=True, autoclose=False, concat_characters=True, decode_coords=True, engine=None, chunks=None, lock=None, cache=None, drop_variables=None, @@ -378,7 +378,8 @@ def open_dataarray(filename_or_obj, group=None, decode_cf=True, taken from variable attributes (if they exist). If the `_FillValue` or `missing_value` attribute contains multiple values a warning will be issued and all array values matching one of the multiple values will - be replaced by NA. + be replaced by NA. mask_and_scale defaults to True except for the + pseudonetcdf backend. 
decode_times : bool, optional If True, decode times encoded in the standard NetCDF datetime format into datetime objects. Otherwise, leave them encoded as numbers. diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index f00a7847f0c..5b6d4a4c39a 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -62,6 +62,13 @@ def open_example_dataset(name, *args, **kwargs): *args, **kwargs) +def open_example_mfdataset(names, *args, **kwargs): + return open_mfdataset( + [os.path.join(os.path.dirname(__file__), 'data', name) + for name in names], + *args, **kwargs) + + def create_masked_and_scaled_data(): x = np.array([np.nan, np.nan, 10, 10.1, 10.2], dtype=np.float32) encoding = {'_FillValue': -1, 'add_offset': 10, @@ -2396,10 +2403,41 @@ def test_uamiv_format_read(self): actual = camxfile.variables['O3'] assert_allclose(expected, actual) - data = np.array( - ['2002-06-03T00:00:00.000000000'], - dtype='datetime64[ns]' - ) + data = np.array([1.02306240e+09]) + expected = xr.Variable(('TSTEP',), data, + dict(bounds='time_bounds', + long_name=('synthesized time coordinate ' + + 'from SDATE, STIME, STEP ' + + 'global attributes'))) + actual = camxfile.variables['time'] + assert_allclose(expected, actual) + + def test_uamiv_format_mfread(self): + """ + Open a CAMx file and test data variables + """ + with warnings.catch_warnings(): + warnings.filterwarnings('ignore', category=UserWarning, + message=('IOAPI_ISPH is assumed to be ' + + '6370000.; consistent with WRF')) + camxfile = open_example_mfdataset( + ['example.uamiv', + 'example.uamiv'], + engine='pseudonetcdf', + autoclose=False, + concat_dim='TSTEP', + backend_kwargs={'format': 'uamiv'}) + + data1 = np.arange(20, dtype='f').reshape(1, 1, 4, 5) + data = np.concatenate([data1] * 2, axis=0) + expected = xr.Variable(('TSTEP', 'LAY', 'ROW', 'COL'), data, + dict(units='ppm', long_name='O3'.ljust(16), + var_desc='O3'.ljust(80))) + actual = camxfile.variables['O3'] + assert_allclose(expected, actual) + + data1 = np.array([1.02306240e+09]) + data = np.concatenate([data1] * 2, axis=0) expected = xr.Variable(('TSTEP',), data, dict(bounds='time_bounds', long_name=('synthesized time coordinate ' + From eaa37fe37d8c2a45d1da03d360c95ea08b325ba2 Mon Sep 17 00:00:00 2001 From: Barron Henderson Date: Tue, 29 May 2018 21:15:14 -0400 Subject: [PATCH 42/46] Using conda version of PseudoNetCDF --- ci/requirements-py36.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/requirements-py36.yml b/ci/requirements-py36.yml index a54b9a99015..fd63fe26130 100644 --- a/ci/requirements-py36.yml +++ b/ci/requirements-py36.yml @@ -20,9 +20,9 @@ dependencies: - rasterio - bottleneck - zarr + - pseudonetcdf>=3.0.1 - pip: - coveralls - pytest-cov - pydap - lxml - - PseudoNetCDF From 590e9192e3c35006ffd1d0f3c3b6b402728881cc Mon Sep 17 00:00:00 2001 From: Barron Henderson Date: Tue, 29 May 2018 21:31:14 -0400 Subject: [PATCH 43/46] Removing xfail for netcdf Mask and scale with PseudoNetCDF and NetCDF4 is not supported, but not prevented. 
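Because of the earlier api.py change, mask_and_scale defaults to False only
for the pseudonetcdf engine, so values already scaled by PseudoNetCDF are not
scaled a second time by xarray's CF decoding. A short, non-authoritative
sketch of the resulting call pattern, using the ICARTT test file from this
series:

    import xarray as xr

    # default: no CF mask/scale on top of PseudoNetCDF's own handling
    ds = xr.open_dataset('example.ict', engine='pseudonetcdf',
                         backend_kwargs={'format': 'ffi1001'})

    # opting back in is allowed, though not supported for these formats:
    ds_scaled = xr.open_dataset('example.ict', engine='pseudonetcdf',
                                mask_and_scale=True,
                                backend_kwargs={'format': 'ffi1001'})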
--- xarray/backends/pseudonetcdf_.py | 24 ++-------------------- xarray/tests/test_backends.py | 35 +++++++++++--------------------- 2 files changed, 14 insertions(+), 45 deletions(-) diff --git a/xarray/backends/pseudonetcdf_.py b/xarray/backends/pseudonetcdf_.py index 196671b7590..c481bf848b9 100644 --- a/xarray/backends/pseudonetcdf_.py +++ b/xarray/backends/pseudonetcdf_.py @@ -40,34 +40,14 @@ def __getitem__(self, key): return array -_genericncf = ('Dataset', 'netcdf', 'ncf', 'nc') - - -class _notnetcdf: - def __eq__(self, lhs): - return lhs not in _genericncf - - class PseudoNetCDFDataStore(AbstractDataStore, DataStorePickleMixin): """Store for accessing datasets via PseudoNetCDF """ @classmethod def open(cls, filename, format=None, writer=None, autoclose=False, **format_kwds): - from PseudoNetCDF._getreader import getreader, getreaderdict - readerdict = getreaderdict() - _genreaders = tuple([readerdict[rn] for rn in _genericncf - if rn in readerdict]) - reader = getreader(filename, format=format, **format_kwds) - if isinstance(reader, _genreaders): - raise ValueError(('In xarray, PseudoNetCDF should not be used ' + - 'to read netcdf files with unknown metadata. ' + - 'Instead, use netcdf4. If this is a known ' + - 'format, specify it using the format keyword ' + - '(or backend_kwargs={\'format\': } from ' + - 'open_dataset).')) - - opener = functools.partial(reader, filename, **format_kwds) + from PseudoNetCDF import pncopen + opener = functools.partial(pncopen, filename, **format_kwds) ds = opener() mode = format_kwds.get('mode', 'r') return cls(ds, mode=mode, writer=writer, opener=opener, diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index a6973c7c499..da8e83121d1 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -2431,34 +2431,21 @@ class PyNioTestAutocloseTrue(PyNioTest): @requires_pseudonetcdf class PseudoNetCDFFormatTest(TestCase): - autoclose = False + autoclose = True def open(self, path, **kwargs): return open_dataset(path, engine='pseudonetcdf', autoclose=self.autoclose, **kwargs) - @pytest.mark.xfail(reason=("PseudoNetCDF should not be used with " + - "netcdf metaddata")) - def test_open_ncf(self): - open_example_dataset('example_1.nc', - engine='pseudonetcdf', - autoclose=False, - backend_kwargs={'format': 'ncf'}) - - @pytest.mark.xfail(reason=("PseudoNetCDF should not be used with " + - "netcdf metaddata")) - def test_open_dyn_ncf(self): - open_example_dataset('example_1.nc', - engine='pseudonetcdf', - autoclose=False) - + @contextlib.contextmanager def roundtrip(self, data, save_kwargs={}, open_kwargs={}, allow_cleanup_failure=False): with create_tmp_file( allow_cleanup_failure=allow_cleanup_failure) as path: self.save(data, path, **save_kwargs) - return self.open(path, **open_kwargs) + with self.open(path, **open_kwargs) as ds: + yield ds def test_ict_format(self): """ @@ -2580,7 +2567,7 @@ def test_uamiv_format_read(self): '6370000.; consistent with WRF')) camxfile = open_example_dataset('example.uamiv', engine='pseudonetcdf', - autoclose=False, + autoclose=True, backend_kwargs={'format': 'uamiv'}) data = np.arange(20, dtype='f').reshape(1, 1, 4, 5) expected = xr.Variable(('TSTEP', 'LAY', 'ROW', 'COL'), data, @@ -2597,6 +2584,7 @@ def test_uamiv_format_read(self): 'global attributes'))) actual = camxfile.variables['time'] assert_allclose(expected, actual) + camxfile.close() def test_uamiv_format_mfread(self): """ @@ -2610,7 +2598,7 @@ def test_uamiv_format_mfread(self): ['example.uamiv', 'example.uamiv'], 
engine='pseudonetcdf', - autoclose=False, + autoclose=True, concat_dim='TSTEP', backend_kwargs={'format': 'uamiv'}) @@ -2631,6 +2619,7 @@ def test_uamiv_format_mfread(self): 'global attributes'))) actual = camxfile.variables['time'] assert_allclose(expected, actual) + camxfile.close() def test_uamiv_format_write(self): fmtkw = {'format': 'uamiv'} @@ -2642,10 +2631,10 @@ def test_uamiv_format_write(self): engine='pseudonetcdf', autoclose=False, backend_kwargs=fmtkw) - actual = self.roundtrip(expected, - save_kwargs=fmtkw, - open_kwargs={'backend_kwargs': fmtkw}) - assert_identical(expected, actual) + with self.roundtrip(expected, + save_kwargs=fmtkw, + open_kwargs={'backend_kwargs': fmtkw}) as actual: + assert_identical(expected, actual) def save(self, dataset, path, **save_kwargs): import PseudoNetCDF as pnc From 989fa4b2456b17d6165d3f0f6e6543825969e2bb Mon Sep 17 00:00:00 2001 From: Barron Henderson Date: Tue, 29 May 2018 21:44:04 -0400 Subject: [PATCH 44/46] Moving pseudonetcdf to v0.15 --- doc/whats-new.rst | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index cc51de6b629..4e85a984d09 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -37,6 +37,10 @@ Documentation Enhancements ~~~~~~~~~~~~ +- added a PseudoNetCDF backend for many Atmospheric data formats including + GEOS-Chem, CAMx, NOAA arlpacked bit and many others. + By `Barron Henderson `_. + - :py:meth:`~DataArray.cumsum` and :py:meth:`~DataArray.cumprod` now support aggregation over multiple dimensions at the same time. This is the default behavior when dimensions are not specified (previously this raised an error). @@ -117,9 +121,6 @@ Documentation Enhancements ~~~~~~~~~~~~ -- added a PseudoNetCDF backend for many Atmospheric data formats including - GEOS-Chem, CAMx, NOAA arlpacked bit and many others. - By `Barron Henderson `_. - Add an option for using a ``CFTimeIndex`` for indexing times with non-standard calendars and/or outside the Timestamp-valid range; this index enables a subset of the functionality of a standard From d71bb6081a5a31d4308337b9e10aa7320fc6f29c Mon Sep 17 00:00:00 2001 From: Barron Henderson Date: Tue, 29 May 2018 22:12:27 -0400 Subject: [PATCH 45/46] Updating what's new --- doc/whats-new.rst | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 4e85a984d09..3a40566e7fe 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -190,11 +190,10 @@ Bug fixes .. _whats-new.0.10.3: -v0.10.3 (unreleased) --------------------- +v0.10.3 (April 13, 2018) +------------------------ -Documentation -~~~~~~~~~~~~~ +The minor release includes a number of bug-fixes and backwards compatible enhancements. Enhancements ~~~~~~~~~~~~ @@ -215,9 +214,21 @@ Enhancements Bug fixes ~~~~~~~~~ +- Fixed ``decode_cf`` function to operate lazily on dask arrays + (:issue:`1372`). By `Ryan Abernathey `_. - Fixed labeled indexing with slice bounds given by xarray objects with datetime64 or timedelta64 dtypes (:issue:`1240`). By `Stephan Hoyer `_. +- Attempting to convert an xarray.Dataset into a numpy array now raises an + informative error message. + By `Stephan Hoyer `_. +- Fixed a bug in decode_cf_datetime where ``int32`` arrays weren't parsed + correctly (:issue:`2002`). + By `Fabien Maussion `_. +- When calling `xr.auto_combine()` or `xr.open_mfdataset()` with a `concat_dim`, + the resulting dataset will have that one-element dimension (it was + silently dropped, previously) (:issue:`1988`). 
+  By `Ben Root `_.

 .. _whats-new.0.10.2:

From b9b64cad5266d7275bf778e39715e325cbae3ceb Mon Sep 17 00:00:00 2001
From: Barron Henderson
Date: Tue, 29 May 2018 22:13:39 -0400
Subject: [PATCH 46/46] Fixing open_dataarray CF options

mask_and_scale is None (diagnosed by open_dataset) and decode_cf should be
True.
---
 xarray/backends/api.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/xarray/backends/api.py b/xarray/backends/api.py
index adec980e9a6..753f8394a7b 100644
--- a/xarray/backends/api.py
+++ b/xarray/backends/api.py
@@ -354,8 +354,8 @@ def maybe_decode_store(store, lock=False):
     return maybe_decode_store(store)

-def open_dataarray(filename_or_obj, group=None, decode_cf=None,
-                   mask_and_scale=True, decode_times=True, autoclose=False,
+def open_dataarray(filename_or_obj, group=None, decode_cf=True,
+                   mask_and_scale=None, decode_times=True, autoclose=False,
                    concat_characters=True, decode_coords=True, engine=None,
                    chunks=None, lock=None, cache=None, drop_variables=None,
                    backend_kwargs=None):
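For reference, the usage pattern exercised by the tests in this series looks
roughly like the following sketch (assuming PseudoNetCDF >= 3.0.1 and the
example files shipped with the test suite):

    import xarray as xr

    # single file, with the PseudoNetCDF format given explicitly:
    camx = xr.open_dataset('example.uamiv', engine='pseudonetcdf',
                           backend_kwargs={'format': 'uamiv'})

    # multiple files concatenated along the time dimension, as in
    # test_uamiv_format_mfread:
    stacked = xr.open_mfdataset(['example.uamiv', 'example.uamiv'],
                                engine='pseudonetcdf', concat_dim='TSTEP',
                                backend_kwargs={'format': 'uamiv'})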