Skip to content

Commit 32365b3

Browse files
committed
Merge pull request #532 from markelg/master
Add a --drop-variables flag to xray.open_dataset to exclude certain variables
2 parents 43315a9 + 3406de3 commit 32365b3

File tree

4 files changed

+44
-9
lines changed

4 files changed

+44
-9
lines changed

doc/whats-new.rst

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,9 @@ v0.5.3 (unreleased)
8787
da = xray.DataArray(np.random.random_sample(size=(5, 4)))
8888
da.where(da < 0.5)
8989
da.where(da < 0.5).to_masked_array(copy=True)
90-
90+
- Added new flag "drop_variables" to :py:meth:`~xray.open_dataset` for
91+
excluding variables from being parsed. This may be useful to drop
92+
variables with problems or inconsistent values.
9193

9294
Bug fixes
9395
~~~~~~~~~

xray/backends/api.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ def _default_lock(filename, engine):
6464
def open_dataset(filename_or_obj, group=None, decode_cf=True,
6565
mask_and_scale=True, decode_times=True,
6666
concat_characters=True, decode_coords=True, engine=None,
67-
chunks=None, lock=None):
67+
chunks=None, lock=None, drop_variables=None):
6868
"""Load and decode a dataset from a file or file-like object.
6969
7070
Parameters
@@ -114,6 +114,10 @@ def open_dataset(filename_or_obj, group=None, decode_cf=True,
114114
used when reading data from netCDF files with the netcdf4 and h5netcdf
115115
engines to avoid issues with concurrent access when using dask's
116116
multithreaded backend.
117+
drop_variables : string or iterable, optional
118+
A variable or list of variables to exclude from being parsed from the
119+
dataset. This may be useful to drop variables with problems or
120+
inconsistent values.
117121
118122
Returns
119123
-------
@@ -133,7 +137,8 @@ def open_dataset(filename_or_obj, group=None, decode_cf=True,
133137
def maybe_decode_store(store, lock=False):
134138
ds = conventions.decode_cf(
135139
store, mask_and_scale=mask_and_scale, decode_times=decode_times,
136-
concat_characters=concat_characters, decode_coords=decode_coords)
140+
concat_characters=concat_characters, decode_coords=decode_coords,
141+
drop_variables=drop_variables)
137142
if chunks is not None:
138143
ds = ds.chunk(chunks, lock=lock)
139144
return ds

xray/conventions.py

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
from .core.formatting import format_timestamp, first_n_items
1313
from .core.variable import as_variable, Variable
1414
from .core.pycompat import (iteritems, bytes_type, unicode_type, OrderedDict,
15-
PY3)
15+
PY3, basestring)
1616

1717

1818
# standard calendars recognized by netcdftime
@@ -783,7 +783,7 @@ def decode_cf_variable(var, concat_characters=True, mask_and_scale=True,
783783

784784
def decode_cf_variables(variables, attributes, concat_characters=True,
785785
mask_and_scale=True, decode_times=True,
786-
decode_coords=True):
786+
decode_coords=True, drop_variables=None):
787787
"""
788788
Decode several CF encoded variables.
789789
@@ -805,8 +805,16 @@ def stackable(dim):
805805

806806
coord_names = set()
807807

808+
if isinstance(drop_variables, basestring):
809+
drop_variables = [drop_variables]
810+
elif drop_variables is None:
811+
drop_variables = []
812+
drop_variables = set(drop_variables)
813+
808814
new_vars = OrderedDict()
809815
for k, v in iteritems(variables):
816+
if k in drop_variables:
817+
continue
810818
concat = (concat_characters and v.dtype.kind == 'S' and v.ndim > 0 and
811819
stackable(v.dims[-1]))
812820
new_vars[k] = decode_cf_variable(
@@ -828,7 +836,7 @@ def stackable(dim):
828836

829837

830838
def decode_cf(obj, concat_characters=True, mask_and_scale=True,
831-
decode_times=True, decode_coords=True):
839+
decode_times=True, decode_coords=True, drop_variables=None):
832840
"""Decode the given Dataset or Datastore according to CF conventions into
833841
a new Dataset.
834842
@@ -848,7 +856,11 @@ def decode_cf(obj, concat_characters=True, mask_and_scale=True,
848856
decode_coords : bool, optional
849857
Use the 'coordinates' attribute on variable (or the dataset itself) to
850858
identify coordinates.
851-
859+
drop_variables : string or iterable, optional
860+
A variable or list of variables to exclude from being parsed from the
861+
dataset. This may be useful to drop variables with problems or
862+
inconsistent values.
863+
852864
Returns
853865
-------
854866
decoded : Dataset
@@ -870,7 +882,7 @@ def decode_cf(obj, concat_characters=True, mask_and_scale=True,
870882

871883
vars, attrs, coord_names = decode_cf_variables(
872884
vars, attrs, concat_characters, mask_and_scale, decode_times,
873-
decode_coords)
885+
decode_coords, drop_variables=drop_variables)
874886
ds = Dataset(vars, attrs=attrs)
875887
ds = ds.set_coords(coord_names.union(extra_coords))
876888
ds._file_obj = file_obj

xray/test/test_conventions.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -491,7 +491,23 @@ def test_decode_cf_with_multiple_missing_values(self):
491491
actual = conventions.decode_cf_variable(original)
492492
self.assertDatasetIdentical(expected, actual)
493493
self.assertIn('variable has multiple fill', str(w[0].message))
494-
494+
def test_decode_cf_with_drop_variables(self):
495+
original = Dataset({
496+
't': ('t', [0, 1, 2], {'units': 'days since 2000-01-01'}),
497+
'x' : ("x", [9, 8, 7], {'units' : 'km'}),
498+
'foo': (('t', 'x'), [[0, 0, 0], [1, 1, 1], [2, 2, 2]], {'units': 'bar'}),
499+
'y': ('t', [5, 10, -999], {'_FillValue': -999})
500+
})
501+
expected = Dataset({
502+
't': pd.date_range('2000-01-01', periods=3),
503+
'x' : ("x", [0, 1, 2]),
504+
'foo': (('t', 'x'), [[0, 0, 0], [1, 1, 1], [2, 2, 2]], {'units': 'bar'}),
505+
'y': ('t', [5, 10, np.nan])
506+
})
507+
actual = conventions.decode_cf(original, drop_variables=("x",))
508+
actual2 = conventions.decode_cf(original, drop_variables="x")
509+
self.assertDatasetIdentical(expected, actual)
510+
self.assertDatasetIdentical(expected, actual2)
495511

496512
class CFEncodedInMemoryStore(InMemoryDataStore):
497513
def store(self, variables, attributes):

0 commit comments

Comments
 (0)