Skip to content

Tweaks for opening datasets #895

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 12 additions & 3 deletions xarray/backends/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,8 @@ def check_name(name):
def open_dataset(filename_or_obj, group=None, decode_cf=True,
mask_and_scale=True, decode_times=True,
concat_characters=True, decode_coords=True, engine=None,
chunks=None, lock=None, drop_variables=None):
chunks=None, lock=None, drop_variables=None,
only_variables=None, format=''):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The default should be format=None.

"""Load and decode a dataset from a file or file-like object.

Parameters
Expand Down Expand Up @@ -135,6 +136,14 @@ def open_dataset(filename_or_obj, group=None, decode_cf=True,
A variable or list of variables to exclude from being parsed from the
dataset. This may be useful to drop variables with problems or
inconsistent values.
only_variables: string or iterable, optional
A variable or list of variables to load from the dataset. This is
useful if you don't need all the variables in the file and don't want
to spend time loading them. Default is to load all variables.
format: string, optional
The format of the file to open (PyNIO engine only). This may be useful
Copy link
Member

@shoyer shoyer Jul 12, 2016

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would simply say "currently only used by the PyNIO engine". We might eventually use this for other reasons, too, e.g., for selecting the fastest engine to use when open netCDF3 files (#887).

for files with malformed names. Acceptable values are those formats
handled by PyNIO; default is to let it autodetect the format.

Returns
-------
Expand All @@ -155,7 +164,7 @@ def maybe_decode_store(store, lock=False):
ds = conventions.decode_cf(
store, mask_and_scale=mask_and_scale, decode_times=decode_times,
concat_characters=concat_characters, decode_coords=decode_coords,
drop_variables=drop_variables)
drop_variables=drop_variables, only_variables=only_variables)

if chunks is not None:
try:
Expand Down Expand Up @@ -214,7 +223,7 @@ def maybe_decode_store(store, lock=False):
elif engine == 'h5netcdf':
store = backends.H5NetCDFStore(filename_or_obj, group=group)
elif engine == 'pynio':
store = backends.NioDataStore(filename_or_obj)
store = backends.NioDataStore(filename_or_obj, format=format)
else:
raise ValueError('unrecognized engine for open_dataset: %r'
% engine)
Expand Down
4 changes: 2 additions & 2 deletions xarray/backends/pynio_.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,9 @@ def __getitem__(self, key):
class NioDataStore(AbstractDataStore):
"""Store for accessing datasets via PyNIO
"""
def __init__(self, filename, mode='r'):
def __init__(self, filename, format=None, mode='r'):
import Nio
self.ds = Nio.open_file(filename, mode=mode)
self.ds = Nio.open_file(filename, format=format, mode=mode)

def open_store_variable(self, var):
data = indexing.LazilyIndexedArray(NioArrayWrapper(var, self.ds))
Expand Down
20 changes: 16 additions & 4 deletions xarray/conventions.py
Original file line number Diff line number Diff line change
Expand Up @@ -824,7 +824,8 @@ def decode_cf_variable(var, concat_characters=True, mask_and_scale=True,

def decode_cf_variables(variables, attributes, concat_characters=True,
mask_and_scale=True, decode_times=True,
decode_coords=True, drop_variables=None):
decode_coords=True, drop_variables=None,
only_variables=None):
"""
Decode several CF-encoded variables.

Expand Down Expand Up @@ -852,9 +853,15 @@ def stackable(dim):
drop_variables = []
drop_variables = set(drop_variables)

if isinstance(only_variables, basestring):
only_variables = [only_variables]
elif only_variables is None:
only_variables = []
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not quite the right logic here. If a user supplies only_variables=[], then we should give them an empty dataset. We need to retain a distinction between None and [] for keeping variables.

only_variables = set(only_variables)

new_vars = OrderedDict()
for k, v in iteritems(variables):
if k in drop_variables:
if (only_variables != set([]) and k not in only_variables) or k in drop_variables:
continue
concat = (concat_characters and v.dtype.kind == 'S' and v.ndim > 0 and
stackable(v.dims[-1]))
Expand All @@ -879,7 +886,8 @@ def stackable(dim):


def decode_cf(obj, concat_characters=True, mask_and_scale=True,
decode_times=True, decode_coords=True, drop_variables=None):
decode_times=True, decode_coords=True, drop_variables=None,
only_variables=None):
"""Decode the given Dataset or Datastore according to CF conventions into
a new Dataset.

Expand All @@ -903,6 +911,10 @@ def decode_cf(obj, concat_characters=True, mask_and_scale=True,
A variable or list of variables to exclude from being parsed from the
dataset. This may be useful to drop variables with problems or
inconsistent values.
only_variables: string or iterable, optional
A variable or list of variables to load from the dataset. This is
useful if you don't need all the variables in the file and don't want
to spend time loading them. Default is to load all variables.

Returns
-------
Expand All @@ -925,7 +937,7 @@ def decode_cf(obj, concat_characters=True, mask_and_scale=True,

vars, attrs, coord_names = decode_cf_variables(
vars, attrs, concat_characters, mask_and_scale, decode_times,
decode_coords, drop_variables=drop_variables)
decode_coords, drop_variables=drop_variables, only_variables=only_variables)
ds = Dataset(vars, attrs=attrs)
ds = ds.set_coords(coord_names.union(extra_coords))
ds._file_obj = file_obj
Expand Down