-
-
Notifications
You must be signed in to change notification settings - Fork 1.1k
Tweaks for opening datasets #895
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -81,7 +81,8 @@ def check_name(name): | |
def open_dataset(filename_or_obj, group=None, decode_cf=True, | ||
mask_and_scale=True, decode_times=True, | ||
concat_characters=True, decode_coords=True, engine=None, | ||
chunks=None, lock=None, drop_variables=None): | ||
chunks=None, lock=None, drop_variables=None, | ||
only_variables=None, format=''): | ||
"""Load and decode a dataset from a file or file-like object. | ||
|
||
Parameters | ||
|
@@ -135,6 +136,14 @@ def open_dataset(filename_or_obj, group=None, decode_cf=True, | |
A variable or list of variables to exclude from being parsed from the | ||
dataset. This may be useful to drop variables with problems or | ||
inconsistent values. | ||
only_variables: string or iterable, optional | ||
A variable or list of variables to load from the dataset. This is | ||
useful if you don't need all the variables in the file and don't want | ||
to spend time loading them. Default is to load all variables. | ||
format: string, optional | ||
The format of the file to open (PyNIO engine only). This may be useful | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I would simply say "currently only used by the PyNIO engine". We might eventually use this for other reasons, too, e.g., for selecting the fastest engine to use when opening netCDF3 files (#887). |
||
for files with malformed names. Acceptable values are those formats | ||
handled by PyNIO; default is to let it autodetect the format. | ||
|
||
Returns | ||
------- | ||
|
@@ -155,7 +164,7 @@ def maybe_decode_store(store, lock=False): | |
ds = conventions.decode_cf( | ||
store, mask_and_scale=mask_and_scale, decode_times=decode_times, | ||
concat_characters=concat_characters, decode_coords=decode_coords, | ||
drop_variables=drop_variables) | ||
drop_variables=drop_variables, only_variables=only_variables) | ||
|
||
if chunks is not None: | ||
try: | ||
|
@@ -214,7 +223,7 @@ def maybe_decode_store(store, lock=False): | |
elif engine == 'h5netcdf': | ||
store = backends.H5NetCDFStore(filename_or_obj, group=group) | ||
elif engine == 'pynio': | ||
store = backends.NioDataStore(filename_or_obj) | ||
store = backends.NioDataStore(filename_or_obj, format=format) | ||
else: | ||
raise ValueError('unrecognized engine for open_dataset: %r' | ||
% engine) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -824,7 +824,8 @@ def decode_cf_variable(var, concat_characters=True, mask_and_scale=True, | |
|
||
def decode_cf_variables(variables, attributes, concat_characters=True, | ||
mask_and_scale=True, decode_times=True, | ||
decode_coords=True, drop_variables=None): | ||
decode_coords=True, drop_variables=None, | ||
only_variables=None): | ||
""" | ||
Decode several CF encoded variables. | ||
|
||
|
@@ -852,9 +853,15 @@ def stackable(dim): | |
drop_variables = [] | ||
drop_variables = set(drop_variables) | ||
|
||
if isinstance(only_variables, basestring): | ||
only_variables = [only_variables] | ||
elif only_variables is None: | ||
only_variables = [] | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is not quite the right logic here. If a user supplies |
||
only_variables = set(only_variables) | ||
|
||
new_vars = OrderedDict() | ||
for k, v in iteritems(variables): | ||
if k in drop_variables: | ||
if (only_variables != set([]) and k not in only_variables) or k in drop_variables: | ||
continue | ||
concat = (concat_characters and v.dtype.kind == 'S' and v.ndim > 0 and | ||
stackable(v.dims[-1])) | ||
|
@@ -879,7 +886,8 @@ def stackable(dim): | |
|
||
|
||
def decode_cf(obj, concat_characters=True, mask_and_scale=True, | ||
decode_times=True, decode_coords=True, drop_variables=None): | ||
decode_times=True, decode_coords=True, drop_variables=None, | ||
only_variables=None): | ||
"""Decode the given Dataset or Datastore according to CF conventions into | ||
a new Dataset. | ||
|
||
|
@@ -903,6 +911,10 @@ def decode_cf(obj, concat_characters=True, mask_and_scale=True, | |
A variable or list of variables to exclude from being parsed from the | ||
dataset. This may be useful to drop variables with problems or | ||
inconsistent values. | ||
only_variables: string or iterable, optional | ||
A variable or list of variables to load from the dataset. This is | ||
useful if you don't need all the variables in the file and don't want | ||
to spend time loading them. Default is to load all variables. | ||
|
||
Returns | ||
------- | ||
|
@@ -925,7 +937,7 @@ def decode_cf(obj, concat_characters=True, mask_and_scale=True, | |
|
||
vars, attrs, coord_names = decode_cf_variables( | ||
vars, attrs, concat_characters, mask_and_scale, decode_times, | ||
decode_coords, drop_variables=drop_variables) | ||
decode_coords, drop_variables=drop_variables, only_variables=only_variables) | ||
ds = Dataset(vars, attrs=attrs) | ||
ds = ds.set_coords(coord_names.union(extra_coords)) | ||
ds._file_obj = file_obj | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The default should be `format=None`.