Skip to content

Don't convert time data to timedelta by default #940

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 14 additions & 8 deletions xarray/backends/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,9 @@ def check_name(name):


def open_dataset(filename_or_obj, group=None, decode_cf=True,
mask_and_scale=True, decode_times=True,
concat_characters=True, decode_coords=True, engine=None,
mask_and_scale=True, decode_datetimes=True,
decode_timedeltas=False, concat_characters=True,
decode_coords=True, engine=None,
chunks=None, lock=None, drop_variables=None):
"""Load and decode a dataset from a file or file-like object.

Expand All @@ -107,9 +108,12 @@ def open_dataset(filename_or_obj, group=None, decode_cf=True,
`missing_value` attribute contains multiple values a warning will be
issued and all array values matching one of the multiple values will
be replaced by NA.
decode_times : bool, optional
decode_datetimes : bool, optional
If True, decode times encoded in the standard NetCDF datetime format
into datetime objects. Otherwise, leave them encoded as numbers.
decode_timedeltas : bool, optional
If True, decode variables with CF-style time units (e.g. 'seconds')
into timedelta objects. Otherwise, leave them encoded as numbers.
concat_characters : bool, optional
If True, concatenate along the last dimension of character arrays to
form string arrays. Dimensions will only be concatenated over (and
Expand Down Expand Up @@ -148,15 +152,16 @@ def open_dataset(filename_or_obj, group=None, decode_cf=True,
"""
if not decode_cf:
mask_and_scale = False
decode_times = False
decode_datetimes = False
decode_timedeltas = False
concat_characters = False
decode_coords = False

def maybe_decode_store(store, lock=False):
ds = conventions.decode_cf(
store, mask_and_scale=mask_and_scale, decode_times=decode_times,
concat_characters=concat_characters, decode_coords=decode_coords,
drop_variables=drop_variables)
store, mask_and_scale=mask_and_scale, decode_datetimes=decode_datetimes,
decode_timedeltas=decode_timedeltas, concat_characters=concat_characters,
decode_coords=decode_coords, drop_variables=drop_variables)

if chunks is not None:
try:
Expand All @@ -174,7 +179,8 @@ def maybe_decode_store(store, lock=False):
else:
file_arg = filename_or_obj
token = tokenize(file_arg, group, decode_cf, mask_and_scale,
decode_times, concat_characters, decode_coords,
decode_datetimes, decode_timedeltas,
concat_characters, decode_coords,
engine, chunks, drop_variables)
name_prefix = '%s:%s/' % (filename_or_obj, group or '')
ds2 = ds.chunk(chunks, name_prefix=name_prefix, token=token,
Expand Down
43 changes: 27 additions & 16 deletions xarray/conventions.py
Original file line number Diff line number Diff line change
Expand Up @@ -379,7 +379,7 @@ def __init__(self, array, units, calendar=None):
calendar_msg = ('the default calendar' if calendar is None
else 'calendar %r' % calendar)
msg = ('unable to decode time units %r with %s. Try '
'opening your dataset with decode_times=False.'
'opening your dataset with decode_datetimes=False.'
% (units, calendar_msg))
if not PY3:
msg += ' Full traceback:\n' + traceback.format_exc()
Expand Down Expand Up @@ -718,7 +718,8 @@ def encode_cf_variable(var, needs_copy=True, name=None):


def decode_cf_variable(var, concat_characters=True, mask_and_scale=True,
decode_times=True, decode_endianness=True):
decode_datetimes=True, decode_timedeltas=False,
decode_endianness=True):
"""
Decodes a variable which may hold CF encoded information.

Expand All @@ -737,8 +738,10 @@ def decode_cf_variable(var, concat_characters=True, mask_and_scale=True,
mask_and_scale: bool
Lazily scale (using scale_factor and add_offset) and mask
(using _FillValue).
decode_times : bool
decode_datetimes : bool
Decode cf times ('hours since 2000-01-01') to np.datetime64.
decode_timedeltas : bool
Decode cf time data ('seconds') to np.timedelta64.
decode_endianness : bool
Decode arrays from non-native to native endianness.

Expand Down Expand Up @@ -792,13 +795,13 @@ def decode_cf_variable(var, concat_characters=True, mask_and_scale=True,
data = MaskedAndScaledArray(data, fill_value, scale_factor,
add_offset, dtype)

if decode_times and 'units' in attributes:
if 'since' in attributes['units']:
if any([decode_datetimes, decode_timedeltas]) and 'units' in attributes:
if decode_datetimes and 'since' in attributes['units']:
# datetime
units = pop_to(attributes, encoding, 'units')
calendar = pop_to(attributes, encoding, 'calendar')
data = DecodedCFDatetimeArray(data, units, calendar)
elif attributes['units'] in TIME_UNITS:
elif decode_timedeltas and attributes['units'] in TIME_UNITS:
# timedelta
units = pop_to(attributes, encoding, 'units')
data = DecodedCFTimedeltaArray(data, units)
Expand All @@ -823,8 +826,9 @@ def decode_cf_variable(var, concat_characters=True, mask_and_scale=True,


def decode_cf_variables(variables, attributes, concat_characters=True,
mask_and_scale=True, decode_times=True,
decode_coords=True, drop_variables=None):
mask_and_scale=True, decode_datetimes=True,
decode_timedeltas=False, decode_coords=True,
drop_variables=None):
"""
Decode several CF-encoded variables.

Expand Down Expand Up @@ -860,7 +864,8 @@ def stackable(dim):
stackable(v.dims[-1]))
new_vars[k] = decode_cf_variable(
v, concat_characters=concat, mask_and_scale=mask_and_scale,
decode_times=decode_times)
decode_datetimes=decode_datetimes,
decode_timedeltas=decode_timedeltas)
if decode_coords:
var_attrs = new_vars[k].attrs
if 'coordinates' in var_attrs:
Expand All @@ -879,7 +884,8 @@ def stackable(dim):


def decode_cf(obj, concat_characters=True, mask_and_scale=True,
decode_times=True, decode_coords=True, drop_variables=None):
decode_datetimes=True, decode_timedeltas=False,
decode_coords=True, drop_variables=None):
"""Decode the given Dataset or Datastore according to CF conventions into
a new Dataset.

Expand All @@ -893,9 +899,11 @@ def decode_cf(obj, concat_characters=True, mask_and_scale=True,
mask_and_scale: bool, optional
Lazily scale (using scale_factor and add_offset) and mask
(using _FillValue).
decode_times : bool, optional
decode_datetimes : bool, optional
Decode cf times (e.g., integers since 'hours since 2000-01-01') to
np.datetime64.
decode_timedeltas : bool, optional
Decode cf time data (e.g., 'seconds') to np.timedelta64.
decode_coords : bool, optional
Use the 'coordinates' attribute on variable (or the dataset itself) to
identify coordinates.
Expand Down Expand Up @@ -924,8 +932,8 @@ def decode_cf(obj, concat_characters=True, mask_and_scale=True,
raise TypeError('can only decode Dataset or DataStore objects')

vars, attrs, coord_names = decode_cf_variables(
vars, attrs, concat_characters, mask_and_scale, decode_times,
decode_coords, drop_variables=drop_variables)
vars, attrs, concat_characters, mask_and_scale, decode_datetimes,
decode_timedeltas, decode_coords, drop_variables=drop_variables)
ds = Dataset(vars, attrs=attrs)
ds = ds.set_coords(coord_names.union(extra_coords))
ds._file_obj = file_obj
Expand All @@ -934,7 +942,7 @@ def decode_cf(obj, concat_characters=True, mask_and_scale=True,

def cf_decoder(variables, attributes,
concat_characters=True, mask_and_scale=True,
decode_times=True):
decode_datetimes=True, decode_timedeltas=False):
"""
Decode a set of CF encoded variables and attributes.

Expand All @@ -952,8 +960,10 @@ def cf_decoder(variables, attributes,
mask_and_scale: bool
Lazily scale (using scale_factor and add_offset) and mask
(using _FillValue).
decode_times : bool
decode_datetimes : bool
Decode cf times ('hours since 2000-01-01') to np.datetime64.
decode_timedeltas : bool
Decode cf time data ('seconds') to np.timedelta64.

Returns
-------
Expand All @@ -963,7 +973,8 @@ def cf_decoder(variables, attributes,
A dictionary mapping from attribute name to values.
"""
variables, attributes, _ = decode_cf_variables(
variables, attributes, concat_characters, mask_and_scale, decode_times)
variables, attributes, concat_characters, mask_and_scale,
decode_datetimes, decode_timedeltas)
return variables, attributes


Expand Down
3 changes: 2 additions & 1 deletion xarray/test/test_backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,8 @@ def test_roundtrip_datetime_data(self):
def test_roundtrip_timedelta_data(self):
    """Timedelta data round-trips when decoding is explicitly enabled."""
    # Timedeltas are no longer decoded by default, so the reader must
    # opt in via open_kwargs; otherwise values come back as raw numbers.
    deltas = pd.to_timedelta(['1h', '2h', 'NaT'])
    expected = Dataset({'td': ('td', deltas), 'td0': deltas[0]})
    open_kwargs = {'decode_timedeltas': True}
    with self.roundtrip(expected, open_kwargs=open_kwargs) as actual:
        self.assertDatasetIdentical(expected, actual)

def test_roundtrip_float64_data(self):
Expand Down
42 changes: 42 additions & 0 deletions xarray/test/test_conventions.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,13 @@ def test_decode_cf_with_conflicting_fill_missing_value(self):
self.assertRaisesRegexp(ValueError, "_FillValue and missing_value",
lambda: conventions.decode_cf_variable(var))

def test_decoded_timedelta_array(self):
    """DecodedCFTimedeltaArray exposes numbers as timedelta64[ns] values."""
    raw = np.array([0, 1, 2])
    decoded = conventions.DecodedCFTimedeltaArray(raw, 'seconds')
    # dtype must already report nanosecond-precision timedeltas
    self.assertEqual(decoded.dtype, np.dtype('timedelta64[ns]'))
    self.assertArrayEqual(decoded, pd.to_timedelta(['0s', '1s', '2s']).values)

@requires_netCDF4
def test_decode_cf_datetime_non_iso_strings(self):
# datetime strings that are _almost_ ISO compliant but not quite,
Expand Down Expand Up @@ -537,6 +544,41 @@ def test_decode_cf_with_drop_variables(self):
self.assertDatasetIdentical(expected, actual)
self.assertDatasetIdentical(expected, actual2)

def test_datetimes_true(self):
    """decode_datetimes=True decodes 'since' units; plain units untouched."""
    encoded = Dataset({
        'time': ('time', [0, 1, 2], {'units': 'days since 2000-01-01'}),
        'period': ('time', [0, 1, 2],
                   {'coordinates': 'time', 'units': 'seconds'}),
    })
    # 'time' should become datetime64; 'period' keeps raw numbers and its
    # units attribute because timedelta decoding is off by default.
    decoded = conventions.decode_cf(encoded, decode_datetimes=True)
    reference = Dataset({
        'time': pd.date_range('2000-01-01', periods=3),
        'period': ('time', [0, 1, 2], {'units': 'seconds'}),
    })
    self.assertDatasetIdentical(reference, decoded)

def test_timedeltas_true(self):
    """decode_timedeltas=True converts plain time units to timedelta64."""
    encoded = Dataset({
        'time': ('time', [0, 1, 2], {'units': 'days since 2000-01-01'}),
        'period': ('time', [0, 1, 2],
                   {'coordinates': 'time', 'units': 'seconds'}),
    })
    # datetime decoding stays on by default, so 'time' decodes too.
    decoded = conventions.decode_cf(encoded, decode_timedeltas=True)
    reference = Dataset({
        'time': pd.date_range('2000-01-01', periods=3),
        'period': ('time', pd.to_timedelta(['0s', '1s', '2s'])),
    })
    self.assertDatasetIdentical(reference, decoded)

def test_datetimes_false(self):
    """decode_datetimes=False leaves 'since'-encoded times as raw numbers."""
    original = Dataset({
        'time': ('time', [0, 1, 2], {'units': 'days since 2000-01-01'}),
        'period': ('time', [0, 1, 2],
                   {'coordinates': 'time', 'units': 'seconds'}),
    })
    expected = Dataset({
        'time': [0, 1, 2],
        'period': ('time', [0, 1, 2], {'units': 'seconds'}),
    })
    actual = conventions.decode_cf(original, decode_datetimes=False)
    self.assertArrayEqual(expected['time'], actual['time'])
    # The 'period' variable was built into ``expected`` but never checked;
    # assert it also stays un-decoded.
    self.assertArrayEqual(expected['period'], actual['period'])

class CFEncodedInMemoryStore(WritableCFDataStore, InMemoryDataStore):
pass
Expand Down