From fb5b288fc0c27879d4625a114e32f7b3395b3952 Mon Sep 17 00:00:00 2001 From: ocefpaf Date: Wed, 3 Aug 2016 23:21:30 -0300 Subject: [PATCH 1/2] Don't convert time data to timedelta by default Closes #843 --- xarray/backends/api.py | 22 +++++++++++------- xarray/conventions.py | 43 ++++++++++++++++++++++-------------- xarray/test/test_backends.py | 3 ++- 3 files changed, 43 insertions(+), 25 deletions(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 055310245f3..4cb119f6d46 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -80,8 +80,9 @@ def check_name(name): def open_dataset(filename_or_obj, group=None, decode_cf=True, - mask_and_scale=True, decode_times=True, - concat_characters=True, decode_coords=True, engine=None, + mask_and_scale=True, decode_datetimes=True, + decode_timedeltas=False, concat_characters=True, + decode_coords=True, engine=None, chunks=None, lock=None, drop_variables=None): """Load and decode a dataset from a file or file-like object. @@ -107,9 +108,12 @@ def open_dataset(filename_or_obj, group=None, decode_cf=True, `missing_value` attribute contains multiple values a warning will be issued and all array values matching one of the multiple values will be replaced by NA. - decode_times : bool, optional + decode_datetimes : bool, optional If True, decode times encoded in the standard NetCDF datetime format into datetime objects. Otherwise, leave them encoded as numbers. + decode_timedeltas : bool, optional + If True, decode time data encoded in the standard NetCDF datetime format + into timedelta objects. Otherwise, leave them encoded as numbers. concat_characters : bool, optional If True, concatenate along the last dimension of character arrays to form string arrays. 
Dimensions will only be concatenated over (and @@ -148,15 +152,16 @@ def open_dataset(filename_or_obj, group=None, decode_cf=True, """ if not decode_cf: mask_and_scale = False - decode_times = False + decode_datetimes = False + decode_timedeltas = False concat_characters = False decode_coords = False def maybe_decode_store(store, lock=False): ds = conventions.decode_cf( - store, mask_and_scale=mask_and_scale, decode_times=decode_times, - concat_characters=concat_characters, decode_coords=decode_coords, - drop_variables=drop_variables) + store, mask_and_scale=mask_and_scale, decode_datetimes=decode_datetimes, + decode_timedeltas=decode_timedeltas, concat_characters=concat_characters, + decode_coords=decode_coords, drop_variables=drop_variables) if chunks is not None: try: @@ -174,7 +179,8 @@ def maybe_decode_store(store, lock=False): else: file_arg = filename_or_obj token = tokenize(file_arg, group, decode_cf, mask_and_scale, - decode_times, concat_characters, decode_coords, + decode_datetimes, decode_timedeltas, + concat_characters, decode_coords, engine, chunks, drop_variables) name_prefix = '%s:%s/' % (filename_or_obj, group or '') ds2 = ds.chunk(chunks, name_prefix=name_prefix, token=token, diff --git a/xarray/conventions.py b/xarray/conventions.py index 6ca947c9f32..d4882cee063 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -379,7 +379,7 @@ def __init__(self, array, units, calendar=None): calendar_msg = ('the default calendar' if calendar is None else 'calendar %r' % calendar) msg = ('unable to decode time units %r with %s. Try ' - 'opening your dataset with decode_times=False.' + 'opening your dataset with decode_datetimes=False.' 
% (units, calendar_msg)) if not PY3: msg += ' Full traceback:\n' + traceback.format_exc() @@ -718,7 +718,8 @@ def encode_cf_variable(var, needs_copy=True, name=None): def decode_cf_variable(var, concat_characters=True, mask_and_scale=True, - decode_times=True, decode_endianness=True): + decode_datetimes=True, decode_timedeltas=False, + decode_endianness=True): """ Decodes a variable which may hold CF encoded information. @@ -737,8 +738,10 @@ def decode_cf_variable(var, concat_characters=True, mask_and_scale=True, mask_and_scale: bool Lazily scale (using scale_factor and add_offset) and mask (using _FillValue). - decode_times : bool + decode_datetimes : bool Decode cf times ('hours since 2000-01-01') to np.datetime64. + decode_timedeltas : bool + Decode cf time data ('seconds') to np.timedelta64. decode_endianness : bool Decode arrays from non-native to native endianness. @@ -792,13 +795,13 @@ def decode_cf_variable(var, concat_characters=True, mask_and_scale=True, data = MaskedAndScaledArray(data, fill_value, scale_factor, add_offset, dtype) - if decode_times and 'units' in attributes: - if 'since' in attributes['units']: + if any([decode_datetimes, decode_timedeltas]) and 'units' in attributes: + if decode_datetimes and 'since' in attributes['units']: # datetime units = pop_to(attributes, encoding, 'units') calendar = pop_to(attributes, encoding, 'calendar') data = DecodedCFDatetimeArray(data, units, calendar) - elif attributes['units'] in TIME_UNITS: + elif decode_timedeltas and attributes['units'] in TIME_UNITS: # timedelta units = pop_to(attributes, encoding, 'units') data = DecodedCFTimedeltaArray(data, units) @@ -823,8 +826,9 @@ def decode_cf_variable(var, concat_characters=True, mask_and_scale=True, def decode_cf_variables(variables, attributes, concat_characters=True, - mask_and_scale=True, decode_times=True, - decode_coords=True, drop_variables=None): + mask_and_scale=True, decode_datetimes=True, + decode_timedeltas=False, decode_coords=True, + 
drop_variables=None): """ Decode a several CF encoded variables. @@ -860,7 +864,8 @@ def stackable(dim): stackable(v.dims[-1])) new_vars[k] = decode_cf_variable( v, concat_characters=concat, mask_and_scale=mask_and_scale, - decode_times=decode_times) + decode_datetimes=decode_datetimes, + decode_timedeltas=decode_timedeltas) if decode_coords: var_attrs = new_vars[k].attrs if 'coordinates' in var_attrs: @@ -879,7 +884,8 @@ def stackable(dim): def decode_cf(obj, concat_characters=True, mask_and_scale=True, - decode_times=True, decode_coords=True, drop_variables=None): + decode_datetimes=True, decode_timedeltas=False, + decode_coords=True, drop_variables=None): """Decode the given Dataset or Datastore according to CF conventions into a new Dataset. @@ -893,9 +899,11 @@ def decode_cf(obj, concat_characters=True, mask_and_scale=True, mask_and_scale: bool, optional Lazily scale (using scale_factor and add_offset) and mask (using _FillValue). - decode_times : bool, optional + decode_datetimes : bool, optional Decode cf times (e.g., integers since 'hours since 2000-01-01') to np.datetime64. + decode_timedeltas : bool, optional + Decode cf time data (e.g., 'seconds') to np.timedelta64. decode_coords : bool, optional Use the 'coordinates' attribute on variable (or the dataset itself) to identify coordinates. 
@@ -924,8 +932,8 @@ def decode_cf(obj, concat_characters=True, mask_and_scale=True, raise TypeError('can only decode Dataset or DataStore objects') vars, attrs, coord_names = decode_cf_variables( - vars, attrs, concat_characters, mask_and_scale, decode_times, - decode_coords, drop_variables=drop_variables) + vars, attrs, concat_characters, mask_and_scale, decode_datetimes, + decode_timedeltas, decode_coords, drop_variables=drop_variables) ds = Dataset(vars, attrs=attrs) ds = ds.set_coords(coord_names.union(extra_coords)) ds._file_obj = file_obj @@ -934,7 +942,7 @@ def decode_cf(obj, concat_characters=True, mask_and_scale=True, def cf_decoder(variables, attributes, concat_characters=True, mask_and_scale=True, - decode_times=True): + decode_datetimes=True, decode_timedeltas=False): """ Decode a set of CF encoded variables and attributes. @@ -952,8 +960,10 @@ def cf_decoder(variables, attributes, mask_and_scale: bool Lazily scale (using scale_factor and add_offset) and mask (using _FillValue). - decode_times : bool + decode_datetimes : bool Decode cf times ('hours since 2000-01-01') to np.datetime64. + decode_timedeltas : bool + Decode cf time data ('seconds') to np.timedelta64. Returns ------- @@ -963,7 +973,8 @@ def cf_decoder(variables, attributes, A dictionary mapping from attribute name to values. 
""" variables, attributes, _ = decode_cf_variables( - variables, attributes, concat_characters, mask_and_scale, decode_times) + variables, attributes, concat_characters, mask_and_scale, + decode_datetimes, decode_timedeltas) return variables, attributes diff --git a/xarray/test/test_backends.py b/xarray/test/test_backends.py index 257c880284e..613b615fa9a 100644 --- a/xarray/test/test_backends.py +++ b/xarray/test/test_backends.py @@ -201,7 +201,8 @@ def test_roundtrip_datetime_data(self): def test_roundtrip_timedelta_data(self): time_deltas = pd.to_timedelta(['1h', '2h', 'NaT']) expected = Dataset({'td': ('td', time_deltas), 'td0': time_deltas[0]}) - with self.roundtrip(expected) as actual: + with self.roundtrip( + expected, open_kwargs={'decode_timedeltas': True}) as actual: self.assertDatasetIdentical(expected, actual) def test_roundtrip_float64_data(self): From 98ed32ec9d81036763ff498c8f1ab643150115b9 Mon Sep 17 00:00:00 2001 From: ocefpaf Date: Thu, 4 Aug 2016 10:18:00 -0300 Subject: [PATCH 2/2] Add a few tests --- xarray/test/test_conventions.py | 42 +++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/xarray/test/test_conventions.py b/xarray/test/test_conventions.py index 8298feaa5e8..6de17840d7e 100644 --- a/xarray/test/test_conventions.py +++ b/xarray/test/test_conventions.py @@ -228,6 +228,13 @@ def test_decode_cf_with_conflicting_fill_missing_value(self): self.assertRaisesRegexp(ValueError, "_FillValue and missing_value", lambda: conventions.decode_cf_variable(var)) + def test_decoded_timedelta_array(self): + actual = conventions.DecodedCFTimedeltaArray( + np.array([0, 1, 2]), 'seconds') + expected = pd.to_timedelta(['0s', '1s', '2s']).values + self.assertEqual(actual.dtype, np.dtype('timedelta64[ns]')) + self.assertArrayEqual(actual, expected) + @requires_netCDF4 def test_decode_cf_datetime_non_iso_strings(self): # datetime strings that are _almost_ ISO compliant but not quite, @@ -537,6 +544,41 @@ def 
test_decode_cf_with_drop_variables(self): self.assertDatasetIdentical(expected, actual) self.assertDatasetIdentical(expected, actual2) + def test_datetimes_true(self): + original = Dataset({ + 'time': ('time', [0, 1, 2], {'units': 'days since 2000-01-01'}), + 'period': ('time', [0, 1, 2], {'coordinates': 'time', 'units': 'seconds'}), + }) + expected = Dataset({ + 'time': pd.date_range('2000-01-01', periods=3), + 'period': ('time', [0, 1, 2], {'units': 'seconds'}), + }) + actual = conventions.decode_cf(original, decode_datetimes=True) + self.assertDatasetIdentical(expected, actual) + + def test_timedeltas_true(self): + original = Dataset({ + 'time': ('time', [0, 1, 2], {'units': 'days since 2000-01-01'}), + 'period': ('time', [0, 1, 2], {'coordinates': 'time', 'units': 'seconds'}), + }) + expected = Dataset({ + 'time': pd.date_range('2000-01-01', periods=3), + 'period': ('time', pd.to_timedelta(['0s', '1s', '2s'])), + }) + actual = conventions.decode_cf(original, decode_timedeltas=True) + self.assertDatasetIdentical(expected, actual) + + def test_datetimes_false(self): + original = Dataset({ + 'time': ('time', [0, 1, 2], {'units': 'days since 2000-01-01'}), + 'period': ('time', [0, 1, 2], {'coordinates': 'time', 'units': 'seconds'}), + }) + expected = Dataset({ + 'time': [0, 1, 2], + 'period': ('time', [0, 1, 2], {'units': 'seconds'}), + }) + actual = conventions.decode_cf(original, decode_datetimes=False) + self.assertArrayEqual(expected['time'], actual['time']) class CFEncodedInMemoryStore(WritableCFDataStore, InMemoryDataStore): pass