From f7842b3a29220d121d7bf2772e540bf2348128cb Mon Sep 17 00:00:00 2001 From: Neil Parley Date: Fri, 1 Apr 2016 10:31:17 +0100 Subject: [PATCH 01/13] Added a first go at a converter from xarray.DataArrays objects to Iris.cube.Cube objects. Uses the same template as the cdms2 conversion. --- .travis.yml | 2 +- ... => requirements-py27-cdat+iris+pynio.yml} | 2 + xarray/convert.py | 111 ++++++++++++++++-- xarray/core/dataarray.py | 13 ++ xarray/test/test_dataarray.py | 44 +++++++ 5 files changed, 159 insertions(+), 13 deletions(-) rename ci/{requirements-py27-cdat+pynio.yml => requirements-py27-cdat+iris+pynio.yml} (89%) diff --git a/.travis.yml b/.travis.yml index 3af687b3f79..57da8637ed9 100644 --- a/.travis.yml +++ b/.travis.yml @@ -12,7 +12,7 @@ matrix: - python: 2.7 env: CONDA_ENV=py27-min - python: 2.7 - env: CONDA_ENV=py27-cdat+pynio + env: CONDA_ENV=py27-cdat+iris+pynio - python: 3.4 env: CONDA_ENV=py34 - python: 3.5 diff --git a/ci/requirements-py27-cdat+pynio.yml b/ci/requirements-py27-cdat+iris+pynio.yml similarity index 89% rename from ci/requirements-py27-cdat+pynio.yml rename to ci/requirements-py27-cdat+iris+pynio.yml index feedb684cc8..75c5c4e6d7e 100644 --- a/ci/requirements-py27-cdat+pynio.yml +++ b/ci/requirements-py27-cdat+iris+pynio.yml @@ -2,6 +2,7 @@ name: test_env channels: - ajdawson # cdat - dbrown # pynio + - scitools # iris dependencies: - python=2.7 - cdat-lite @@ -11,6 +12,7 @@ dependencies: - pandas>=0.15.0 - pynio - scipy + - iris - pip: - coveralls - pytest-cov diff --git a/xarray/convert.py b/xarray/convert.py index 5c4624f2d01..b7bef8ce6b8 100644 --- a/xarray/convert.py +++ b/xarray/convert.py @@ -3,24 +3,36 @@ import numpy as np from .core.dataarray import DataArray +from .core.pycompat import OrderedDict from .conventions import ( maybe_encode_timedelta, maybe_encode_datetime, decode_cf) -ignored_attrs = set(['name', 'tileIndex']) +cdms2_ignored_attrs = {'name', 'tileIndex'} +iris_forbidden_keys = {'standard_name', 'long_name', 
'units', 'bounds', 'axis', + 'calendar', 'leap_month', 'leap_year', 'month_lengths', + 'coordinates', 'grid_mapping', 'climatology', + 'cell_methods', 'formula_terms', 'compress', + 'missing_value', 'add_offset', 'scale_factor', + 'valid_max', 'valid_min', 'valid_range', '_FillValue'} + + +def encode(var): + return maybe_encode_timedelta(maybe_encode_datetime(var.variable)) + + +def filter_attrs(_attrs, ignored_attrs): + return dict((k, v) for k, v in _attrs.items() if k not in ignored_attrs) def from_cdms2(variable): """Convert a cdms2 variable into an DataArray """ - def get_cdms2_attrs(var): - return dict((k, v) for k, v in var.attributes.items() - if k not in ignored_attrs) - values = np.asarray(variable) name = variable.id - coords = [(v.id, np.asarray(v), get_cdms2_attrs(v)) + coords = [(v.id, np.asarray(v), + filter_attrs(v.attributes, cdms2_ignored_attrs)) for v in variable.getAxisList()] - attrs = get_cdms2_attrs(variable) + attrs = filter_attrs(variable.attributes, cdms2_ignored_attrs) dataarray = DataArray(values, coords=coords, name=name, attrs=attrs) return decode_cf(dataarray.to_dataset())[dataarray.name] @@ -31,12 +43,9 @@ def to_cdms2(dataarray): # we don't want cdms2 to be a hard dependency import cdms2 - def encode(var): - return maybe_encode_timedelta(maybe_encode_datetime(var.variable)) - - def set_cdms2_attrs(var, attrs): + def set_cdms2_attrs(_var, attrs): for k, v in attrs.items(): - setattr(var, k, v) + setattr(_var, k, v) axes = [] for dim in dataarray.dims: @@ -49,3 +58,81 @@ def set_cdms2_attrs(var, attrs): cdms2_var = cdms2.createVariable(var.values, axes=axes, id=dataarray.name) set_cdms2_attrs(cdms2_var, var.attrs) return cdms2_var + + +# TODO: Add converting bounds from xarray to Iris and back +# TODO: Cell methods are not converted between Iris and xarray +def to_iris(dataarray): + """Convert a DataArray into a Iris Cube + """ + # Iris not a hard dependency + import iris + # iris.unit is deprecated in Iris v1.9 + import cf_units + + 
def check_attrs(attrs, keys): + return dict((k, v) for k, v in attrs.items() if k in keys) + + def get_args(attrs): + _args = {'attributes': filter_attrs(attrs, iris_forbidden_keys)} + _args.update(check_attrs(attrs, ('standard_name', 'long_name',))) + _unit_args = check_attrs(coord.attrs, ('calendar',)) + if attrs.has_key('units'): + _args['units'] = cf_units.Unit(attrs['units'], **_unit_args) + return _args + + dim_coords = [] + aux_coords = [] + + for coord_name in dataarray.coords: + coord = encode(dataarray.coords[coord_name]) + coord_args = get_args(coord.attrs) + coord_args['var_name'] = coord_name + iris_coord = iris.coords.DimCoord(coord.values, **coord_args) + axis = None + if coord.dims: + axis = dataarray.get_axis_num(coord.dims) + if coord_name in dataarray.dims: + dim_coords.append((iris_coord, axis)) + else: + aux_coords.append((iris_coord, axis)) + + args = get_args(dataarray.attrs) + args['var_name'] = dataarray.name + args['dim_coords_and_dims'] = dim_coords + args['aux_coords_and_dims'] = aux_coords + + cube = iris.cube.Cube(dataarray.to_masked_array(), **args) + return cube + + +def from_iris(cube): + """Convert a Iris cube into an DataArray + """ + def get_attr(_obj): + attrs = {'standard_name': _obj.standard_name, + 'long_name': _obj.long_name} + if _obj.units.calendar: + attrs['calendar'] = _obj.units.calendar + if _obj.units.origin != '1': + attrs['units'] = _obj.units.origin + attrs.update(_obj.attributes) + return dict((k, v) for k, v in attrs.items() if v is not None) + + name = cube.var_name + dims = [dim.var_name for dim in cube.dim_coords] + coords = OrderedDict() + + for coord in cube.coords(): + coord_attrs = get_attr(coord) + coord_dims = [cube.coords()[i].var_name for i in cube.coord_dims(coord)] + if coord_dims: + coords[coord.var_name] = (coord_dims, coord.points, coord_attrs) + else: + coords[coord.var_name] = ((), + np.asscalar(coord.points), coord_attrs) + + array_attrs = get_attr(cube) + dataarray = DataArray(cube.data, 
coords=coords, name=name, + attrs=array_attrs, dims=dims) + return decode_cf(dataarray.to_dataset())[dataarray.name] \ No newline at end of file diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index cfeda17eb91..9d8c4601a67 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -1099,6 +1099,19 @@ def from_cdms2(cls, variable): from ..convert import from_cdms2 return from_cdms2(variable) + def to_iris(self): + """Convert this array into a iris.cube.Cube + """ + from ..convert import to_iris + return to_iris(self) + + @classmethod + def from_iris(cls, cube): + """Convert a iris.cube.Cube into an xarray.DataArray + """ + from ..convert import from_iris + return from_iris(cube) + def _all_compat(self, other, compat_str): """Helper function for equals and identical""" def compat(x, y): diff --git a/xarray/test/test_dataarray.py b/xarray/test/test_dataarray.py index 7c5081c92ac..3d0531c61d3 100644 --- a/xarray/test/test_dataarray.py +++ b/xarray/test/test_dataarray.py @@ -1639,6 +1639,50 @@ def test_to_and_from_cdms2(self): roundtripped = DataArray.from_cdms2(actual) self.assertDataArrayIdentical(original, roundtripped) + def test_to_and_from_iris(self): + try: + import iris + except ImportError: + raise unittest.SkipTest('iris not installed') + + coord_dict = OrderedDict() + coord_dict['distance'] = ('distance', [-2, 2], {'units': 'meters'}) + coord_dict['time'] = ('time', pd.date_range('2000-01-01', periods=3)) + coord_dict['height'] = 10 + coord_dict['distance2'] = ('distance', [0, 1]) + + original = DataArray(np.arange(6).reshape(2, 3), coord_dict, + name='Temperature', attrs={'baz': 123, + 'units': 'Kelvin', + 'standard_name': + 'fire_temperature', + 'long_name': + 'Fire Temperature'}, + dims=('distance', 'time')) + + expected_coords = [Coordinate('distance', [-2, 2]), + Coordinate('time', [0, 1, 2]), + Coordinate('height', [10]), + Coordinate('distance2', [0, 1])] + + actual = original.to_iris() + 
self.assertArrayEqual(actual.data, original.data) + self.assertEqual(actual.var_name, original.name) + self.assertItemsEqual([d.var_name for d in actual.dim_coords], + original.dims) + + for coord, expected_coord in zip((actual.coords()), expected_coords): + self.assertEqual(coord.var_name, expected_coord.name) + self.assertArrayEqual(coord.points, expected_coord.values) + self.assertEqual(actual.coord_dims(coord), + original.get_axis_num + (original.coords[coord.var_name].dims)) + self.assertEqual(actual.attributes['baz'], original.attrs['baz']) + self.assertEqual(actual.standard_name, original.attrs['standard_name']) + + roundtripped = DataArray.from_iris(actual) + self.assertDataArrayIdentical(original, roundtripped) + def test_to_dataset_whole(self): unnamed = DataArray([1, 2], dims='x') with self.assertRaisesRegexp(ValueError, 'unable to convert unnamed'): From e0498c525ba8da3eb66935b6cdff1a20919d22c3 Mon Sep 17 00:00:00 2001 From: Neil Parley Date: Fri, 1 Apr 2016 19:53:30 +0100 Subject: [PATCH 02/13] Update tests to use original.coords and add extra tests for coord attributes --- xarray/test/test_dataarray.py | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/xarray/test/test_dataarray.py b/xarray/test/test_dataarray.py index 3d0531c61d3..e2e94285d04 100644 --- a/xarray/test/test_dataarray.py +++ b/xarray/test/test_dataarray.py @@ -8,6 +8,7 @@ Coordinate, Variable) from xarray.core.pycompat import iteritems, OrderedDict from xarray.core.common import _full_like +from xarray.conventions import maybe_encode_datetime from . 
import (TestCase, ReturnItem, source_ndarray, unittest, requires_dask, requires_bottleneck) @@ -1621,15 +1622,15 @@ def test_to_and_from_cdms2(self): [('distance', [-2, 2], {'units': 'meters'}), ('time', pd.date_range('2000-01-01', periods=3))], name='foo', attrs={'baz': 123}) - expected_coords = [Coordinate('distance', [-2, 2]), - Coordinate('time', [0, 1, 2])] + actual = original.to_cdms2() self.assertArrayEqual(actual, original) self.assertEqual(actual.id, original.name) self.assertItemsEqual(actual.getAxisIds(), original.dims) - for axis, coord in zip(actual.getAxisList(), expected_coords): + for axis, coord_key in zip(actual.getAxisList(), original.coords): + coord = original.coords[coord_key] self.assertEqual(axis.id, coord.name) - self.assertArrayEqual(axis, coord.values) + self.assertArrayEqual(axis, maybe_encode_datetime(coord).values) self.assertEqual(actual.baz, original.attrs['baz']) component_times = actual.getAxis(1).asComponentTime() @@ -1642,6 +1643,7 @@ def test_to_and_from_cdms2(self): def test_to_and_from_iris(self): try: import iris + import cf_units except ImportError: raise unittest.SkipTest('iris not installed') @@ -1649,7 +1651,7 @@ def test_to_and_from_iris(self): coord_dict['distance'] = ('distance', [-2, 2], {'units': 'meters'}) coord_dict['time'] = ('time', pd.date_range('2000-01-01', periods=3)) coord_dict['height'] = 10 - coord_dict['distance2'] = ('distance', [0, 1]) + coord_dict['distance2'] = ('distance', [0, 1], {'foo': 'bar'}) original = DataArray(np.arange(6).reshape(2, 3), coord_dict, name='Temperature', attrs={'baz': 123, @@ -1660,23 +1662,25 @@ def test_to_and_from_iris(self): 'Fire Temperature'}, dims=('distance', 'time')) - expected_coords = [Coordinate('distance', [-2, 2]), - Coordinate('time', [0, 1, 2]), - Coordinate('height', [10]), - Coordinate('distance2', [0, 1])] - actual = original.to_iris() self.assertArrayEqual(actual.data, original.data) self.assertEqual(actual.var_name, original.name) 
self.assertItemsEqual([d.var_name for d in actual.dim_coords], original.dims) - for coord, expected_coord in zip((actual.coords()), expected_coords): - self.assertEqual(coord.var_name, expected_coord.name) - self.assertArrayEqual(coord.points, expected_coord.values) + for coord, orginal_key in zip((actual.coords()), original.coords): + original_coord = original.coords[orginal_key] + self.assertEqual(coord.var_name, original_coord.name) + self.assertArrayEqual(coord.points, + maybe_encode_datetime(original_coord).values) self.assertEqual(actual.coord_dims(coord), original.get_axis_num (original.coords[coord.var_name].dims)) + + self.assertEqual(actual.coord('distance2').attributes['foo'], + original.coords['distance2'].attrs['foo']) + self.assertEqual(actual.coord('distance').units, + cf_units.Unit(original.coords['distance'].units)) self.assertEqual(actual.attributes['baz'], original.attrs['baz']) self.assertEqual(actual.standard_name, original.attrs['standard_name']) From c26184507a25c1bdb93e423f1db4f7d61da69fa3 Mon Sep 17 00:00:00 2001 From: Neil Parley Date: Sat, 2 Apr 2016 18:40:20 +0100 Subject: [PATCH 03/13] Remove set literals just in case. Replace has_key with in. Use AuxCoord and DimCoord correctly so 2d coords will work. Use dims variable to convert dimension numbers into names. Add 2d coord to test DataArray. 
--- xarray/convert.py | 24 +++++++++++++----------- xarray/test/test_dataarray.py | 1 + 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/xarray/convert.py b/xarray/convert.py index b7bef8ce6b8..7cb7ca56416 100644 --- a/xarray/convert.py +++ b/xarray/convert.py @@ -7,13 +7,14 @@ from .conventions import ( maybe_encode_timedelta, maybe_encode_datetime, decode_cf) -cdms2_ignored_attrs = {'name', 'tileIndex'} -iris_forbidden_keys = {'standard_name', 'long_name', 'units', 'bounds', 'axis', - 'calendar', 'leap_month', 'leap_year', 'month_lengths', - 'coordinates', 'grid_mapping', 'climatology', - 'cell_methods', 'formula_terms', 'compress', - 'missing_value', 'add_offset', 'scale_factor', - 'valid_max', 'valid_min', 'valid_range', '_FillValue'} +cdms2_ignored_attrs = set(['name', 'tileIndex']) +iris_forbidden_keys = set( + ['standard_name', 'long_name', 'units', 'bounds', 'axis', + 'calendar', 'leap_month', 'leap_year', 'month_lengths', + 'coordinates', 'grid_mapping', 'climatology', + 'cell_methods', 'formula_terms', 'compress', + 'missing_value', 'add_offset', 'scale_factor', + 'valid_max', 'valid_min', 'valid_range', '_FillValue']) def encode(var): @@ -77,7 +78,7 @@ def get_args(attrs): _args = {'attributes': filter_attrs(attrs, iris_forbidden_keys)} _args.update(check_attrs(attrs, ('standard_name', 'long_name',))) _unit_args = check_attrs(coord.attrs, ('calendar',)) - if attrs.has_key('units'): + if 'units' in attrs: _args['units'] = cf_units.Unit(attrs['units'], **_unit_args) return _args @@ -88,13 +89,14 @@ def get_args(attrs): coord = encode(dataarray.coords[coord_name]) coord_args = get_args(coord.attrs) coord_args['var_name'] = coord_name - iris_coord = iris.coords.DimCoord(coord.values, **coord_args) axis = None if coord.dims: axis = dataarray.get_axis_num(coord.dims) if coord_name in dataarray.dims: + iris_coord = iris.coords.DimCoord(coord.values, **coord_args) dim_coords.append((iris_coord, axis)) else: + iris_coord = 
iris.coords.AuxCoord(coord.values, **coord_args) aux_coords.append((iris_coord, axis)) args = get_args(dataarray.attrs) @@ -125,7 +127,7 @@ def get_attr(_obj): for coord in cube.coords(): coord_attrs = get_attr(coord) - coord_dims = [cube.coords()[i].var_name for i in cube.coord_dims(coord)] + coord_dims = [dims[i] for i in cube.coord_dims(coord)] if coord_dims: coords[coord.var_name] = (coord_dims, coord.points, coord_attrs) else: @@ -135,4 +137,4 @@ def get_attr(_obj): array_attrs = get_attr(cube) dataarray = DataArray(cube.data, coords=coords, name=name, attrs=array_attrs, dims=dims) - return decode_cf(dataarray.to_dataset())[dataarray.name] \ No newline at end of file + return decode_cf(dataarray.to_dataset())[dataarray.name] diff --git a/xarray/test/test_dataarray.py b/xarray/test/test_dataarray.py index e2e94285d04..1c0e8b08933 100644 --- a/xarray/test/test_dataarray.py +++ b/xarray/test/test_dataarray.py @@ -1652,6 +1652,7 @@ def test_to_and_from_iris(self): coord_dict['time'] = ('time', pd.date_range('2000-01-01', periods=3)) coord_dict['height'] = 10 coord_dict['distance2'] = ('distance', [0, 1], {'foo': 'bar'}) + coord_dict['time2'] = (('distance', 'time'), [[0, 1, 2], [2, 3, 4]]) original = DataArray(np.arange(6).reshape(2, 3), coord_dict, name='Temperature', attrs={'baz': 123, From edae053a04fa4d1642dc7e210e9eb6595d5ec1dc Mon Sep 17 00:00:00 2001 From: Neil Parley Date: Mon, 4 Apr 2016 10:10:46 +0100 Subject: [PATCH 04/13] Create dimensions if the Iris cube does not have any --- xarray/convert.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/xarray/convert.py b/xarray/convert.py index 7cb7ca56416..2cc46ed2210 100644 --- a/xarray/convert.py +++ b/xarray/convert.py @@ -123,6 +123,8 @@ def get_attr(_obj): name = cube.var_name dims = [dim.var_name for dim in cube.dim_coords] + if not dims: + dims = ["dim{}".format(i) for i in range(cube.data.ndim)] coords = OrderedDict() for coord in cube.coords(): From cd92bca6e228c44e14d6e0b090240f0c5eef96de Mon Sep 
17 00:00:00 2001 From: Neil Parley Date: Fri, 8 Apr 2016 15:41:30 +0100 Subject: [PATCH 05/13] Add code to convert cell_methods --- xarray/convert.py | 63 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 62 insertions(+), 1 deletion(-) diff --git a/xarray/convert.py b/xarray/convert.py index 2cc46ed2210..a4963a114b3 100644 --- a/xarray/convert.py +++ b/xarray/convert.py @@ -15,6 +15,9 @@ 'cell_methods', 'formula_terms', 'compress', 'missing_value', 'add_offset', 'scale_factor', 'valid_max', 'valid_min', 'valid_range', '_FillValue']) +cell_methods_strings = set(['point', 'sum', 'maximum', 'median', 'mid_range', + 'minimum', 'mean', 'mode', 'standard_deviation', + 'variance']) def encode(var): @@ -62,7 +65,6 @@ def set_cdms2_attrs(_var, attrs): # TODO: Add converting bounds from xarray to Iris and back -# TODO: Cell methods are not converted between Iris and xarray def to_iris(dataarray): """Convert a DataArray into a Iris Cube """ @@ -82,6 +84,45 @@ def get_args(attrs): _args['units'] = cf_units.Unit(attrs['units'], **_unit_args) return _args + def get_cell_methods(cell_methods_str): + """Converts string to iris cell method objects""" + cell_methods = [] + _cell_method_words = [w.strip() for w in cell_methods_str.split(':')] + cm = {'coords': [], 'method': '', 'interval': [], 'comment': []} + skip = False + for i, word in enumerate(_cell_method_words): + # If this value is a comment or an interval don't read + if skip: + skip = False + continue + # If this word is an axis + if word not in cell_methods_strings | set(['interval', 'comment']): + # If we already have a method this must be the next cell_method + if cm['method']: + cell_methods.append( + iris.coords.CellMethod(cm['method'], + coords=cm['coords'], + intervals=cm['interval'], + comments=cm['comment'])) + cm = {'coords': [], 'method': '', 'interval': [], + 'comment': []} + cm['coords'].append(word) + continue + else: + cm['coords'].append(word) + elif word in ['interval', 'comment']: + 
cm[word].append(_cell_method_words[i + 1]) + skip = True + continue + else: + cm['method'] = word + else: + cell_methods.append( + iris.coords.CellMethod(cm['method'], coords=cm['coords'], + intervals=cm['interval'], + comments=cm['comment'])) + return cell_methods + dim_coords = [] aux_coords = [] @@ -103,6 +144,8 @@ def get_args(attrs): args['var_name'] = dataarray.name args['dim_coords_and_dims'] = dim_coords args['aux_coords_and_dims'] = aux_coords + if 'cell_methods' in dataarray.attrs: + args['cell_methods'] = get_cell_methods(dataarray.attrs['cell_methods']) cube = iris.cube.Cube(dataarray.to_masked_array(), **args) return cube @@ -111,6 +154,7 @@ def get_args(attrs): def from_iris(cube): """Convert a Iris cube into an DataArray """ + def get_attr(_obj): attrs = {'standard_name': _obj.standard_name, 'long_name': _obj.long_name} @@ -121,6 +165,20 @@ def get_attr(_obj): attrs.update(_obj.attributes) return dict((k, v) for k, v in attrs.items() if v is not None) + def get_cell_methods(cell_methods_obj): + _cell_methods = [] + for cell_method in cell_methods_obj: + names = ''.join(['{}: '.format(n) for n in cell_method.coord_names]) + intervals = ' '.join(['interval: {}'.format(interval) + for interval in cell_method.intervals]) + comments = ' '.join(['comment: {}'.format(comment) + for comment in cell_method.comments]) + extra = ' '.join([intervals, comments]).strip() + if extra: + extra += ' ' + _cell_methods.append(names + cell_method.method + extra) + return ' '.join(_cell_methods) + name = cube.var_name dims = [dim.var_name for dim in cube.dim_coords] if not dims: @@ -137,6 +195,9 @@ def get_attr(_obj): np.asscalar(coord.points), coord_attrs) array_attrs = get_attr(cube) + cell_methods = get_cell_methods(cube.cell_methods) + if cell_methods: + array_attrs['cell_methods'] = cell_methods dataarray = DataArray(cube.data, coords=coords, name=name, attrs=array_attrs, dims=dims) return decode_cf(dataarray.to_dataset())[dataarray.name] From 
44930af72964b78c64d14b198b930c8d6a759a78 Mon Sep 17 00:00:00 2001 From: Neil Parley Date: Mon, 11 Apr 2016 10:25:58 +0100 Subject: [PATCH 06/13] Don't append blank cell method --- xarray/convert.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/xarray/convert.py b/xarray/convert.py index a4963a114b3..c08d1082da1 100644 --- a/xarray/convert.py +++ b/xarray/convert.py @@ -117,10 +117,11 @@ def get_cell_methods(cell_methods_str): else: cm['method'] = word else: - cell_methods.append( - iris.coords.CellMethod(cm['method'], coords=cm['coords'], - intervals=cm['interval'], - comments=cm['comment'])) + if cm['method']: + cell_methods.append( + iris.coords.CellMethod(cm['method'], coords=cm['coords'], + intervals=cm['interval'], + comments=cm['comment'])) return cell_methods dim_coords = [] From 6bed3062e37c8b1a4e24014913f757e025a37688 Mon Sep 17 00:00:00 2001 From: Neil Parley Date: Wed, 20 Apr 2016 14:01:45 +0100 Subject: [PATCH 07/13] Update cell method code to use internal Iris functions. Also add tests. 
--- xarray/convert.py | 47 +++++------------------------------ xarray/test/test_dataarray.py | 6 +++++ 2 files changed, 12 insertions(+), 41 deletions(-) diff --git a/xarray/convert.py b/xarray/convert.py index c08d1082da1..190dfd8e5ac 100644 --- a/xarray/convert.py +++ b/xarray/convert.py @@ -70,6 +70,8 @@ def to_iris(dataarray): """ # Iris not a hard dependency import iris + import iris.fileformats._pyke_rules.compiled_krb.fc_rules_cf_fc \ + as iris_fc_rules_cf_fc # iris.unit is deprecated in Iris v1.9 import cf_units @@ -84,45 +86,7 @@ def get_args(attrs): _args['units'] = cf_units.Unit(attrs['units'], **_unit_args) return _args - def get_cell_methods(cell_methods_str): - """Converts string to iris cell method objects""" - cell_methods = [] - _cell_method_words = [w.strip() for w in cell_methods_str.split(':')] - cm = {'coords': [], 'method': '', 'interval': [], 'comment': []} - skip = False - for i, word in enumerate(_cell_method_words): - # If this value is a comment or an interval don't read - if skip: - skip = False - continue - # If this word is an axis - if word not in cell_methods_strings | set(['interval', 'comment']): - # If we already have a method this must be the next cell_method - if cm['method']: - cell_methods.append( - iris.coords.CellMethod(cm['method'], - coords=cm['coords'], - intervals=cm['interval'], - comments=cm['comment'])) - cm = {'coords': [], 'method': '', 'interval': [], - 'comment': []} - cm['coords'].append(word) - continue - else: - cm['coords'].append(word) - elif word in ['interval', 'comment']: - cm[word].append(_cell_method_words[i + 1]) - skip = True - continue - else: - cm['method'] = word - else: - if cm['method']: - cell_methods.append( - iris.coords.CellMethod(cm['method'], coords=cm['coords'], - intervals=cm['interval'], - comments=cm['comment'])) - return cell_methods + get_cell_methods = iris_fc_rules_cf_fc._parse_cell_methods dim_coords = [] aux_coords = [] @@ -146,7 +110,8 @@ def get_cell_methods(cell_methods_str): 
args['dim_coords_and_dims'] = dim_coords args['aux_coords_and_dims'] = aux_coords if 'cell_methods' in dataarray.attrs: - args['cell_methods'] = get_cell_methods(dataarray.attrs['cell_methods']) + args['cell_methods'] = get_cell_methods(dataarray.name, + dataarray.attrs['cell_methods']) cube = iris.cube.Cube(dataarray.to_masked_array(), **args) return cube @@ -176,7 +141,7 @@ def get_cell_methods(cell_methods_obj): for comment in cell_method.comments]) extra = ' '.join([intervals, comments]).strip() if extra: - extra += ' ' + extra = ' ({})'.format(extra) _cell_methods.append(names + cell_method.method + extra) return ' '.join(_cell_methods) diff --git a/xarray/test/test_dataarray.py b/xarray/test/test_dataarray.py index 1c0e8b08933..30bcfaaed1f 100644 --- a/xarray/test/test_dataarray.py +++ b/xarray/test/test_dataarray.py @@ -1663,11 +1663,17 @@ def test_to_and_from_iris(self): 'Fire Temperature'}, dims=('distance', 'time')) + original.attrs['cell_methods'] = 'height: mean (comment: A cell method)' actual = original.to_iris() self.assertArrayEqual(actual.data, original.data) self.assertEqual(actual.var_name, original.name) self.assertItemsEqual([d.var_name for d in actual.dim_coords], original.dims) + self.assertEqual(actual.cell_methods, + (iris.coords.CellMethod(method='mean', + coords=('height',), + intervals=(), + comments=('A cell method',)),)) for coord, orginal_key in zip((actual.coords()), original.coords): original_coord = original.coords[orginal_key] From cd06a2e23e77fff6c96ad00c6285a5966bc63b1f Mon Sep 17 00:00:00 2001 From: Neil Parley Date: Thu, 21 Apr 2016 19:39:31 +0100 Subject: [PATCH 08/13] Update the API for IRIS change --- xarray/convert.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/xarray/convert.py b/xarray/convert.py index 190dfd8e5ac..fd97350d236 100644 --- a/xarray/convert.py +++ b/xarray/convert.py @@ -70,8 +70,13 @@ def to_iris(dataarray): """ # Iris not a hard dependency import iris - import 
iris.fileformats._pyke_rules.compiled_krb.fc_rules_cf_fc \ - as iris_fc_rules_cf_fc + try: + from iris.fileformats.netcdf import parse_cell_methods + except ImportError: + # prior to v1.10 + from iris.fileformats._pyke_rules.compiled_krb.fc_rules_cf_fc \ + import _parse_cell_methods as parse_cell_methods + # iris.unit is deprecated in Iris v1.9 import cf_units @@ -86,8 +91,6 @@ def get_args(attrs): _args['units'] = cf_units.Unit(attrs['units'], **_unit_args) return _args - get_cell_methods = iris_fc_rules_cf_fc._parse_cell_methods - dim_coords = [] aux_coords = [] @@ -110,8 +113,8 @@ def get_args(attrs): args['dim_coords_and_dims'] = dim_coords args['aux_coords_and_dims'] = aux_coords if 'cell_methods' in dataarray.attrs: - args['cell_methods'] = get_cell_methods(dataarray.name, - dataarray.attrs['cell_methods']) + args['cell_methods'] = \ + parse_cell_methods(dataarray.name, dataarray.attrs['cell_methods']) cube = iris.cube.Cube(dataarray.to_masked_array(), **args) return cube From e7f9cb1028ab5ffaf2b0c7de5093a673f5b8a311 Mon Sep 17 00:00:00 2001 From: Neil Parley Date: Wed, 25 May 2016 12:00:43 +0100 Subject: [PATCH 09/13] Move helper functions outside of main functions --- xarray/convert.py | 118 +++++++++++++++++++++++++--------------------- 1 file changed, 65 insertions(+), 53 deletions(-) diff --git a/xarray/convert.py b/xarray/convert.py index fd97350d236..15efa6eabbe 100644 --- a/xarray/convert.py +++ b/xarray/convert.py @@ -24,8 +24,10 @@ def encode(var): return maybe_encode_timedelta(maybe_encode_datetime(var.variable)) -def filter_attrs(_attrs, ignored_attrs): - return dict((k, v) for k, v in _attrs.items() if k not in ignored_attrs) +def _filter_attrs(attrs, ignored_attrs): + """ Return attrs that are not in ignored_attrs + """ + return dict((k, v) for k, v in attrs.items() if k not in ignored_attrs) def from_cdms2(variable): @@ -34,9 +36,9 @@ def from_cdms2(variable): values = np.asarray(variable) name = variable.id coords = [(v.id, np.asarray(v), - 
filter_attrs(v.attributes, cdms2_ignored_attrs)) + _filter_attrs(v.attributes, cdms2_ignored_attrs)) for v in variable.getAxisList()] - attrs = filter_attrs(variable.attributes, cdms2_ignored_attrs) + attrs = _filter_attrs(variable.attributes, cdms2_ignored_attrs) dataarray = DataArray(values, coords=coords, name=name, attrs=attrs) return decode_cf(dataarray.to_dataset())[dataarray.name] @@ -47,9 +49,9 @@ def to_cdms2(dataarray): # we don't want cdms2 to be a hard dependency import cdms2 - def set_cdms2_attrs(_var, attrs): + def set_cdms2_attrs(var, attrs): for k, v in attrs.items(): - setattr(_var, k, v) + setattr(var, k, v) axes = [] for dim in dataarray.dims: @@ -64,9 +66,28 @@ def set_cdms2_attrs(_var, attrs): return cdms2_var +def _pick_attrs(attrs, keys): + """ Return attrs with keys in keys list + """ + return dict((k, v) for k, v in attrs.items() if k in keys) + + +def _get_iris_args(attrs): + """ Converts the xarray attrs into args that can be passed into Iris + """ + # iris.unit is deprecated in Iris v1.9 + import cf_units + args = {'attributes': _filter_attrs(attrs, iris_forbidden_keys)} + args.update(_pick_attrs(attrs, ('standard_name', 'long_name',))) + unit_args = _pick_attrs(attrs, ('calendar',)) + if 'units' in attrs: + args['units'] = cf_units.Unit(attrs['units'], **unit_args) + return args + + # TODO: Add converting bounds from xarray to Iris and back def to_iris(dataarray): - """Convert a DataArray into a Iris Cube + """ Convert a DataArray into a Iris Cube """ # Iris not a hard dependency import iris @@ -77,26 +98,12 @@ def to_iris(dataarray): from iris.fileformats._pyke_rules.compiled_krb.fc_rules_cf_fc \ import _parse_cell_methods as parse_cell_methods - # iris.unit is deprecated in Iris v1.9 - import cf_units - - def check_attrs(attrs, keys): - return dict((k, v) for k, v in attrs.items() if k in keys) - - def get_args(attrs): - _args = {'attributes': filter_attrs(attrs, iris_forbidden_keys)} - _args.update(check_attrs(attrs, 
('standard_name', 'long_name',))) - _unit_args = check_attrs(coord.attrs, ('calendar',)) - if 'units' in attrs: - _args['units'] = cf_units.Unit(attrs['units'], **_unit_args) - return _args - dim_coords = [] aux_coords = [] for coord_name in dataarray.coords: coord = encode(dataarray.coords[coord_name]) - coord_args = get_args(coord.attrs) + coord_args = _get_iris_args(coord.attrs) coord_args['var_name'] = coord_name axis = None if coord.dims: @@ -108,46 +115,51 @@ def get_args(attrs): iris_coord = iris.coords.AuxCoord(coord.values, **coord_args) aux_coords.append((iris_coord, axis)) - args = get_args(dataarray.attrs) + args = _get_iris_args(dataarray.attrs) args['var_name'] = dataarray.name args['dim_coords_and_dims'] = dim_coords args['aux_coords_and_dims'] = aux_coords if 'cell_methods' in dataarray.attrs: - args['cell_methods'] = \ - parse_cell_methods(dataarray.name, dataarray.attrs['cell_methods']) + args['cell_methods'] = parse_cell_methods( + dataarray.name, dataarray.attrs['cell_methods']) cube = iris.cube.Cube(dataarray.to_masked_array(), **args) return cube -def from_iris(cube): - """Convert a Iris cube into an DataArray +def _iris_obj_to_attrs(obj): + """ Return a dictionary of attrs when given a Iris object + """ + attrs = {'standard_name': obj.standard_name, + 'long_name': obj.long_name} + if obj.units.calendar: + attrs['calendar'] = obj.units.calendar + if obj.units.origin != '1': + attrs['units'] = obj.units.origin + attrs.update(obj.attributes) + return dict((k, v) for k, v in attrs.items() if v is not None) + + +def _iris_cell_methods_to_str(cell_methods_obj): + """ Converts a Iris cell methods into a string """ + cell_methods = [] + for cell_method in cell_methods_obj: + names = ''.join(['{}: '.format(n) for n in cell_method.coord_names]) + intervals = ' '.join(['interval: {}'.format(interval) + for interval in cell_method.intervals]) + comments = ' '.join(['comment: {}'.format(comment) + for comment in cell_method.comments]) + extra = ' 
'.join([intervals, comments]).strip() + if extra: + extra = ' ({})'.format(extra) + cell_methods.append(names + cell_method.method + extra) + return ' '.join(cell_methods) - def get_attr(_obj): - attrs = {'standard_name': _obj.standard_name, - 'long_name': _obj.long_name} - if _obj.units.calendar: - attrs['calendar'] = _obj.units.calendar - if _obj.units.origin != '1': - attrs['units'] = _obj.units.origin - attrs.update(_obj.attributes) - return dict((k, v) for k, v in attrs.items() if v is not None) - - def get_cell_methods(cell_methods_obj): - _cell_methods = [] - for cell_method in cell_methods_obj: - names = ''.join(['{}: '.format(n) for n in cell_method.coord_names]) - intervals = ' '.join(['interval: {}'.format(interval) - for interval in cell_method.intervals]) - comments = ' '.join(['comment: {}'.format(comment) - for comment in cell_method.comments]) - extra = ' '.join([intervals, comments]).strip() - if extra: - extra = ' ({})'.format(extra) - _cell_methods.append(names + cell_method.method + extra) - return ' '.join(_cell_methods) +def from_iris(cube): + """ Convert a Iris cube into an DataArray + """ name = cube.var_name dims = [dim.var_name for dim in cube.dim_coords] if not dims: @@ -155,7 +167,7 @@ def get_cell_methods(cell_methods_obj): coords = OrderedDict() for coord in cube.coords(): - coord_attrs = get_attr(coord) + coord_attrs = _iris_obj_to_attrs(coord) coord_dims = [dims[i] for i in cube.coord_dims(coord)] if coord_dims: coords[coord.var_name] = (coord_dims, coord.points, coord_attrs) @@ -163,8 +175,8 @@ def get_cell_methods(cell_methods_obj): coords[coord.var_name] = ((), np.asscalar(coord.points), coord_attrs) - array_attrs = get_attr(cube) - cell_methods = get_cell_methods(cube.cell_methods) + array_attrs = _iris_obj_to_attrs(cube) + cell_methods = _iris_cell_methods_to_str(cube.cell_methods) if cell_methods: array_attrs['cell_methods'] = cell_methods dataarray = DataArray(cube.data, coords=coords, name=name, From 
877d06f17379f2879f334304cdc62328f684d900 Mon Sep 17 00:00:00 2001 From: Neil Parley Date: Tue, 9 Aug 2016 15:49:01 +0100 Subject: [PATCH 10/13] Update to build dims with mix of Dimension and Auxiliary coordinates --- xarray/convert.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/xarray/convert.py b/xarray/convert.py index 15efa6eabbe..75e672b3ba0 100644 --- a/xarray/convert.py +++ b/xarray/convert.py @@ -160,10 +160,20 @@ def _iris_cell_methods_to_str(cell_methods_obj): def from_iris(cube): """ Convert a Iris cube into an DataArray """ + import iris.exceptions name = cube.var_name - dims = [dim.var_name for dim in cube.dim_coords] - if not dims: - dims = ["dim{}".format(i) for i in range(cube.data.ndim)] + dims = [] + for dim in xrange(cube.ndim): + try: + dim_coord = cube.coord(dim_coords=True, dimensions=(dim,)) + dims.append(dim_coord.var_name) + except iris.exceptions.CoordinateNotFoundError: + index_coord = range(cube.shape[dim]) + dims.append("dim{}".format(index_coord)) + + # dims = [dim.var_name for dim in cube.dim_coords] + # if not dims: + # dims = ["dim{}".format(i) for i in range(cube.data.ndim)] coords = OrderedDict() for coord in cube.coords(): From e42aeb2adacc0d3be9e93c94f07b2dde6bae3429 Mon Sep 17 00:00:00 2001 From: Neil Parley Date: Tue, 9 Aug 2016 15:52:49 +0100 Subject: [PATCH 11/13] Fix import after merge --- xarray/test/test_dataarray.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/test/test_dataarray.py b/xarray/test/test_dataarray.py index e873280721b..adca0f47a87 100644 --- a/xarray/test/test_dataarray.py +++ b/xarray/test/test_dataarray.py @@ -10,7 +10,7 @@ Coordinate, Variable) from xarray.core.pycompat import iteritems, OrderedDict from xarray.core.common import _full_like - +from xarray.conventions import maybe_encode_datetime from xarray.test import (TestCase, ReturnItem, source_ndarray, unittest, requires_dask, requires_bottleneck) From 
338ef6b7242c06d5107d2224c1df4cdb2a4b951c Mon Sep 17 00:00:00 2001 From: Neil Parley Date: Wed, 10 Aug 2016 19:56:34 +0100 Subject: [PATCH 12/13] Bug fix / refactoring --- xarray/convert.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/xarray/convert.py b/xarray/convert.py index 75e672b3ba0..a09497a11fd 100644 --- a/xarray/convert.py +++ b/xarray/convert.py @@ -163,13 +163,12 @@ def from_iris(cube): import iris.exceptions name = cube.var_name dims = [] - for dim in xrange(cube.ndim): + for i in xrange(cube.ndim): try: - dim_coord = cube.coord(dim_coords=True, dimensions=(dim,)) + dim_coord = cube.coord(dim_coords=True, dimensions=(i,)) dims.append(dim_coord.var_name) except iris.exceptions.CoordinateNotFoundError: - index_coord = range(cube.shape[dim]) - dims.append("dim{}".format(index_coord)) + dims.append("dim_{}".format(i)) # dims = [dim.var_name for dim in cube.dim_coords] # if not dims: From 46f68ff315064e600e5da1aa10252ea227012e54 Mon Sep 17 00:00:00 2001 From: Neil Parley Date: Thu, 11 Aug 2016 14:19:06 +0100 Subject: [PATCH 13/13] Change the decode_cf method and raise error if coord has no var_name --- xarray/convert.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/xarray/convert.py b/xarray/convert.py index a09497a11fd..d06f2836821 100644 --- a/xarray/convert.py +++ b/xarray/convert.py @@ -170,14 +170,13 @@ def from_iris(cube): except iris.exceptions.CoordinateNotFoundError: dims.append("dim_{}".format(i)) - # dims = [dim.var_name for dim in cube.dim_coords] - # if not dims: - # dims = ["dim{}".format(i) for i in range(cube.data.ndim)] coords = OrderedDict() for coord in cube.coords(): coord_attrs = _iris_obj_to_attrs(coord) coord_dims = [dims[i] for i in cube.coord_dims(coord)] + if not coord.var_name: + raise ValueError('Coordinate has no var_name') if coord_dims: coords[coord.var_name] = (coord_dims, coord.points, coord_attrs) else: @@ -190,4 +189,5 @@ def from_iris(cube):
array_attrs['cell_methods'] = cell_methods dataarray = DataArray(cube.data, coords=coords, name=name, attrs=array_attrs, dims=dims) - return decode_cf(dataarray.to_dataset())[dataarray.name] + decoded_ds = decode_cf(dataarray._to_temp_dataset()) + return dataarray._from_temp_dataset(decoded_ds)