From 27c50f2efb744138d7942ce0c6354bc4fce6b384 Mon Sep 17 00:00:00 2001
From: Stephan Hoyer <shoyer@climate.com>
Date: Wed, 17 Dec 2014 23:08:27 -0800
Subject: [PATCH 1/3] add test for out of order reindex

---
 xray/test/test_dataset.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/xray/test/test_dataset.py b/xray/test/test_dataset.py
index 77d5ad0ba6e..a1a208c716e 100644
--- a/xray/test/test_dataset.py
+++ b/xray/test/test_dataset.py
@@ -518,6 +518,11 @@ def test_reindex(self):
         with self.assertRaisesRegexp(ValueError, 'dictionary'):
             data.reindex('foo')
 
+        # out of order: reindexing with reversed labels must match sel()
+        expected = data.sel(dim1=data['dim1'][:10:-1])
+        actual = data.reindex(dim1=data['dim1'][:10:-1])
+        self.assertDatasetIdentical(actual, expected)
+
     def test_align(self):
         left = create_test_data()
         right = left.copy(deep=True)

From 3130c4aa940638220510056473fd12929766279e Mon Sep 17 00:00:00 2001
From: Stephan Hoyer <shoyer@climate.com>
Date: Wed, 17 Dec 2014 23:43:13 -0800
Subject: [PATCH 2/3] Fastpath for variable construction

---
 xray/core/variable.py | 117 ++++++++++++++++++++++++------------------
 1 file changed, 66 insertions(+), 51 deletions(-)

diff --git a/xray/core/variable.py b/xray/core/variable.py
index 5bfc717ebf3..64a8e12a1b5 100644
--- a/xray/core/variable.py
+++ b/xray/core/variable.py
@@ -50,26 +50,43 @@ def as_variable(obj, key=None, strict=True):
     return obj
 
 
-def _as_compatible_data(data):
+def _maybe_wrap_data(data):
+    """
+    Wrap pandas.Index and numpy.ndarray objects in the matching adapter
+    (PandasIndexAdapter or NumpyArrayAdapter) so they can be indexed properly.
+
+    pandas.Index is tested first because it may be an ndarray subclass;
+    every other kind of object is handed back exactly as it was given.
+    """
+    wrapped = data
+    if isinstance(data, pd.Index):
+        wrapped = PandasIndexAdapter(data)
+    elif isinstance(data, np.ndarray):
+        wrapped = NumpyArrayAdapter(data)
+    return wrapped
+
+
+def _as_compatible_data(data, fastpath=False):
     """Prepare and wrap data to put in a Variable.
 
-    Prepare the data:
     - If data does not have the necessary attributes, convert it to ndarray.
     - If data has dtype=datetime64, ensure that it has ns precision. If it's a
       pandas.Timestamp, convert it to datetime64.
     - If data is already a pandas or xray object (other than an Index), just
       use the values.
 
-    Wrap it up:
-    - Finally, put pandas.Index and numpy.ndarray arguments in adapter objects
-      to ensure they can be indexed properly.
-    - NumpyArrayAdapter, PandasIndexAdapter and LazilyIndexedArray should
-      all pass through unmodified.
+    Finally, wrap it up with an adapter if necessary.
     """
-    if isinstance(data, pd.MultiIndex):
-        raise NotImplementedError(
-            'no support yet for using a pandas.MultiIndex in an '
-            'xray.Coordinate')
+    if fastpath and getattr(data, 'ndim', 0) > 0:
+        # can't use fastpath (yet) for scalars
+        return _maybe_wrap_data(data)
+
+    if isinstance(data, pd.Index):
+        if isinstance(data, pd.MultiIndex):
+            raise NotImplementedError(
+                'no support yet for using a pandas.MultiIndex in an '
+                'xray.Coordinate')
+        return _maybe_wrap_data(data)
 
     if isinstance(data, pd.Timestamp):
         # TODO: convert, handle datetime objects, too
@@ -85,32 +102,26 @@ def _as_compatible_data(data):
         # data must be ndarray-like
         data = np.asarray(data)
 
-    # ensure data is properly wrapped up
-    if isinstance(data, pd.Index):
-        # check pd.Index first since it may be an ndarray subclass
-        data = PandasIndexAdapter(data)
-    else:
-        # we don't want nested self-described arrays
-        data = getattr(data, 'values', data)
-
-        if isinstance(data, np.ma.MaskedArray):
-            mask = np.ma.getmaskarray(data)
-            if mask.any():
-                dtype, fill_value = common._maybe_promote(data.dtype)
-                data = np.asarray(data, dtype=dtype)
-                data[mask] = fill_value
-            else:
-                data = np.asarray(data)
+    # we don't want nested self-described arrays
+    data = getattr(data, 'values', data)
+
+    if isinstance(data, np.ma.MaskedArray):
+        mask = np.ma.getmaskarray(data)
+        if mask.any():
+            dtype, fill_value = common._maybe_promote(data.dtype)
+            data = np.asarray(data, dtype=dtype)
+            data[mask] = fill_value
+        else:
+            data = np.asarray(data)
 
-        if isinstance(data, np.ndarray):
-            if data.dtype.kind == 'M':
-                # TODO: automatically cast arrays of datetime objects as well
-                data = np.asarray(data, 'datetime64[ns]')
-            if data.dtype.kind == 'm':
-                data = np.asarray(data, 'timedelta64[ns]')
-            data = NumpyArrayAdapter(data)
+    if isinstance(data, np.ndarray):
+        if data.dtype.kind == 'M':
+            # TODO: automatically cast arrays of datetime objects as well
+            data = np.asarray(data, 'datetime64[ns]')
+        if data.dtype.kind == 'm':
+            data = np.asarray(data, 'timedelta64[ns]')
 
-    return data
+    return _maybe_wrap_data(data)
 
 
 class NumpyArrayAdapter(utils.NDArrayMixin):
@@ -237,7 +248,7 @@ class Variable(common.AbstractArray):
     form of a Dataset or DataArray should almost always be preferred, because
     they can use more complete metadata in context of coordinate labels.
     """
-    def __init__(self, dims, data, attrs=None, encoding=None):
+    def __init__(self, dims, data, attrs=None, encoding=None, fastpath=False):
         """
         Parameters
         ----------
@@ -257,7 +268,7 @@ def __init__(self, dims, data, attrs=None, encoding=None):
             Well behaviored code to serialize a Variable should ignore
             unrecognized encoding items.
         """
-        self._data = _as_compatible_data(data)
+        self._data = _as_compatible_data(data, fastpath=fastpath)
         self._dims = self._parse_dimensions(dims)
         self._attrs = None
         self._encoding = None
@@ -329,8 +340,8 @@ def values(self, values):
 
     def to_coord(self):
         """Return this variable as an xray.Coordinate"""
-        return Coordinate(self.dims, self._data, self.attrs,
-                          encoding=self.encoding)
+        return Coordinate(self.dims, self._data, self._attrs,
+                          encoding=self._encoding, fastpath=True)
 
     @property
     def as_index(self):
@@ -391,15 +402,15 @@ def __getitem__(self, key):
         """
         key = self._item_key_to_tuple(key)
         key = indexing.expanded_indexer(key, self.ndim)
-        dims = [dim for k, dim in zip(key, self.dims)
-                if not isinstance(k, (int, np.integer))]
+        dims = tuple(dim for k, dim in zip(key, self.dims)
+                     if not isinstance(k, (int, np.integer)))
         values = self._data[key]
         # orthogonal indexing should ensure the dimensionality is consistent
         if hasattr(values, 'ndim'):
             assert values.ndim == len(dims), (values.ndim, len(dims))
         else:
             assert len(dims) == 0, len(dims)
-        return type(self)(dims, values, self.attrs)
+        return type(self)(dims, values, self._attrs, fastpath=True)
 
     def __setitem__(self, key, value):
         """__setitem__ is overloaded to access the underlying numpy values with
@@ -454,7 +465,8 @@ def copy(self, deep=True):
         # note:
         # dims is already an immutable tuple
         # attributes and encoding will be copied when the new Array is created
-        return type(self)(self.dims, data, self.attrs, self.encoding)
+        return type(self)(self.dims, data, self._attrs, self._encoding,
+                          fastpath=True)
 
     def __copy__(self):
         return self.copy(deep=False)
@@ -524,7 +536,7 @@ def transpose(self, *dims):
             dims = self.dims[::-1]
         axes = self.get_axis_num(dims)
         data = self.values.transpose(axes)
-        return type(self)(dims, data, self.attrs, self.encoding)
+        return type(self)(dims, data, self._attrs, self._encoding, fastpath=True)
 
     def squeeze(self, dim=None):
         """Return a new Variable object with squeezed data.
@@ -585,7 +597,8 @@ def set_dims(self, dims):
         self_dims = set(self.dims)
         exp_dims = tuple(d for d in dims if d not in self_dims) + self.dims
         exp_data = utils.as_shape(self, [dims[d] for d in exp_dims])
-        expanded_var = Variable(exp_dims, exp_data, self.attrs, self.encoding)
+        expanded_var = Variable(exp_dims, exp_data, self._attrs,
+                                self._encoding, fastpath=True)
         return expanded_var.transpose(*dims)
 
     def reduce(self, func, dim=None, axis=None, keep_attrs=False,
@@ -634,7 +647,7 @@ def reduce(self, func, dim=None, axis=None, keep_attrs=False,
         dims = [dim for n, dim in enumerate(self.dims)
                 if n not in removed_axes]
 
-        attrs = self.attrs if keep_attrs else None
+        attrs = self._attrs if keep_attrs else None
 
         return Variable(dims, data, attrs=attrs)
 
@@ -827,8 +840,8 @@ class Coordinate(Variable):
     """
     _cache_data_class = PandasIndexAdapter
 
-    def __init__(self, name, data, attrs=None, encoding=None):
-        super(Coordinate, self).__init__(name, data, attrs, encoding)
+    def __init__(self, name, data, attrs=None, encoding=None, fastpath=False):
+        super(Coordinate, self).__init__(name, data, attrs, encoding, fastpath)
         if self.ndim != 1:
             raise ValueError('%s objects must be 1-dimensional' %
                              type(self).__name__)
@@ -837,9 +850,10 @@ def __getitem__(self, key):
         key = self._item_key_to_tuple(key)
         values = self._data[key]
         if not hasattr(values, 'ndim') or values.ndim == 0:
-            return Variable((), values, self.attrs, self.encoding)
+            return Variable((), values, self._attrs, self._encoding)
         else:
-            return type(self)(self.dims, values, self.attrs, self.encoding)
+            return type(self)(self.dims, values, self._attrs, self._encoding,
+                              fastpath=True)
 
     def __setitem__(self, key, value):
         raise TypeError('%s values cannot be modified' % type(self).__name__)
@@ -853,7 +867,8 @@ def copy(self, deep=True):
         # there is no need to copy the index values here even if deep=True
         # since pandas.Index objects are immutable
         data = PandasIndexAdapter(self) if deep else self._data
-        return type(self)(self.dims, data, self.attrs, self.encoding)
+        return type(self)(self.dims, data, self._attrs, self._encoding,
+                          fastpath=True)
 
     def _data_equals(self, other):
         return self.to_index().equals(other.to_index())

From 3925fda821688b8d824b277b0d82798e83dc40e6 Mon Sep 17 00:00:00 2001
From: Stephan Hoyer <shoyer@climate.com>
Date: Thu, 18 Dec 2014 19:34:34 -0800
Subject: [PATCH 3/3] Coerce datetime/timedelta arrays to
 datetime64/timedelta64

---
 xray/core/alignment.py     |   2 +-
 xray/core/common.py        |  11 ++++
 xray/core/variable.py      |   1 +
 xray/test/test_variable.py | 108 ++++++++++++++++++++++---------------
 4 files changed, 79 insertions(+), 43 deletions(-)

diff --git a/xray/core/alignment.py b/xray/core/alignment.py
index f5050d280a9..37c3aa1ef1f 100644
--- a/xray/core/alignment.py
+++ b/xray/core/alignment.py
@@ -156,7 +156,7 @@ def var_indexers(var, indexers):
                 data = np.empty(shape, dtype=dtype)
                 data[:] = fill_value
                 # create a new Variable so we can use orthogonal indexing
-                new_var = Variable(var.dims, data, var.attrs)
+                new_var = Variable(var.dims, data, var.attrs, fastpath=True)
                 new_var[assign_to] = var[assign_from].values
             elif any_not_full_slices(assign_from):
                 # type coercion is not necessary as there are no missing
diff --git a/xray/core/common.py b/xray/core/common.py
index 54cb0107c58..78037e75032 100644
--- a/xray/core/common.py
+++ b/xray/core/common.py
@@ -1,4 +1,5 @@
 import numpy as np
+import pandas as pd
 
 from .pycompat import basestring, iteritems
 from . import formatting
@@ -127,3 +128,13 @@ def _maybe_promote(dtype):
         dtype = object
         fill_value = np.nan
     return dtype, fill_value
+
+
+def _possibly_convert_objects(values):
+    """Coerce object arrays of datetime/timedelta to datetime64/timedelta64."""
+    if values.dtype.kind != 'O':  # typed arrays have nothing to infer
+        return values
+    # private pandas helper may be missing; pd.Series is a slower stand-in
+    converter = getattr(pd.core.common, '_possibly_convert_objects',
+                        lambda x: np.asarray(pd.Series(x)))
+    return converter(values.ravel()).reshape(values.shape)
diff --git a/xray/core/variable.py b/xray/core/variable.py
index 64a8e12a1b5..c3cf495c046 100644
--- a/xray/core/variable.py
+++ b/xray/core/variable.py
@@ -115,6 +115,7 @@ def _as_compatible_data(data, fastpath=False):
             data = np.asarray(data)
 
     if isinstance(data, np.ndarray):
+        data = common._possibly_convert_objects(data)
         if data.dtype.kind == 'M':
             # TODO: automatically cast arrays of datetime objects as well
             data = np.asarray(data, 'datetime64[ns]')
diff --git a/xray/test/test_variable.py b/xray/test/test_variable.py
index b17ba0a7437..e7c8e24c1a5 100644
--- a/xray/test/test_variable.py
+++ b/xray/test/test_variable.py
@@ -88,7 +88,7 @@ def test_index_0d_string(self):
     def test_index_0d_datetime(self):
         d = datetime(2000, 1, 1)
         x = self.cls(['x'], [d])
-        self.assertIndexedLikeNDArray(x, d)
+        self.assertIndexedLikeNDArray(x, np.datetime64(d))
 
         x = self.cls(['x'], [np.datetime64(d)])
         self.assertIndexedLikeNDArray(x, np.datetime64(d), 'datetime64[ns]')
@@ -148,6 +148,42 @@ def test_0d_time_data(self):
         expected = np.datetime64('2000-01-01T00Z', 'ns')
         self.assertEqual(x[0].values, expected)
 
+    def test_datetime64_conversion(self):
+        # always datetime64[ns]; only Variable on ns ndarray keeps its source
+        index = pd.date_range('2000-01-01', periods=3)
+        ns_dtype = np.dtype('datetime64[ns]')
+        cases = [(index, False),
+                 (index.values, True),
+                 (index.values.astype('datetime64[s]'), False),
+                 (index.to_pydatetime(), False)]
+        for values, preserve_source in cases:
+            var = self.cls(['t'], values)
+            self.assertEqual(var.dtype, ns_dtype)
+            self.assertArrayEqual(var.values, index.values)
+            self.assertEqual(var.values.dtype, ns_dtype)
+            shared = source_ndarray(var.values) is source_ndarray(values)
+            expect_shared = preserve_source and self.cls is Variable
+            check = self.assertTrue if expect_shared else self.assertFalse
+            check(shared)
+
+    def test_timedelta64_conversion(self):
+        # always timedelta64[ns]; only Variable on ns ndarray keeps its source
+        deltas = pd.timedelta_range(start=0, periods=3)
+        ns_dtype = np.dtype('timedelta64[ns]')
+        cases = [(deltas, False),
+                 (deltas.values, True),
+                 (deltas.values.astype('timedelta64[s]'), False),
+                 (deltas.to_pytimedelta(), False)]
+        for values, preserve_source in cases:
+            var = self.cls(['t'], values)
+            self.assertEqual(var.dtype, ns_dtype)
+            self.assertArrayEqual(var.values, deltas.values)
+            self.assertEqual(var.values.dtype, ns_dtype)
+            shared = source_ndarray(var.values) is source_ndarray(values)
+            expect_shared = preserve_source and self.cls is Variable
+            check = self.assertTrue if expect_shared else self.assertFalse
+            check(shared)
+
     def test_pandas_data(self):
         v = self.cls(['x'], pd.Series([0, 1, 2], index=[3, 2, 1]))
         self.assertVariableIdentical(v, v[[0, 1, 2]])
@@ -333,29 +369,29 @@ def test_numpy_same_methods(self):
         v = Coordinate('x', np.arange(5))
         self.assertEqual(2, v.searchsorted(2))
 
-    def test_datetime64_conversion(self):
-        # verify that datetime64 is always converted to ns precision with
-        # sources preserved
-        values = np.datetime64('2000-01-01T00')
-        v = Variable([], values)
-        self.assertEqual(v.dtype, np.dtype('datetime64[ns]'))
-        self.assertEqual(v.values, values)
-        self.assertEqual(v.values.dtype, np.dtype('datetime64[ns]'))
-
-        values = pd.date_range('2000-01-01', periods=3).values.astype(
-            'datetime64[s]')
-        v = Variable(['t'], values)
-        self.assertEqual(v.dtype, np.dtype('datetime64[ns]'))
-        self.assertArrayEqual(v.values, values)
-        self.assertEqual(v.values.dtype, np.dtype('datetime64[ns]'))
-        self.assertIsNot(source_ndarray(v.values), values)
-
-        values = pd.date_range('2000-01-01', periods=3).values.copy()
-        v = Variable(['t'], values)
-        self.assertEqual(v.dtype, np.dtype('datetime64[ns]'))
-        self.assertArrayEqual(v.values, values)
-        self.assertEqual(v.values.dtype, np.dtype('datetime64[ns]'))
-        self.assertIs(source_ndarray(v.values), values)
+    def test_datetime64_conversion_scalar(self):
+        # scalar datetimes of any flavor should be stored as datetime64[ns]
+        expected = np.datetime64('2000-01-01T00:00:00Z', 'ns')
+        scalars = (np.datetime64('2000-01-01T00Z'),
+                   pd.Timestamp('2000-01-01T00'),
+                   datetime(2000, 1, 1))
+        for values in scalars:
+            var = Variable([], values)
+            self.assertEqual(var.dtype, np.dtype('datetime64[ns]'))
+            self.assertEqual(var.values, expected)
+            self.assertEqual(var.values.dtype, np.dtype('datetime64[ns]'))
+
+    def test_timedelta64_conversion_scalar(self):
+        # scalar timedeltas of any flavor should be stored as timedelta64[ns]
+        expected = np.timedelta64(24 * 60 * 60 * 10 ** 9, 'ns')
+        scalars = (np.timedelta64(1, 'D'),
+                   pd.Timedelta('1 day'),
+                   timedelta(days=1))
+        for values in scalars:
+            var = Variable([], values)
+            self.assertEqual(var.dtype, np.dtype('timedelta64[ns]'))
+            self.assertEqual(var.values, expected)
+            self.assertEqual(var.values.dtype, np.dtype('timedelta64[ns]'))
 
     def test_0d_str(self):
         v = Variable([], u'foo')
@@ -676,18 +712,6 @@ def test_data(self):
         with self.assertRaisesRegexp(TypeError, 'cannot be modified'):
             x[:] = 0
 
-    def test_avoid_index_dtype_inference(self):
-        # verify our work-around for (pandas<0.14):
-        # https://github.com/pydata/pandas/issues/6370
-        data = pd.date_range('2000-01-01', periods=3).to_pydatetime()
-        t = Coordinate('t', data)
-        self.assertArrayEqual(t.values[:2], data[:2])
-        self.assertArrayEqual(t[:2].values, data[:2])
-        self.assertArrayEqual(t.values[:2], data[:2])
-        self.assertArrayEqual(t[:2].values, data[:2])
-        self.assertEqual(t.dtype, object)
-        self.assertEqual(t[:2].dtype, object)
-
     def test_name(self):
         coord = Coordinate('x', [10.0])
         self.assertEqual(coord.name, 'x')
@@ -729,27 +753,27 @@ def test_masked_array(self):
         self.assertEqual(np.dtype(float), actual.dtype)
 
     def test_datetime(self):
-        expected = np.datetime64('2000-01-01T00')
+        expected = np.datetime64('2000-01-01T00Z')
         actual = _as_compatible_data(expected)
         self.assertEqual(expected, actual)
         self.assertEqual(NumpyArrayAdapter, type(actual))
         self.assertEqual(np.dtype('datetime64[ns]'), actual.dtype)
 
-        expected = np.array([np.datetime64('2000-01-01T00')])
+        expected = np.array([np.datetime64('2000-01-01T00Z')])
         actual = _as_compatible_data(expected)
         self.assertEqual(np.asarray(expected), actual)
         self.assertEqual(NumpyArrayAdapter, type(actual))
         self.assertEqual(np.dtype('datetime64[ns]'), actual.dtype)
 
-        expected = np.array([np.datetime64('2000-01-01T00', 'ns')])
+        expected = np.array([np.datetime64('2000-01-01T00Z', 'ns')])
         actual = _as_compatible_data(expected)
         self.assertEqual(np.asarray(expected), actual)
         self.assertEqual(NumpyArrayAdapter, type(actual))
         self.assertEqual(np.dtype('datetime64[ns]'), actual.dtype)
         self.assertIs(expected, source_ndarray(np.asarray(actual)))
 
-        expected = pd.Timestamp('2000-01-01T00').to_datetime()
-        actual = _as_compatible_data(expected)
+        expected = np.datetime64('2000-01-01T00Z', 'ns')
+        actual = _as_compatible_data(datetime(2000, 1, 1))
         self.assertEqual(np.asarray(expected), actual)
         self.assertEqual(NumpyArrayAdapter, type(actual))
-        self.assertEqual(np.dtype('O'), actual.dtype)
+        self.assertEqual(np.dtype('datetime64[ns]'), actual.dtype)