pydata · akleeman · Mar 11, 2014 · Mar 7, 2014 · Mar 7, 2014 · Mar 7, 2014
diff --git a/src/xray/__init__.py b/src/xray/__init__.py
@@ -1,4 +1,4 @@
-from .xarray import XArray, broadcast_xarrays
+from .xarray import as_xarray, XArray, CoordXArray, broadcast_xarrays
 from .dataset import Dataset, open_dataset
 from .dataset_array import DatasetArray, align
 from .utils import (orthogonal_indexer, decode_cf_datetime, encode_cf_datetime,

diff --git a/src/xray/common.py b/src/xray/common.py
@@ -15,25 +15,6 @@ def func(self, dimension=cls._reduce_dimension_default,
 
 
 class AbstractArray(ImplementsReduce):
-    @property
-    def dtype(self):
-        return self._data.dtype
-
-    @property
-    def shape(self):
-        return self._data.shape
-
-    @property
-    def size(self):
-        return self._data.size
-
-    @property
-    def ndim(self):
-        return self._data.ndim
-
-    def __len__(self):
-        return len(self._data)
-
     def __nonzero__(self):
         return bool(self.data)
 

diff --git a/src/xray/conventions.py b/src/xray/conventions.py
@@ -261,14 +261,11 @@ def encode_cf_variable(array):
         attributes['units'] = units
         attributes['calendar'] = calendar
     elif data.dtype == np.dtype('O'):
-        # Unfortunately, pandas.Index arrays often have dtype=object even if
-        # they were created from an array with a sensible datatype (e.g.,
-        # pandas.Float64Index always has dtype=object for some reason). Because
-        # we allow for doing math with coordinates, these object arrays can
-        # propagate onward to other variables, which is why we don't only apply
-        # this check to XArrays with data that is a pandas.Index.
-        # Accordingly, we convert object arrays to the type of their first
-        # variable.
+        # Occasionally, one will end up with variables with dtype=object
+        # (likely because they were created from pandas objects which don't
+        # maintain dtype carefully). This code makes a best-effort attempt to
+        # encode them into a dtype that netCDF can handle by inspecting the
+        # dtype of the first element.
         dtype = np.array(data.reshape(-1)[0]).dtype
         # N.B. the "astype" call below will fail if data cannot be cast to the
         # type of its first element (which is probably the only sensible thing

diff --git a/src/xray/dataset.py b/src/xray/dataset.py
@@ -53,7 +53,8 @@ class _VariablesDict(OrderedDict):
     """
     def _datetimeindices(self):
         return [k for k, v in self.iteritems()
-                if isinstance(v._data, pd.DatetimeIndex)]
+                if np.issubdtype(v.dtype, np.datetime64)
+                and isinstance(v.index, pd.DatetimeIndex)]
 
     @property
     def virtual(self):
@@ -76,10 +77,10 @@ def _get_virtual_variable(self, key):
             if ref_var in self._datetimeindices():
                 if suffix == 'season':
                     # seasons = np.array(['DJF', 'MAM', 'JJA', 'SON'])
-                    month = self[ref_var].data.month
+                    month = self[ref_var].index.month
                     data = (month // 3) % 4 + 1
                 else:
-                    data = getattr(self[ref_var].data, suffix)
+                    data = getattr(self[ref_var].index, suffix)
                 return xarray.XArray(self[ref_var].dimensions, data)
         raise KeyError('virtual variable %r not found' % key)
 
@@ -130,14 +131,15 @@ def __init__(self, variables=None, attributes=None, decode_cf=False):
 
     def _as_variable(self, name, var, decode_cf=False):
         if isinstance(var, DatasetArray):
-            var = var.array
-        if not isinstance(var, xarray.XArray):
+            var = xarray.as_xarray(var)
+        elif not isinstance(var, xarray.XArray):
             try:
                 var = xarray.XArray(*var)
             except TypeError:
                 raise TypeError('Dataset variables must be of type '
                                 'DatasetArray or XArray, or a sequence of the '
-                                'form (dimensions, data[, attributes])')
+                                'form (dimensions, data[, attributes, '
+                                'encoding])')
         # this will unmask and rescale the data as well as convert
         # time variables to datetime indices.
         if decode_cf:
@@ -147,9 +149,7 @@ def _as_variable(self, name, var, decode_cf=False):
             if var.ndim != 1:
                 raise ValueError('a coordinate variable must be defined with '
                                  '1-dimensional data')
-            # create a new XArray object on which to modify the data
-            var = xarray.XArray(var.dimensions, pd.Index(var.data),
-                                var.attributes, encoding=var.encoding)
+            var = var.to_coord()
         return var
 
     def set_variables(self, variables, decode_cf=False):
@@ -487,7 +487,7 @@ def labeled_by(self, **indexers):
         Dataset.indexed_by
         Array.indexed_by
         """
-        return self.indexed_by(**remap_loc_indexers(self.variables, indexers))
+        return self.indexed_by(**remap_loc_indexers(self, indexers))
 
     def renamed(self, name_dict):
         """Returns a new object with renamed variables and dimensions.
@@ -625,7 +625,8 @@ def unselect(self, *names):
             New dataset based on this dataset. Only the named variables are
             removed.
         """
-        if any(k not in self.variables for k in names):
+        if any(k not in self.variables and k not in self.virtual_variables
+               for k in names):
             raise ValueError('One or more of the specified variable '
                              'names does not exist on this dataset')
         drop = set(names)

diff --git a/src/xray/dataset_array.py b/src/xray/dataset_array.py
@@ -66,28 +66,50 @@ def __init__(self, dataset, focus):
         self.focus = focus
 
     @property
-    def array(self):
+    def variable(self):
         return self.dataset.variables[self.focus]
-    @array.setter
-    def array(self, value):
+    @variable.setter
+    def variable(self, value):
         self.dataset[self.focus] = value
 
-    # _data is necessary for AbstractArray
     @property
-    def _data(self):
-        return self.array._data
+    def dtype(self):
+        return self.variable.dtype
+
+    @property
+    def shape(self):
+        return self.variable.shape
+
+    @property
+    def size(self):
+        return self.variable.size
+
+    @property
+    def ndim(self):
+        return self.variable.ndim
+
+    def __len__(self):
+        return len(self.variable)
 
     @property
     def data(self):
-        """The array's data as a numpy.ndarray"""
-        return self.array.data
+        """The variable's data as a numpy.ndarray"""
+        return self.variable.data
     @data.setter
     def data(self, value):
-        self.array.data = value
+        self.variable.data = value
+
+    @property
+    def index(self):
+        """The variable's data as a pandas.Index"""
+        return self.variable.index
+
+    def is_coord(self):
+        return isinstance(self.variable, xarray.CoordXArray)
 
     @property
     def dimensions(self):
-        return self.array.dimensions
+        return self.variable.dimensions
 
     def _key_to_indexers(self, key):
         return OrderedDict(
@@ -107,7 +129,7 @@ def __setitem__(self, key, value):
             self.dataset[key] = value
         else:
             # orthogonal array indexing
-            self.array[key] = value
+            self.variable[key] = value
 
     def __delitem__(self, key):
         del self.dataset[key]
@@ -127,11 +149,11 @@ def __iter__(self):
 
     @property
     def attributes(self):
-        return self.array.attributes
+        return self.variable.attributes
 
     @property
     def encoding(self):
-        return self.array.encoding
+        return self.variable.encoding
 
     @property
     def variables(self):
@@ -175,10 +197,11 @@ def indexed_by(self, **indexers):
         Dataset.indexed_by
         """
         ds = self.dataset.indexed_by(**indexers)
-        if self.focus not in ds:
+        if self.focus not in ds and self.focus in self.dataset:
             # always keep focus variable in the dataset, even if it was
             # unselected because indexing made it a scaler
-            ds[self.focus] = self.array.indexed_by(**indexers)
+            # don't add back in virtual variables (not found in the dataset)
+            ds[self.focus] = self.variable.indexed_by(**indexers)
         return type(self)(ds, self.focus)
 
     def labeled_by(self, **indexers):
@@ -236,13 +259,8 @@ def refocus(self, new_var, name=None):
         If `new_var` is a dataset array, its contents will be merged in.
         """
         if not hasattr(new_var, 'dimensions'):
-            new_var = type(self.array)(self.array.dimensions, new_var)
-        if self.focus not in self.dimensions:
-            # only unselect the focus from the dataset if it isn't a coordinate
-            # variable
-            ds = self.unselected()
-        else:
-            ds = self.dataset
+            new_var = type(self.variable)(self.variable.dimensions, new_var)
+        ds = self.dataset.copy() if self.is_coord() else self.unselected()
         if name is None:
             name = self.focus + '_'
         ds[name] = new_var
@@ -301,7 +319,7 @@ def transpose(self, *dimensions):
         numpy.transpose
         Array.transpose
         """
-        return self.refocus(self.array.transpose(*dimensions), self.focus)
+        return self.refocus(self.variable.transpose(*dimensions), self.focus)
 
     def squeeze(self, dimension=None):
         """Return a new DatasetArray object with squeezed data.
@@ -361,7 +379,7 @@ def reduce(self, func, dimension=None, axis=None, **kwargs):
             DatasetArray with this object's array replaced with an array with
             summarized data and the indicated dimension(s) removed.
         """
-        var = self.array.reduce(func, dimension, axis, **kwargs)
+        var = self.variable.reduce(func, dimension, axis, **kwargs)
         drop = set(self.dimensions) - set(var.dimensions)
         # For now, take an aggressive strategy of removing all variables
         # associated with any dropped dimensions
@@ -495,13 +513,13 @@ def to_series(self):
         return pd.Series(self.data.reshape(-1), index=index, name=self.focus)
 
     def __array_wrap__(self, obj, context=None):
-        return self.refocus(self.array.__array_wrap__(obj, context))
+        return self.refocus(self.variable.__array_wrap__(obj, context))
 
     @staticmethod
     def _unary_op(f):
         @functools.wraps(f)
         def func(self, *args, **kwargs):
-            return self.refocus(f(self.array, *args, **kwargs),
+            return self.refocus(f(self.variable, *args, **kwargs),
                                 self.focus + '_' + f.__name__)
         return func
 
@@ -520,15 +538,15 @@ def func(self, other):
             # TODO: automatically group by other variable dimensions to allow
             # for broadcasting dimensions like 'dayofyear' against 'time'
             self._check_coordinates_compat(other)
-            ds = self.unselected()
+            ds = self.dataset.copy() if self.is_coord() else self.unselected()
             if hasattr(other, 'unselected'):
                 ds.merge(other.unselected(), inplace=True)
-            other_array = getattr(other, 'array', other)
+            other_array = getattr(other, 'variable', other)
             other_focus = getattr(other, 'focus', 'other')
             focus = self.focus + '_' + f.__name__ + '_' + other_focus
-            ds[focus] = (f(self.array, other_array)
+            ds[focus] = (f(self.variable, other_array)
                          if not reflexive
-                         else f(other_array, self.array))
+                         else f(other_array, self.variable))
             return type(self)(ds, focus)
         return func
 
@@ -537,8 +555,8 @@ def _inplace_binary_op(f):
         @functools.wraps(f)
         def func(self, other):
             self._check_coordinates_compat(other)
-            other_array = getattr(other, 'array', other)
-            self.array = f(self.array, other_array)
+            other_array = getattr(other, 'variable', other)
+            self.variable = f(self.variable, other_array)
             if hasattr(other, 'unselected'):
                 self.dataset.merge(other.unselected(), inplace=True)
             return self
@@ -555,8 +573,9 @@ def align(array1, array2):
     # TODO: automatically align when doing math with arrays, or better yet
     # calculate the union of the indices and fill in the mis-aligned data with
     # NaN.
-    overlapping_coords = {k: (array1.coordinates[k].data
-                              & array2.coordinates[k].data)
+    # TODO: generalize this function to any number of arguments
+    overlapping_coords = {k: (array1.coordinates[k].index
+                              & array2.coordinates[k].index)
                           for k in array1.coordinates
                           if k in array2.coordinates}
     return tuple(ar.labeled_by(**overlapping_coords)