diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 9d300741955..00701c718af 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -25,6 +25,10 @@ Breaking changes
   merges will now succeed in cases that previously raised
   ``xarray.MergeError``. Set ``compat='broadcast_equals'`` to restore the
   previous default.
+- Pickling an xarray object based on the dask backend, or reading its
+  :py:attr:`values` property, won't automatically convert the array from dask
+  to numpy in the original object anymore.
+  By `Guido Imperiale <https://github.com/crusaderky>`_.
 
 Deprecations
 ~~~~~~~~~~~~
@@ -45,33 +49,33 @@ Deprecations
 
 Enhancements
 ~~~~~~~~~~~~
 - Add checking of ``attr`` names and values when saving to netCDF, raising useful
-error messages if they are invalid. (:issue:`911`).
-By `Robin Wilson <https://github.com/robintw>`_.
-
+  error messages if they are invalid. (:issue:`911`).
+  By `Robin Wilson <https://github.com/robintw>`_.
 - Added ability to save ``DataArray`` objects directly to netCDF files using
   :py:meth:`~xarray.DataArray.to_netcdf`, and to load directly from netCDF files
   using :py:func:`~xarray.open_dataarray` (:issue:`915`). These remove the need
   to convert a ``DataArray`` to a ``Dataset`` before saving as a netCDF file,
   and deals with names to ensure a perfect 'roundtrip' capability.
   By `Robin Wilson <https://github.com/robintw>`_.
-
 - Added the ``compat`` option ``'no_conflicts'`` to ``merge``, allowing the
   combination of xarray objects with disjoint (:issue:`742`) or
   overlapping (:issue:`835`) coordinates as long as all present data agrees.
   By `Johnnie Gray <https://github.com/jcmgray>`_. See
   :ref:`combining.no_conflicts` for more details.
-
 - It is now possible to set ``concat_dim=None`` explicitly in
   :py:func:`~xarray.open_mfdataset` to disable inferring a dimension along
   which to concatenate.
   By `Stephan Hoyer <https://github.com/shoyer>`_.
+- Added methods :py:meth:`DataArray.compute`, :py:meth:`Dataset.compute`, and
+  :py:meth:`Variable.compute` as a non-mutating alternative to
+  :py:meth:`~DataArray.load`.
+  By `Guido Imperiale <https://github.com/crusaderky>`_.
 
 Bug fixes
 ~~~~~~~~~
@@ -95,6 +99,9 @@ Bug fixes
 - ``.where()`` and ``.fillna()`` now preserve attributes (:issue:`1009`).
   By `Fabien Maussion <https://github.com/fmaussion>`_.
+- Applying :py:func:`broadcast` to an xarray object based on the dask backend
+  won't accidentally convert the array from dask to numpy anymore (:issue:`978`).
+  By `Guido Imperiale <https://github.com/crusaderky>`_.
 
 .. _whats-new.0.8.2:
 
 v0.8.2 (18 August 2016)
diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py
index 44d5865ad37..79fdd7c1100 100644
--- a/xarray/core/dataarray.py
+++ b/xarray/core/dataarray.py
@@ -561,6 +561,19 @@ def load(self):
         self._coords = new._coords
         return self
 
+    def compute(self):
+        """Manually trigger loading of this array's data from disk or a
+        remote source into memory and return a new array. The original is
+        left unaltered.
+
+        Normally, it should not be necessary to call this method in user code,
+        because all xarray functions should either work on deferred data or
+        load data automatically. However, this method can be necessary when
+        working with many file objects on disk.
+        """
+        new = self.copy(deep=False)
+        return new.load()
+
     def copy(self, deep=True):
         """Returns a copy of this array.
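A minimal usage sketch of the new ``compute()`` contrasted with ``load()`` (an
editor's illustration, not part of the patch; it assumes dask is installed so
that ``.chunk()`` produces a dask-backed array):

    import xarray as xr

    a = xr.DataArray([1, 2, 3]).chunk()  # dask-backed array

    b = a.compute()  # new object whose data has been loaded into numpy
    # 'a' is left unaltered and still holds a dask array

    a.load()         # loads in place, mutating 'a' and returning it

``compute()`` is simply a shallow copy followed by ``load()``, so the original
object keeps its dask array and is never coerced to numpy behind your back.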
diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
index 45650d21501..d060e845ceb 100644
--- a/xarray/core/dataset.py
+++ b/xarray/core/dataset.py
@@ -255,8 +255,11 @@ def load_store(cls, store, decoder=None):
         return obj
 
     def __getstate__(self):
-        """Always load data in-memory before pickling"""
-        self.load()
+        """Load data in-memory before pickling (except for Dask data)"""
+        for v in self.variables.values():
+            if not isinstance(v.data, dask_array_type):
+                v.load()
+
         # self.__dict__ is the default pickle object, we don't need to
         # implement our own __setstate__ method to make pickle work
         state = self.__dict__.copy()
@@ -306,7 +309,7 @@ def load(self):
         """
         # access .data to coerce everything to numpy or dask arrays
         all_data = dict((k, v.data) for k, v in self.variables.items())
-        lazy_data = dict((k, v) for k, v in all_data.items()
+        lazy_data = OrderedDict((k, v) for k, v in all_data.items()
                          if isinstance(v, dask_array_type))
         if lazy_data:
             import dask.array as da
@@ -319,6 +322,19 @@ def load(self):
 
         return self
 
+    def compute(self):
+        """Manually trigger loading of this dataset's data from disk or a
+        remote source into memory and return a new dataset. The original is
+        left unaltered.
+
+        Normally, it should not be necessary to call this method in user code,
+        because all xarray functions should either work on deferred data or
+        load data automatically. However, this method can be necessary when
+        working with many file objects on disk.
+        """
+        new = self.copy(deep=False)
+        return new.load()
+
     @classmethod
     def _construct_direct(cls, variables, coord_names, dims=None, attrs=None,
                           file_obj=None):
@@ -404,11 +420,8 @@ def copy(self, deep=False):
         Otherwise, a shallow copy is made, so each variable in the new dataset
         is also a variable in the original dataset.
         """
-        if deep:
-            variables = OrderedDict((k, v.copy(deep=True))
-                                    for k, v in iteritems(self._variables))
-        else:
-            variables = self._variables.copy()
+        variables = OrderedDict((k, v.copy(deep=deep))
+                                for k, v in iteritems(self._variables))
         # skip __init__ to avoid costly validation
         return self._construct_direct(variables, self._coord_names.copy(),
                                       self._dims.copy(), self._attrs_copy())
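With the revised ``Dataset.__getstate__`` above, pickling computes only the
non-dask variables; dask-backed variables are pickled as their task graphs.
A short sketch of the resulting behavior (it mirrors ``test_dataset_pickle``
further below; ``_in_memory`` is a private property used by the tests):

    import pickle
    import xarray as xr

    ds1 = xr.Dataset({'a': [1, 2]}).chunk()  # 'a' becomes dask-backed
    ds2 = pickle.loads(pickle.dumps(ds1))    # round-trip through pickle

    # neither the original nor the unpickled copy was coerced to numpy
    assert not ds1['a']._in_memory
    assert not ds2['a']._in_memory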
""" - self._data_cached() + new_data = self._data_cached() + if isinstance(self._data, dask_array_type): + self._data = new_data return self + def compute(self): + """Manually trigger loading of this variable's data from disk or a + remote source into memory and return a new variable. The original is + left unaltered. + + Normally, it should not be necessary to call this method in user code, + because all xarray functions should either work on deferred data or + load data automatically. + """ + new = self.copy(deep=False) + return new.load() + def __getstate__(self): - """Always cache data as an in-memory array before pickling""" + """Always cache data as an in-memory array before pickling + (with the exception of dask backend)""" self._data_cached() # self.__dict__ is the default pickle object, we don't need to # implement our own __setstate__ method to make pickle work @@ -1090,10 +1119,11 @@ def __init__(self, dims, data, attrs=None, encoding=None, fastpath=False): raise ValueError('%s objects must be 1-dimensional' % type(self).__name__) - def _data_cached(self): - if not isinstance(self._data, PandasIndexAdapter): - self._data = PandasIndexAdapter(self._data) - return self._data + def _data_cast(self): + if isinstance(self._data, PandasIndexAdapter): + return self._data + else: + return PandasIndexAdapter(self._data) def __getitem__(self, key): key = self._item_key_to_tuple(key) diff --git a/xarray/test/test_backends.py b/xarray/test/test_backends.py index eeb5561579b..0ff5cffec26 100644 --- a/xarray/test/test_backends.py +++ b/xarray/test/test_backends.py @@ -149,6 +149,24 @@ def assert_loads(vars=None): actual = ds.load() self.assertDatasetAllClose(expected, actual) + def test_dataset_compute(self): + expected = create_test_data() + + with self.roundtrip(expected) as actual: + # Test Dataset.compute() + for v in actual.variables.values(): + self.assertFalse(v._in_memory) + + computed = actual.compute() + + for v in actual.data_vars.values(): + self.assertFalse(v._in_memory) + for v in computed.variables.values(): + self.assertTrue(v._in_memory) + + self.assertDatasetAllClose(expected, actual) + self.assertDatasetAllClose(expected, computed) + def test_roundtrip_None_variable(self): expected = Dataset({None: (('x', 'y'), [[0, 1], [2, 3]])}) with self.roundtrip(expected) as actual: @@ -230,18 +248,6 @@ def test_roundtrip_coordinates(self): with self.roundtrip(expected) as actual: self.assertDatasetIdentical(expected, actual) - expected = original.copy() - expected.attrs['coordinates'] = 'something random' - with self.assertRaisesRegexp(ValueError, 'cannot serialize'): - with self.roundtrip(expected): - pass - - expected = original.copy(deep=True) - expected['foo'].attrs['coordinates'] = 'something random' - with self.assertRaisesRegexp(ValueError, 'cannot serialize'): - with self.roundtrip(expected): - pass - def test_roundtrip_boolean_dtype(self): original = create_boolean_data() self.assertEqual(original['x'].dtype, 'bool') @@ -872,7 +878,26 @@ def test_read_byte_attrs_as_unicode(self): @requires_dask @requires_scipy @requires_netCDF4 -class DaskTest(TestCase): +class DaskTest(TestCase, DatasetIOTestCases): + @contextlib.contextmanager + def create_store(self): + yield Dataset() + + @contextlib.contextmanager + def roundtrip(self, data, save_kwargs={}, open_kwargs={}): + yield data.chunk() + + def test_roundtrip_datetime_data(self): + # Override method in DatasetIOTestCases - remove not applicable save_kwds + times = pd.to_datetime(['2000-01-01', '2000-01-02', 'NaT']) + expected = 
diff --git a/xarray/test/test_backends.py b/xarray/test/test_backends.py
index eeb5561579b..0ff5cffec26 100644
--- a/xarray/test/test_backends.py
+++ b/xarray/test/test_backends.py
@@ -149,6 +149,24 @@ def assert_loads(vars=None):
             actual = ds.load()
         self.assertDatasetAllClose(expected, actual)
 
+    def test_dataset_compute(self):
+        expected = create_test_data()
+
+        with self.roundtrip(expected) as actual:
+            # Test Dataset.compute()
+            for v in actual.variables.values():
+                self.assertFalse(v._in_memory)
+
+            computed = actual.compute()
+
+            for v in actual.data_vars.values():
+                self.assertFalse(v._in_memory)
+            for v in computed.variables.values():
+                self.assertTrue(v._in_memory)
+
+            self.assertDatasetAllClose(expected, actual)
+            self.assertDatasetAllClose(expected, computed)
+
     def test_roundtrip_None_variable(self):
         expected = Dataset({None: (('x', 'y'), [[0, 1], [2, 3]])})
         with self.roundtrip(expected) as actual:
@@ -230,18 +248,6 @@ def test_roundtrip_coordinates(self):
         with self.roundtrip(expected) as actual:
             self.assertDatasetIdentical(expected, actual)
 
-        expected = original.copy()
-        expected.attrs['coordinates'] = 'something random'
-        with self.assertRaisesRegexp(ValueError, 'cannot serialize'):
-            with self.roundtrip(expected):
-                pass
-
-        expected = original.copy(deep=True)
-        expected['foo'].attrs['coordinates'] = 'something random'
-        with self.assertRaisesRegexp(ValueError, 'cannot serialize'):
-            with self.roundtrip(expected):
-                pass
-
     def test_roundtrip_boolean_dtype(self):
         original = create_boolean_data()
         self.assertEqual(original['x'].dtype, 'bool')
@@ -872,7 +878,26 @@ def test_read_byte_attrs_as_unicode(self):
 @requires_dask
 @requires_scipy
 @requires_netCDF4
-class DaskTest(TestCase):
+class DaskTest(TestCase, DatasetIOTestCases):
+    @contextlib.contextmanager
+    def create_store(self):
+        yield Dataset()
+
+    @contextlib.contextmanager
+    def roundtrip(self, data, save_kwargs={}, open_kwargs={}):
+        yield data.chunk()
+
+    def test_roundtrip_datetime_data(self):
+        # Override method in DatasetIOTestCases - remove non-applicable save_kwds
+        times = pd.to_datetime(['2000-01-01', '2000-01-02', 'NaT'])
+        expected = Dataset({'t': ('t', times), 't0': times[0]})
+        with self.roundtrip(expected) as actual:
+            self.assertDatasetIdentical(expected, actual)
+
+    def test_write_store(self):
+        # Override method in DatasetIOTestCases - not applicable to dask
+        pass
+
     def test_open_mfdataset(self):
         original = Dataset({'foo': ('x', np.random.randn(10))})
         with create_tmp_file() as tmp1:
@@ -992,7 +1017,16 @@ def test_deterministic_names(self):
                 self.assertIn(tmp, dask_name)
             self.assertEqual(original_names, repeat_names)
 
-
+    def test_dataarray_compute(self):
+        # Test DataArray.compute() on the dask backend.
+        # The test for Dataset.compute() is already in DatasetIOTestCases;
+        # however, dask is the only tested backend which supports DataArrays.
+        actual = DataArray([1, 2]).chunk()
+        computed = actual.compute()
+        self.assertFalse(actual._in_memory)
+        self.assertTrue(computed._in_memory)
+        self.assertDataArrayAllClose(actual, computed)
+
 @requires_scipy_or_netCDF4
 @requires_pydap
 class PydapTest(TestCase):
diff --git a/xarray/test/test_dask.py b/xarray/test/test_dask.py
index 13a8817ce68..652966770e2 100644
--- a/xarray/test/test_dask.py
+++ b/xarray/test/test_dask.py
@@ -1,3 +1,4 @@
+import pickle
 import numpy as np
 import pandas as pd
 
@@ -321,3 +322,36 @@ def test_dot(self):
         eager = self.eager_array.dot(self.eager_array[0])
         lazy = self.lazy_array.dot(self.lazy_array[0])
         self.assertLazyAndAllClose(eager, lazy)
+
+    def test_dataarray_pickle(self):
+        # Test that pickling/unpickling does not convert the dask
+        # backend to numpy
+        a1 = DataArray([1, 2]).chunk()
+        self.assertFalse(a1._in_memory)
+        a2 = pickle.loads(pickle.dumps(a1))
+        self.assertDataArrayIdentical(a1, a2)
+        self.assertFalse(a1._in_memory)
+        self.assertFalse(a2._in_memory)
+
+    def test_dataset_pickle(self):
+        ds1 = Dataset({'a': [1, 2]}).chunk()
+        self.assertFalse(ds1['a']._in_memory)
+        ds2 = pickle.loads(pickle.dumps(ds1))
+        self.assertDatasetIdentical(ds1, ds2)
+        self.assertFalse(ds1['a']._in_memory)
+        self.assertFalse(ds2['a']._in_memory)
+
+    def test_values(self):
+        # Test that invoking the values property does not convert the dask
+        # backend to numpy
+        a = DataArray([1, 2]).chunk()
+        self.assertFalse(a._in_memory)
+        self.assertEqual(a.values.tolist(), [1, 2])
+        self.assertFalse(a._in_memory)
+
+    def test_from_dask_variable(self):
+        # Test array creation from a Variable with a dask backend.
+        # This is used e.g. in broadcast()
+        a = DataArray(self.lazy_array.variable)
+        self.assertLazyAndIdentical(self.lazy_array, a)
+
diff --git a/xarray/test/test_dataset.py b/xarray/test/test_dataset.py
index b9798dbd611..bac07d68d0f 100644
--- a/xarray/test/test_dataset.py
+++ b/xarray/test/test_dataset.py
@@ -1287,10 +1287,13 @@ def test_copy(self):
 
         for copied in [data.copy(deep=False), copy(data)]:
             self.assertDatasetIdentical(data, copied)
-            for k in data:
+            # Note: IndexVariable objects with string dtype are always
+            # copied because of xarray.core.utils.safe_cast_to_index.
+            # Limiting the test to data variables.
+            for k in data.data_vars:
                 v0 = data.variables[k]
                 v1 = copied.variables[k]
-                self.assertIs(v0, v1)
+                self.assertIs(source_ndarray(v0.data), source_ndarray(v1.data))
             copied['foo'] = ('z', np.arange(5))
             self.assertNotIn('foo', data)
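Finally, a sketch of the ``broadcast()`` fix exercised by
``test_from_dask_variable`` above (:issue:`978`): ``as_compatible_data`` now
unwraps a ``Variable`` into its underlying (possibly dask) array, so building
a ``DataArray`` from a dask-backed ``Variable``, as ``broadcast()`` does
internally, no longer forces a load to numpy. The laziness assertions here are
the editor's reading of the whats-new entry, not code from the patch:

    import xarray as xr

    a = xr.DataArray([1, 2]).chunk()
    b = xr.DataArray([[3], [4]]).chunk()

    a2, b2 = xr.broadcast(a, b)  # previously coerced the data to numpy
    assert not a2._in_memory     # the broadcast results stay dask-backed
    assert not b2._in_memory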