From 50eab43cd5e5c7d1089a626c8c106d791da7df59 Mon Sep 17 00:00:00 2001
From: keisukefujii <fujiisoup@gmail.com>
Date: Wed, 19 Dec 2018 11:28:51 +0100
Subject: [PATCH 1/7] Fix multiindex selection

---
 doc/whats-new.rst              |  3 ++-
 xarray/core/indexing.py        |  3 +++
 xarray/tests/test_dataarray.py | 14 ++++++++++++++
 3 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 3ef4375c499..6084c77b220 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -57,7 +57,8 @@ Bug fixes
   By `Martin Raspaud <https://github.com/mraspaud>`_.
 - Fix parsing of ``_Unsigned`` attribute set by OPENDAP servers. (:issue:`2583`).
   By `Deepak Cherian <https://github.com/dcherian>`_
-
+- Fix MultiIndex selection to update label and level (:issue:`2619`).
+  By `Keisuke Fujii <https://github.com/fujiisoup>`_.
 
 .. _whats-new.0.11.0:
 
diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py
index d51da471c8d..66b0b743498 100644
--- a/xarray/core/indexing.py
+++ b/xarray/core/indexing.py
@@ -1265,6 +1265,9 @@ def __getitem__(self, indexer):
         result = self.array[key]
 
         if isinstance(result, pd.Index):
+            # GH2619. For MultiIndex, we need to call remove_unused.
+            if isinstance(result, pd.MultiIndex):
+                result = result.remove_unused_levels()
             result = PandasIndexAdapter(result, dtype=self.dtype)
         else:
             # result is a scalar
diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py
index ecb60239b72..0672861da2c 100644
--- a/xarray/tests/test_dataarray.py
+++ b/xarray/tests/test_dataarray.py
@@ -1027,6 +1027,20 @@ def test_sel(lab_indexer, pos_indexer, replaced_idx=False,
         assert_identical(mdata.sel(x={'one': 'a', 'two': 1}),
                          mdata.sel(one='a', two=1))
 
+    def test_selection_multiindex(self):
+        # GH2619. Unused MultiIndex levels must not affect sel() or unstack().
+        ds = xr.DataArray(np.arange(40).reshape(8, 5), dims=['x', 'y'],
+                          coords={'x': np.arange(8), 'y': np.arange(5)})
+        ds = ds.stack(xy=['x', 'y'])
+        ds_isel = ds.isel(xy=ds['x'] < 4)
+        with pytest.raises(KeyError):
+            ds_isel.sel(x=5)
+
+        actual = ds_isel.unstack()
+        expected = ds.reset_index('xy').isel(xy=ds['x'] < 4)
+        expected = expected.set_index(xy=['x', 'y']).unstack()
+        assert_identical(expected, actual)
+
     def test_virtual_default_coords(self):
         array = DataArray(np.zeros((5,)), dims='x')
         expected = DataArray(range(5), dims='x', name='x')

From 762f4965954e34a9c61dcec8842cc1c5c40c93ea Mon Sep 17 00:00:00 2001
From: keisukefujii <fujiisoup@gmail.com>
Date: Wed, 19 Dec 2018 22:21:52 +0100
Subject: [PATCH 2/7] Support pandas0.19

---
 xarray/core/indexing.py |  9 +++--
 xarray/core/pdcompat.py | 77 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 84 insertions(+), 2 deletions(-)
 create mode 100644 xarray/core/pdcompat.py

diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py
index 66b0b743498..4e5e934cc2d 100644
--- a/xarray/core/indexing.py
+++ b/xarray/core/indexing.py
@@ -4,11 +4,12 @@
 import operator
 from collections import Hashable, defaultdict
 from datetime import timedelta
+from distutils.version import LooseVersion
 
 import numpy as np
 import pandas as pd
 
-from . import duck_array_ops, nputils, utils
+from . import duck_array_ops, nputils, pdcompat, utils
 from .pycompat import (
     dask_array_type, integer_types, iteritems, range, suppress)
 from .utils import is_dict_like
@@ -1267,7 +1268,11 @@ def __getitem__(self, indexer):
         if isinstance(result, pd.Index):
             # GH2619. For MultiIndex, we need to call remove_unused.
             if isinstance(result, pd.MultiIndex):
-                result = result.remove_unused_levels()
+                if LooseVersion(pd.__version__) >= "0.20":
+                    result = result.remove_unused_levels()
+                else:  # for pandas 0.19
+                    result = pdcompat.remove_unused_levels(result)
+
             result = PandasIndexAdapter(result, dtype=self.dtype)
         else:
             # result is a scalar
diff --git a/xarray/core/pdcompat.py b/xarray/core/pdcompat.py
new file mode 100644
index 00000000000..15e7b9ccdd3
--- /dev/null
+++ b/xarray/core/pdcompat.py
@@ -0,0 +1,77 @@
+import pandas as pd
+
+
+# for pandas 0.19
+def remove_unused_levels(self):
+    """
+    Create a new MultiIndex from the current one, removing
+    unused levels, i.e. levels that are not expressed in the labels.
+    The resulting MultiIndex will have the same outward
+    appearance, meaning the same .values and ordering. It will also
+    be .equals() to the original.
+    .. versionadded:: 0.20.0
+    Returns
+    -------
+    MultiIndex
+    Examples
+    --------
+    >>> i = pd.MultiIndex.from_product([range(2), list('ab')])
+    MultiIndex(levels=[[0, 1], ['a', 'b']],
+               labels=[[0, 0, 1, 1], [0, 1, 0, 1]])
+    >>> i[2:]
+    MultiIndex(levels=[[0, 1], ['a', 'b']],
+               labels=[[1, 1], [0, 1]])
+    The 0 from the first level is not represented
+    and can be removed
+    >>> i[2:].remove_unused_levels()
+    MultiIndex(levels=[[1], ['a', 'b']],
+               labels=[[0, 0], [0, 1]])
+    """
+
+    new_levels = []
+    new_labels = []
+
+    changed = False
+    for lev, lab in zip(self.levels, self.labels):
+
+        # Since few levels are typically unused, bincount() is more
+        # efficient than unique() - however it only accepts positive values
+        # (and drops order):
+        uniques = np.where(np.bincount(lab + 1) > 0)[0] - 1
+        has_na = int(len(uniques) and (uniques[0] == -1))
+
+        if len(uniques) != len(lev) + has_na:
+            # We have unused levels
+            changed = True
+
+            # Recalculate uniques, now preserving order.
+            # Can easily be cythonized by exploiting the already existing
+            # "uniques" and stop parsing "lab" when all items are found:
+            uniques = algos.unique(lab)
+            if has_na:
+                na_idx = np.where(uniques == -1)[0]
+                # Just ensure that -1 is in first position:
+                uniques[[0, na_idx[0]]] = uniques[[na_idx[0], 0]]
+
+            # labels get mapped from uniques to 0:len(uniques)
+            # -1 (if present) is mapped to last position
+            label_mapping = np.zeros(len(lev) + has_na)
+            # ... and reassigned value -1:
+            label_mapping[uniques] = np.arange(len(uniques)) - has_na
+
+            lab = label_mapping[lab]
+
+            # new levels are simple
+            lev = lev.take(uniques[has_na:])
+
+        new_levels.append(lev)
+        new_labels.append(lab)
+
+    result = self._shallow_copy()
+
+    if changed:
+        result._reset_identity()
+        result._set_levels(new_levels, validate=False)
+        result._set_labels(new_labels, validate=False)
+
+    return result

From 6bb8166b88b4675e2c7c12ae1b993a9f6147d13b Mon Sep 17 00:00:00 2001
From: keisukefujii <fujiisoup@gmail.com>
Date: Wed, 19 Dec 2018 22:36:51 +0100
Subject: [PATCH 3/7] Add missing numpy import to pdcompat

---
 xarray/core/pdcompat.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/xarray/core/pdcompat.py b/xarray/core/pdcompat.py
index 15e7b9ccdd3..987cca3fc59 100644
--- a/xarray/core/pdcompat.py
+++ b/xarray/core/pdcompat.py
@@ -1,3 +1,4 @@
+import numpy as np
 import pandas as pd
 
 

From a806c64b3885bed5fca077d992d6c7e0e9fce227 Mon Sep 17 00:00:00 2001
From: keisukefujii <fujiisoup@gmail.com>
Date: Thu, 20 Dec 2018 08:31:16 +0100
Subject: [PATCH 4/7] Do remove_unused_levels only once in unstack.

---
 xarray/core/dataset.py  |  8 +++++++-
 xarray/core/indexing.py | 14 +++++---------
 2 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
index b253d956a80..397a0f72fef 100644
--- a/xarray/core/dataset.py
+++ b/xarray/core/dataset.py
@@ -14,7 +14,7 @@
 
 from . import (
     alignment, computation, duck_array_ops, formatting, groupby, indexing, ops,
-    resample, rolling, utils)
+    pdcompat, resample, rolling, utils)
 from .. import conventions
 from ..coding.cftimeindex import _parse_array_of_cftime_strings
 from .alignment import align
@@ -2425,6 +2425,12 @@ def stack(self, dimensions=None, **dimensions_kwargs):
 
     def _unstack_once(self, dim):
         index = self.get_index(dim)
+        # GH2619. Drop unused levels so full_idx only spans labels in use.
+        if LooseVersion(pd.__version__) >= "0.20":
+            index = index.remove_unused_levels()
+        else:  # for pandas 0.19
+            index = pdcompat.remove_unused_levels(index)
+
         full_idx = pd.MultiIndex.from_product(index.levels, names=index.names)
 
         # take a shortcut in case the MultiIndex was not modified.
diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py
index 4e5e934cc2d..c7329bc6201 100644
--- a/xarray/core/indexing.py
+++ b/xarray/core/indexing.py
@@ -9,7 +9,7 @@
 import numpy as np
 import pandas as pd
 
-from . import duck_array_ops, nputils, pdcompat, utils
+from . import duck_array_ops, nputils, utils
 from .pycompat import (
     dask_array_type, integer_types, iteritems, range, suppress)
 from .utils import is_dict_like
@@ -160,6 +160,10 @@ def convert_label_indexer(index, label, index_name='', method=None,
             indexer, new_index = index.get_loc_level(
                 tuple(label.values()), level=tuple(label.keys()))
 
+            # GH2619. Raise a KeyError if nothing is chosen
+            if indexer.dtype.kind == 'b' and indexer.sum() == 0:
+                raise KeyError('{} not found'.format(label))
+
     elif isinstance(label, tuple) and isinstance(index, pd.MultiIndex):
         if _is_nested_tuple(label):
             indexer = index.get_locs(label)
@@ -169,7 +173,6 @@ def convert_label_indexer(index, label, index_name='', method=None,
             indexer, new_index = index.get_loc_level(
                 label, level=list(range(len(label)))
             )
-
     else:
         label = (label if getattr(label, 'ndim', 1) > 1  # vectorized-indexing
                  else _asarray_tuplesafe(label))
@@ -1266,13 +1269,6 @@ def __getitem__(self, indexer):
         result = self.array[key]
 
         if isinstance(result, pd.Index):
-            # GH2619. For MultiIndex, we need to call remove_unused.
-            if isinstance(result, pd.MultiIndex):
-                if LooseVersion(pd.__version__) >= "0.20":
-                    result = result.remove_unused_levels()
-                else:  # for pandas 0.19
-                    result = pdcompat.remove_unused_levels(result)
-
             result = PandasIndexAdapter(result, dtype=self.dtype)
         else:
             # result is a scalar

From 205f94840066dcee5fd7863d6305c09d7a54cbff Mon Sep 17 00:00:00 2001
From: keisukefujii <fujiisoup@gmail.com>
Date: Thu, 20 Dec 2018 08:33:03 +0100
Subject: [PATCH 5/7] import algos

---
 xarray/core/pdcompat.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/xarray/core/pdcompat.py b/xarray/core/pdcompat.py
index 987cca3fc59..3456a616629 100644
--- a/xarray/core/pdcompat.py
+++ b/xarray/core/pdcompat.py
@@ -1,5 +1,6 @@
 import numpy as np
 import pandas as pd
+import pandas.core.algorithms as algos
 
 
 # for pandas 0.19

From b15cab3bd0fbd50e5d471c203f5bb11a694f42f0 Mon Sep 17 00:00:00 2001
From: keisukefujii <fujiisoup@gmail.com>
Date: Thu, 20 Dec 2018 08:53:17 +0100
Subject: [PATCH 6/7] Remove unused import

---
 xarray/core/indexing.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py
index c7329bc6201..7e0418d25b4 100644
--- a/xarray/core/indexing.py
+++ b/xarray/core/indexing.py
@@ -4,7 +4,6 @@
 import operator
 from collections import Hashable, defaultdict
 from datetime import timedelta
-from distutils.version import LooseVersion
 
 import numpy as np
 import pandas as pd

From edb4a24e27ba57a76c8c329393518977e6c21a2d Mon Sep 17 00:00:00 2001
From: keisukefujii <fujiisoup@gmail.com>
Date: Mon, 24 Dec 2018 12:23:59 +0100
Subject: [PATCH 7/7] Adopt local import

---
 xarray/core/pdcompat.py | 42 ++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 41 insertions(+), 1 deletion(-)

diff --git a/xarray/core/pdcompat.py b/xarray/core/pdcompat.py
index 3456a616629..c1e153f4d92 100644
--- a/xarray/core/pdcompat.py
+++ b/xarray/core/pdcompat.py
@@ -1,6 +1,45 @@
+# The remove_unused_levels function defined here was copied from the source
+# code in pandas.core.indexes.multi
+
+# For reference, here is a copy of the pandas copyright notice:
+
+# (c) 2011-2012, Lambda Foundry, Inc. and PyData Development Team
+# All rights reserved.
+
+# Copyright (c) 2008-2011 AQR Capital Management, LLC
+# All rights reserved.
+
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+
+#     * Redistributions of source code must retain the above copyright
+#        notice, this list of conditions and the following disclaimer.
+
+#     * Redistributions in binary form must reproduce the above
+#        copyright notice, this list of conditions and the following
+#        disclaimer in the documentation and/or other materials provided
+#        with the distribution.
+
+#     * Neither the name of the copyright holder nor the names of any
+#        contributors may be used to endorse or promote products derived
+#        from this software without specific prior written permission.
+
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
 import numpy as np
 import pandas as pd
-import pandas.core.algorithms as algos
 
 
 # for pandas 0.19
@@ -29,6 +68,7 @@ def remove_unused_levels(self):
     MultiIndex(levels=[[1], ['a', 'b']],
                labels=[[0, 0], [0, 1]])
     """
+    import pandas.core.algorithms as algos
 
     new_levels = []
     new_labels = []