From ced299f9a753c52fda67c665116569f7973270b7 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Fri, 12 Oct 2018 13:22:38 -0500
Subject: [PATCH 01/23] ENH: Support EAs in Series.unstack

---
 pandas/core/reshape/reshape.py           | 22 ++++++++++++++
 pandas/tests/extension/base/reshaping.py | 38 ++++++++++++++++++++++++
 pandas/tests/extension/decimal/array.py  |  5 +++-
 3 files changed, 64 insertions(+), 1 deletion(-)

diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py
index 03b77f0e787f0..e9fe6ee731984 100644
--- a/pandas/core/reshape/reshape.py
+++ b/pandas/core/reshape/reshape.py
@@ -344,6 +344,7 @@ def _unstack_multiple(data, clocs, fill_value=None):
     if isinstance(data, Series):
         dummy = data.copy()
         dummy.index = dummy_index
+
         unstacked = dummy.unstack('__placeholder__', fill_value=fill_value)
         new_levels = clevels
         new_names = cnames
@@ -399,6 +400,8 @@ def unstack(obj, level, fill_value=None):
         else:
             return obj.T.stack(dropna=False)
     else:
+        if is_extension_array_dtype(obj.dtype):
+            return unstack_extension_series(obj, level, fill_value)
         unstacker = _Unstacker(obj.values, obj.index, level=level,
                                fill_value=fill_value,
                                constructor=obj._constructor_expanddim)
@@ -947,3 +950,22 @@ def make_axis_dummies(frame, axis='minor', transform=None):
     values = values.take(labels, axis=0)
 
     return DataFrame(values, columns=items, index=frame.index)
+
+
+def unstack_extension_series(series, level, fill_value):
+    from pandas.core.reshape.concat import concat
+
+    dummy_arr = np.arange(len(series))
+    # fill_value=-1, since we will do a series.values.take later
+    result = _Unstacker(dummy_arr, series.index,
+                        level=level, fill_value=-1).get_result()
+
+    out = []
+    values = series.values
+
+    for col, indicies in result.iteritems():
+        out.append(Series(values.take(indicies.values,
+                                      allow_fill=True,
+                                      fill_value=fill_value),
+                          name=col, index=result.index))
+    return concat(out, axis='columns')
diff --git a/pandas/tests/extension/base/reshaping.py b/pandas/tests/extension/base/reshaping.py
index 7f13c2cd67373..fa9b168a2b522 100644
--- a/pandas/tests/extension/base/reshaping.py
+++ b/pandas/tests/extension/base/reshaping.py
@@ -1,3 +1,4 @@
+import itertools
 import pytest
 import numpy as np
 
@@ -170,3 +171,40 @@ def test_merge(self, data, na_value):
                  [data[0], data[0], data[1], data[2], na_value],
                  dtype=data.dtype)})
         self.assert_frame_equal(res, exp[['ext', 'int1', 'key', 'int2']])
+
+    @pytest.mark.parametrize("index", [
+        pd.MultiIndex.from_product(([['A', 'B'], ['a', 'b']])),
+        pd.MultiIndex.from_product(([['A', 'B'], ['a', 'b'], ['x', 'y', 'z']])),
+
+        # non-uniform
+        pd.MultiIndex.from_tuples([('A', 'a'), ('A', 'b'), ('B', 'b')]),
+
+        # three levels: uniform product, then non-uniform tuples
+        pd.MultiIndex.from_product([('A', 'B'), ('a', 'b', 'c'), (0, 1, 2)]),
+        pd.MultiIndex.from_tuples([
+            ('A', 'a', 1),
+            ('A', 'b', 0),
+            ('A', 'a', 0),
+            ('B', 'a', 0),
+            ('B', 'c', 1),
+        ]),
+    ])
+    def test_unstack(self, data, index):
+        data = data[:len(index)]
+        ser = pd.Series(data, index=index)
+
+        n = index.nlevels
+        levels = list(range(n))
+        # [0, 1, 2]
+        # -> [(0,), (1,), (2,) (0, 1), (1, 0)]
+        combinations = itertools.chain.from_iterable(
+            itertools.permutations(levels, i) for i in range(1, n)
+        )
+
+        for level in combinations:
+            result = ser.unstack(level=level)
+            assert all(isinstance(result[col].values, type(data)) for col in result.columns)
+            expected = ser.astype(object).unstack(level=level)
+            result = result.astype(object)
+
+            self.assert_frame_equal(result, expected)
diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py
index fe07aae61c5e2..8c6333c7ce8ee 100644
--- a/pandas/tests/extension/decimal/array.py
+++ b/pandas/tests/extension/decimal/array.py
@@ -102,7 +102,10 @@ def copy(self, deep=False):
     def astype(self, dtype, copy=True):
         if isinstance(dtype, type(self.dtype)):
             return type(self)(self._data, context=dtype.context)
-        return super(DecimalArray, self).astype(dtype, copy)
+        # need to replace decimal NA
+        result = np.asarray(self, dtype=dtype)
+        result[self.isna()] = np.nan
+        return result
 
     def __setitem__(self, key, value):
         if pd.api.types.is_list_like(value):

From 3b63fcbe82fb1a44498cc7e5d5ad2ba19428ab7c Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Mon, 22 Oct 2018 14:58:20 -0500
Subject: [PATCH 02/23] release note

---
 doc/source/whatsnew/v0.24.0.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt
index d0aa156cf5059..6d194acd8940b 100644
--- a/doc/source/whatsnew/v0.24.0.txt
+++ b/doc/source/whatsnew/v0.24.0.txt
@@ -724,6 +724,7 @@ update the ``ExtensionDtype._metadata`` tuple to match the signature of your
 - Updated the ``.type`` attribute for ``PeriodDtype``, ``DatetimeTZDtype``, and ``IntervalDtype`` to be instances of the dtype (``Period``, ``Timestamp``, and ``Interval`` respectively) (:issue:`22938`)
 - :func:`ExtensionArray.isna` is allowed to return an ``ExtensionArray`` (:issue:`22325`).
 - Support for reduction operations such as ``sum``, ``mean`` via opt-in base class method override (:issue:`22762`)
+- :meth:`Series.unstack` no longer converts extension arrays to object-dtype ndarrays. The output ``DataFrame`` will now have the same dtype as the input. This changes behavior for Categorical and Sparse data (:issue:`23077`).
 
 .. _whatsnew_0240.api.incompatibilities:
 

From 756dde9273e59a92ac3ba3c27ef5e33bcfd3d96f Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Mon, 22 Oct 2018 15:05:49 -0500
Subject: [PATCH 03/23] xfail

---
 pandas/tests/extension/json/test_json.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/pandas/tests/extension/json/test_json.py b/pandas/tests/extension/json/test_json.py
index 15d99f6c5d2fc..0eafc9558956e 100644
--- a/pandas/tests/extension/json/test_json.py
+++ b/pandas/tests/extension/json/test_json.py
@@ -138,7 +138,11 @@ def test_from_dtype(self, data):
 
 
 class TestReshaping(BaseJSON, base.BaseReshapingTests):
-    pass
+    @pytest.mark.xfail(reason="dict for NA", strict=True)
+    def test_unstack(self, data, index):
+        # The base test has NaN for the expected NA value.
+        # this matches otherwise
+        return super().test_unstack(data, index)
 
 
 class TestGetitem(BaseJSON, base.BaseGetitemTests):

From 90f84ef6f8f0d6c6ebc3336f97c2f77f1cfe75c4 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Mon, 22 Oct 2018 15:08:00 -0500
Subject: [PATCH 04/23] spelling

---
 pandas/core/reshape/reshape.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py
index e9fe6ee731984..fc85bfafd4ac7 100644
--- a/pandas/core/reshape/reshape.py
+++ b/pandas/core/reshape/reshape.py
@@ -963,8 +963,8 @@ def unstack_extension_series(series, level, fill_value):
     out = []
     values = series.values
 
-    for col, indicies in result.iteritems():
-        out.append(Series(values.take(indicies.values,
+    for col, indices in result.iteritems():
+        out.append(Series(values.take(indices.values,
                                       allow_fill=True,
                                       fill_value=fill_value),
                           name=col, index=result.index))

From 942db1b918f818281c5dcfd10a951e519a04dc42 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Mon, 22 Oct 2018 16:12:23 -0500
Subject: [PATCH 05/23] lint

---
 pandas/tests/extension/base/reshaping.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/pandas/tests/extension/base/reshaping.py b/pandas/tests/extension/base/reshaping.py
index fa9b168a2b522..8367d02cc1af9 100644
--- a/pandas/tests/extension/base/reshaping.py
+++ b/pandas/tests/extension/base/reshaping.py
@@ -174,7 +174,8 @@ def test_merge(self, data, na_value):
 
     @pytest.mark.parametrize("index", [
         pd.MultiIndex.from_product(([['A', 'B'], ['a', 'b']])),
-        pd.MultiIndex.from_product(([['A', 'B'], ['a', 'b'], ['x', 'y', 'z']])),
+        pd.MultiIndex.from_product(([['A', 'B'], ['a', 'b'],
+                                     ['x', 'y', 'z']])),
 
         # non-uniform
         pd.MultiIndex.from_tuples([('A', 'a'), ('A', 'b'), ('B', 'b')]),
@@ -203,7 +204,8 @@ def test_unstack(self, data, index):
 
         for level in combinations:
             result = ser.unstack(level=level)
-            assert all(isinstance(result[col].values, type(data)) for col in result.columns)
+            assert all(isinstance(result[col].values, type(data))
+                       for col in result.columns)
             expected = ser.astype(object).unstack(level=level)
             result = result.astype(object)
 

From 36a4450c01cf34ccf166d8e35371f404b3b901ae Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Tue, 23 Oct 2018 06:08:45 -0500
Subject: [PATCH 06/23] no copy

---
 pandas/core/reshape/reshape.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py
index fc85bfafd4ac7..4c433ac1548ed 100644
--- a/pandas/core/reshape/reshape.py
+++ b/pandas/core/reshape/reshape.py
@@ -968,4 +968,4 @@ def unstack_extension_series(series, level, fill_value):
                                       allow_fill=True,
                                       fill_value=fill_value),
                           name=col, index=result.index))
-    return concat(out, axis='columns')
+    return concat(out, axis='columns', copy=False)

From ee330d610584da54b338fd03180801c487357402 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Tue, 23 Oct 2018 07:19:28 -0500
Subject: [PATCH 07/23] Fixup decimal tests

---
 pandas/tests/extension/decimal/array.py       |  5 +---
 .../tests/extension/decimal/test_decimal.py   | 23 +++++++++++++++----
 2 files changed, 20 insertions(+), 8 deletions(-)

diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py
index 8c6333c7ce8ee..958fa44a07761 100644
--- a/pandas/tests/extension/decimal/array.py
+++ b/pandas/tests/extension/decimal/array.py
@@ -102,10 +102,7 @@ def copy(self, deep=False):
     def astype(self, dtype, copy=True):
         if isinstance(dtype, type(self.dtype)):
             return type(self)(self._data, context=dtype.context)
-        # need to replace decimal NA
-        result = np.asarray(self, dtype=dtype)
-        result[self.isna()] = np.nan
-        return result
+        return np.asarray(self, dtype=dtype)
 
     def __setitem__(self, key, value):
         if pd.api.types.is_list_like(value):
diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py
index be1c61166e4b1..73fac6eb39ee9 100644
--- a/pandas/tests/extension/decimal/test_decimal.py
+++ b/pandas/tests/extension/decimal/test_decimal.py
@@ -1,5 +1,6 @@
-import operator
 import decimal
+import math
+import operator
 
 import numpy as np
 import pandas as pd
@@ -63,9 +64,23 @@ def data_for_grouping():
 class BaseDecimal(object):
 
     def assert_series_equal(self, left, right, *args, **kwargs):
-
-        left_na = left.isna()
-        right_na = right.isna()
+        def convert(x):
+            # need to convert array([Decimal(NaN)], dtype='object') to np.NaN
+            # because Series[object].isna doesn't recognize decimal(NaN) as
+            # NA.
+            try:
+                return math.isnan(x)
+            except TypeError:
+                return False
+
+        if left.dtype == 'object':
+            left_na = left.apply(convert)
+        else:
+            left_na = left.isna()
+        if right.dtype == 'object':
+            right_na = right.apply(convert)
+        else:
+            right_na = right.isna()
 
         tm.assert_series_equal(left_na, right_na)
         return tm.assert_series_equal(left[~left_na],

From e9498a1470a7953108bc0021c91bd8d3571555c7 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Tue, 23 Oct 2018 16:28:38 -0500
Subject: [PATCH 08/23] update

---
 doc/source/whatsnew/v0.24.0.txt          |  1 +
 pandas/core/reshape/reshape.py           | 67 +++++++++++++++++-------
 pandas/tests/extension/base/reshaping.py |  2 +-
 pandas/tests/frame/test_reshape.py       |  7 +--
 4 files changed, 53 insertions(+), 24 deletions(-)

diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt
index a0ba4ea578387..74bee471444dc 100644
--- a/doc/source/whatsnew/v0.24.0.txt
+++ b/doc/source/whatsnew/v0.24.0.txt
@@ -975,6 +975,7 @@ Categorical
 - Bug when indexing with a boolean-valued ``Categorical``. Now a boolean-valued ``Categorical`` is treated as a boolean mask (:issue:`22665`)
 - Constructing a :class:`CategoricalIndex` with empty values and boolean categories was raising a ``ValueError`` after a change to dtype coercion (:issue:`22702`).
 - Bug in :meth:`Categorical.take` with a user-provided ``fill_value`` not encoding the ``fill_value``, which could result in a ``ValueError``, incorrect results, or a segmentation fault (:issue:`23296`).
+- In :meth:`Series.unstack`, specifying a ``fill_value`` not present in the categories now raises a ``TypeError`` rather than ignoring the ``fill_value`` (:issue:`23284`)
 
 Datetimelike
 ^^^^^^^^^^^^
diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py
index 4c433ac1548ed..640063224628a 100644
--- a/pandas/core/reshape/reshape.py
+++ b/pandas/core/reshape/reshape.py
@@ -401,7 +401,7 @@ def unstack(obj, level, fill_value=None):
             return obj.T.stack(dropna=False)
     else:
         if is_extension_array_dtype(obj.dtype):
-            return unstack_extension_series(obj, level, fill_value)
+            return _unstack_extension_series(obj, level, fill_value)
         unstacker = _Unstacker(obj.values, obj.index, level=level,
                                fill_value=fill_value,
                                constructor=obj._constructor_expanddim)
@@ -422,6 +422,52 @@ def _unstack_frame(obj, level, fill_value=None):
         return unstacker.get_result()
 
 
+def _unstack_extension_series(series, level, fill_value):
+    """
+    Unstack an ExtensionArray-backed Series.
+
+    The ExtensionDtype is preserved.
+
+    Parameters
+    ----------
+    series : Series
+        A Series with an ExtensionArray for values
+    level : Any
+        The level name or number.
+    fill_value : Any
+        The user-level (not physical storage) fill value to use for
+        missing values introduced by the reshape. Passed to
+        ``series.values.take``.
+
+    Returns
+    -------
+    DataFrame
+        Each column of the DataFrame will have the same dtype as
+        the input Series.
+    """
+    # Implementation note: the basic idea is to
+    # 1. Do a regular unstack on a dummy array of integers
+    # 2. Followup with a columnwise take.
+    # We use the dummy take to discover newly-created missing values
+    # introduced by the reshape.
+    from pandas.core.reshape.concat import concat
+
+    dummy_arr = np.arange(len(series))
+    # fill_value=-1, since we will do a series.values.take later
+    result = _Unstacker(dummy_arr, series.index,
+                        level=level, fill_value=-1).get_result()
+
+    out = []
+    values = series.values
+
+    for col, indices in result.iteritems():
+        out.append(Series(values.take(indices.values,
+                                      allow_fill=True,
+                                      fill_value=fill_value),
+                          name=col, index=result.index))
+    return concat(out, axis='columns', copy=False)
+
+
 def stack(frame, level=-1, dropna=True):
     """
     Convert DataFrame to Series with multi-level Index. Columns become the
@@ -950,22 +996,3 @@ def make_axis_dummies(frame, axis='minor', transform=None):
     values = values.take(labels, axis=0)
 
     return DataFrame(values, columns=items, index=frame.index)
-
-
-def unstack_extension_series(series, level, fill_value):
-    from pandas.core.reshape.concat import concat
-
-    dummy_arr = np.arange(len(series))
-    # fill_value=-1, since we will do a series.values.take later
-    result = _Unstacker(dummy_arr, series.index,
-                        level=level, fill_value=-1).get_result()
-
-    out = []
-    values = series.values
-
-    for col, indices in result.iteritems():
-        out.append(Series(values.take(indices.values,
-                                      allow_fill=True,
-                                      fill_value=fill_value),
-                          name=col, index=result.index))
-    return concat(out, axis='columns', copy=False)
diff --git a/pandas/tests/extension/base/reshaping.py b/pandas/tests/extension/base/reshaping.py
index 8367d02cc1af9..b47eb0c98f00a 100644
--- a/pandas/tests/extension/base/reshaping.py
+++ b/pandas/tests/extension/base/reshaping.py
@@ -197,7 +197,7 @@ def test_unstack(self, data, index):
         n = index.nlevels
         levels = list(range(n))
         # [0, 1, 2]
-        # -> [(0,), (1,), (2,) (0, 1), (1, 0)]
+        # [(0,), (1,), (2,), (0, 1), (0, 2), (1, 0), (1, 2), (2, 0), (2, 1)]
         combinations = itertools.chain.from_iterable(
             itertools.permutations(levels, i) for i in range(1, n)
         )
diff --git a/pandas/tests/frame/test_reshape.py b/pandas/tests/frame/test_reshape.py
index 9f6735c7ba2bf..24b6aaca960a4 100644
--- a/pandas/tests/frame/test_reshape.py
+++ b/pandas/tests/frame/test_reshape.py
@@ -314,9 +314,10 @@ def test_unstack_fill_frame_categorical(self):
                              index=list('xyz'))
         assert_frame_equal(result, expected)
 
-        # Fill with non-category results in NaN entries similar to above
-        result = data.unstack(fill_value='d')
-        assert_frame_equal(result, expected)
+        # Fill with non-category results in a TypeError
+        msg = r"'fill_value' \('d'\) is not in"
+        with tm.assert_raises_regex(TypeError, msg):
+            data.unstack(fill_value='d')
 
         # Fill with category value replaces missing values as expected
         result = data.unstack(fill_value='c')

From 72b5a0dcee2f16e414c5aab1c77348704bf04152 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Wed, 24 Oct 2018 06:08:46 -0500
Subject: [PATCH 09/23] handle names

---
 pandas/core/reshape/reshape.py           | 2 +-
 pandas/tests/extension/base/reshaping.py | 2 +-
 pandas/tests/frame/test_reshape.py       | 3 ++-
 3 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py
index 640063224628a..2f5e98bbfda36 100644
--- a/pandas/core/reshape/reshape.py
+++ b/pandas/core/reshape/reshape.py
@@ -465,7 +465,7 @@ def _unstack_extension_series(series, level, fill_value):
                                       allow_fill=True,
                                       fill_value=fill_value),
                           name=col, index=result.index))
-    return concat(out, axis='columns', copy=False)
+    return concat(out, axis='columns', copy=False, keys=result.columns)
 
 
 def stack(frame, level=-1, dropna=True):
diff --git a/pandas/tests/extension/base/reshaping.py b/pandas/tests/extension/base/reshaping.py
index b47eb0c98f00a..066fb182b50a4 100644
--- a/pandas/tests/extension/base/reshaping.py
+++ b/pandas/tests/extension/base/reshaping.py
@@ -173,7 +173,7 @@ def test_merge(self, data, na_value):
         self.assert_frame_equal(res, exp[['ext', 'int1', 'key', 'int2']])
 
     @pytest.mark.parametrize("index", [
-        pd.MultiIndex.from_product(([['A', 'B'], ['a', 'b']])),
+        pd.MultiIndex.from_product(([['A', 'B'], ['a', 'b']]), names=['a', 'b']),
         pd.MultiIndex.from_product(([['A', 'B'], ['a', 'b'],
                                      ['x', 'y', 'z']])),
 
diff --git a/pandas/tests/frame/test_reshape.py b/pandas/tests/frame/test_reshape.py
index 24b6aaca960a4..cfd6399110c75 100644
--- a/pandas/tests/frame/test_reshape.py
+++ b/pandas/tests/frame/test_reshape.py
@@ -303,7 +303,8 @@ def test_unstack_fill_frame_categorical(self):
         # Test unstacking with categorical
         data = pd.Series(['a', 'b', 'c', 'a'], dtype='category')
         data.index = pd.MultiIndex.from_tuples(
-            [('x', 'a'), ('x', 'b'), ('y', 'b'), ('z', 'a')])
+            [('x', 'a'), ('x', 'b'), ('y', 'b'), ('z', 'a')],
+        )
 
         # By default missing values will be NaN
         result = data.unstack()

From 4d679cbc9d8551be9e856c2cdabbd0afd3abc16b Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Wed, 24 Oct 2018 06:09:16 -0500
Subject: [PATCH 10/23] lint

---
 pandas/tests/extension/base/reshaping.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pandas/tests/extension/base/reshaping.py b/pandas/tests/extension/base/reshaping.py
index 066fb182b50a4..5316d88ba64a1 100644
--- a/pandas/tests/extension/base/reshaping.py
+++ b/pandas/tests/extension/base/reshaping.py
@@ -173,7 +173,8 @@ def test_merge(self, data, na_value):
         self.assert_frame_equal(res, exp[['ext', 'int1', 'key', 'int2']])
 
     @pytest.mark.parametrize("index", [
-        pd.MultiIndex.from_product(([['A', 'B'], ['a', 'b']]), names=['a', 'b']),
+        pd.MultiIndex.from_product(([['A', 'B'], ['a', 'b']]),
+                                   names=['a', 'b']),
         pd.MultiIndex.from_product(([['A', 'B'], ['a', 'b'],
                                      ['x', 'y', 'z']])),
 

From ff7aba750f89cbd9c3ce3d80f22acf5744d0b401 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Wed, 24 Oct 2018 08:05:26 -0500
Subject: [PATCH 11/23] handle DataFrame.unstack

---
 pandas/core/reshape/reshape.py           | 7 +++++++
 pandas/tests/extension/base/reshaping.py | 8 ++++++--
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py
index 2f5e98bbfda36..fbe5c3d04d888 100644
--- a/pandas/core/reshape/reshape.py
+++ b/pandas/core/reshape/reshape.py
@@ -409,6 +409,13 @@ def unstack(obj, level, fill_value=None):
 
 
 def _unstack_frame(obj, level, fill_value=None):
+    from pandas.tools.merge import concat
+
+    if (obj._is_homogeneous_type and
+            is_extension_array_dtype(obj.dtypes.iloc[0])):
+        frames = [ser.unstack(level=level, fill_value=fill_value)
+                  for name, ser in obj.iteritems()]
+        return concat(frames, axis=1, keys=obj.columns)
     if obj._is_mixed_type:
         unstacker = partial(_Unstacker, index=obj.index,
                             level=level, fill_value=fill_value)
diff --git a/pandas/tests/extension/base/reshaping.py b/pandas/tests/extension/base/reshaping.py
index 5316d88ba64a1..5572f717a561c 100644
--- a/pandas/tests/extension/base/reshaping.py
+++ b/pandas/tests/extension/base/reshaping.py
@@ -191,9 +191,13 @@ def test_merge(self, data, na_value):
             ('B', 'c', 1),
         ]),
     ])
-    def test_unstack(self, data, index):
+    @pytest.mark.parametrize("obj", ["series", "frame"])
+    def test_unstack(self, data, index, obj):
         data = data[:len(index)]
-        ser = pd.Series(data, index=index)
+        if obj == "series":
+            ser = pd.Series(data, index=index)
+        else:
+            ser = pd.DataFrame({"A": data, "B": data}, index=index)
 
         n = index.nlevels
         levels = list(range(n))

From 49bdb50db44b8a6e0d67bfb59e0300bce6948718 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Wed, 24 Oct 2018 08:17:06 -0500
Subject: [PATCH 12/23] handle DataFrame.unstack

---
 pandas/core/internals/blocks.py          | 6 +++++-
 pandas/tests/extension/base/reshaping.py | 3 +--
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
index 5ce8a9103f008..de981672ed034 100644
--- a/pandas/core/internals/blocks.py
+++ b/pandas/core/internals/blocks.py
@@ -34,6 +34,7 @@
     is_numeric_v_string_like, is_extension_type,
     is_extension_array_dtype,
     is_list_like,
+    is_sparse,
     is_re,
     is_re_compilable,
     pandas_dtype)
@@ -632,7 +633,10 @@ def _astype(self, dtype, copy=False, errors='raise', values=None,
             return self
 
         if klass is None:
-            if dtype == np.object_:
+            if is_sparse(self.values):
+                # Series[Sparse].astype(object) is sparse.
+                klass = ExtensionBlock
+            elif is_object_dtype(dtype):
                 klass = ObjectBlock
             elif is_extension_array_dtype(dtype):
                 klass = ExtensionBlock
diff --git a/pandas/tests/extension/base/reshaping.py b/pandas/tests/extension/base/reshaping.py
index 5572f717a561c..563f247ba052b 100644
--- a/pandas/tests/extension/base/reshaping.py
+++ b/pandas/tests/extension/base/reshaping.py
@@ -173,10 +173,9 @@ def test_merge(self, data, na_value):
         self.assert_frame_equal(res, exp[['ext', 'int1', 'key', 'int2']])
 
     @pytest.mark.parametrize("index", [
+        # Two levels, uniform.
         pd.MultiIndex.from_product(([['A', 'B'], ['a', 'b']]),
                                    names=['a', 'b']),
-        pd.MultiIndex.from_product(([['A', 'B'], ['a', 'b'],
-                                     ['x', 'y', 'z']])),
 
         # non-uniform
         pd.MultiIndex.from_tuples([('A', 'a'), ('A', 'b'), ('B', 'b')]),

From cf8ed731b8fe8eb6832a6c0e8b886863362bf95d Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Wed, 24 Oct 2018 09:23:50 -0500
Subject: [PATCH 13/23] handle DataFrame.unstack

---
 doc/source/whatsnew/v0.24.0.txt    |  2 +-
 pandas/core/internals/blocks.py    | 30 ++++++++++++++++++++++++++++++
 pandas/core/reshape/reshape.py     |  2 +-
 pandas/tests/frame/test_reshape.py | 15 +++++++++++++++
 4 files changed, 47 insertions(+), 2 deletions(-)

diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt
index e72b0b820ee5d..e75a0b5fc1aff 100644
--- a/doc/source/whatsnew/v0.24.0.txt
+++ b/doc/source/whatsnew/v0.24.0.txt
@@ -808,7 +808,7 @@ update the ``ExtensionDtype._metadata`` tuple to match the signature of your
 - Updated the ``.type`` attribute for ``PeriodDtype``, ``DatetimeTZDtype``, and ``IntervalDtype`` to be instances of the dtype (``Period``, ``Timestamp``, and ``Interval`` respectively) (:issue:`22938`)
 - :func:`ExtensionArray.isna` is allowed to return an ``ExtensionArray`` (:issue:`22325`).
 - Support for reduction operations such as ``sum``, ``mean`` via opt-in base class method override (:issue:`22762`)
-- :meth:`Series.unstack` no longer converts extension arrays to object-dtype ndarrays. The output ``DataFrame`` will now have the same dtype as the input. This changes behavior for Categorical and Sparse data (:issue:`23077`).
+- :meth:`Series.unstack` and :meth:`DataFrame.unstack` no longer convert extension arrays to object-dtype ndarrays. Each column in the output ``DataFrame`` will now have the same dtype as the input (:issue:`23077`).
 
 .. _whatsnew_0240.api.incompatibilities:
 
diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
index de981672ed034..b92d62e7e0ca4 100644
--- a/pandas/core/internals/blocks.py
+++ b/pandas/core/internals/blocks.py
@@ -1,4 +1,5 @@
 # -*- coding: utf-8 -*-
+import copy
 import warnings
 import inspect
 import re
@@ -1954,6 +1955,35 @@ def shift(self, periods, axis=0):
     def _ftype(self):
         return getattr(self.values, '_pandas_ftype', Block._ftype)
 
+    def _unstack(self, unstacker_func, new_columns):
+        # I wonder if this is supported
+        fill_value = unstacker_func.keywords['fill_value']
+        unstacker_func = copy.deepcopy(unstacker_func)
+        unstacker_func.keywords['fill_value'] = -1
+
+        # just get the index. Can maybe avoid this?
+        dummy_unstacker = unstacker_func(np.empty((0, 0)))
+
+        dummy_arr = np.arange(len(dummy_unstacker.index))
+
+        unstacker = unstacker_func(dummy_arr)
+        new_items = unstacker.get_new_columns()
+        new_placement = new_columns.get_indexer(new_items)
+        new_values, mask = unstacker.get_new_values()
+        mask = mask.any(0)
+
+        new_values = [
+            self.values.take(indices, allow_fill=True,
+                             fill_value=fill_value)
+            for indices in new_values.T
+        ]
+
+        blocks = [
+            self.make_block_same_class(vals, [place])
+            for vals, place in zip(new_values, new_placement)
+        ]
+        return blocks, mask
+
 
 class NumericBlock(Block):
     __slots__ = ()
diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py
index fbe5c3d04d888..aa85be4bdbc02 100644
--- a/pandas/core/reshape/reshape.py
+++ b/pandas/core/reshape/reshape.py
@@ -409,7 +409,7 @@ def unstack(obj, level, fill_value=None):
 
 
 def _unstack_frame(obj, level, fill_value=None):
-    from pandas.tools.merge import concat
+    from pandas.core.reshape.concat import concat
 
     if (obj._is_homogeneous_type and
             is_extension_array_dtype(obj.dtypes.iloc[0])):
diff --git a/pandas/tests/frame/test_reshape.py b/pandas/tests/frame/test_reshape.py
index cfd6399110c75..54511df4effad 100644
--- a/pandas/tests/frame/test_reshape.py
+++ b/pandas/tests/frame/test_reshape.py
@@ -874,6 +874,21 @@ def test_stack_preserve_categorical_dtype(self, ordered, labels):
 
         tm.assert_series_equal(result, expected)
 
+    @pytest.mark.parametrize('level', [0, 1])
+    def test_unstack_mixed_extension_types(self, level):
+        index = pd.MultiIndex.from_tuples([('A', 0), ('A', 1), ('B', 1)],
+                                          names=['a', 'b'])
+        df = pd.DataFrame({"A": pd.core.arrays.integer_array([0, 1, None]),
+                           "B": pd.Categorical(['a', 'a', 'b'])}, index=index)
+
+        result = df.unstack(level=level)
+        expected = df.astype(object).unstack(level=level)
+
+        expected_dtypes = pd.Series([df.A.dtype] * 2 + [df.B.dtype] * 2,
+                                    index=result.columns)
+        tm.assert_series_equal(result.dtypes, expected_dtypes)
+        tm.assert_frame_equal(result.astype(object), expected)
+
     @pytest.mark.parametrize("level", [0, 'baz'])
     def test_unstack_swaplevel_sortlevel(self, level):
         # GH 20994

From 5902b5ba1be9e10f482d21b0e2b037b7228264f6 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Wed, 24 Oct 2018 10:08:48 -0500
Subject: [PATCH 14/23] Slightly de-hackify

---
 pandas/core/internals/blocks.py   | 67 +++++++++++++++++--------------
 pandas/core/internals/managers.py | 10 ++++-
 pandas/core/reshape/reshape.py    | 10 +----
 3 files changed, 47 insertions(+), 40 deletions(-)

diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
index b92d62e7e0ca4..19e832ef63c99 100644
--- a/pandas/core/internals/blocks.py
+++ b/pandas/core/internals/blocks.py
@@ -1,5 +1,5 @@
 # -*- coding: utf-8 -*-
-import copy
+import functools
 import warnings
 import inspect
 import re
@@ -1434,7 +1434,7 @@ def equals(self, other):
             return False
         return array_equivalent(self.values, other.values)
 
-    def _unstack(self, unstacker_func, new_columns):
+    def _unstack(self, unstacker_func, new_columns, n_rows, fill_value):
         """Return a list of unstacked blocks of self
 
         Parameters
@@ -1443,6 +1443,10 @@ def _unstack(self, unstacker_func, new_columns):
             Partially applied unstacker.
         new_columns : Index
             All columns of the unstacked BlockManager.
+        n_rows : int
+            Only used in ExtensionBlock._unstack
+        fill_value : Any
+            Only used in ExtensionBlock._unstack
 
         Returns
         -------
@@ -1736,7 +1740,7 @@ def _slice(self, slicer):
     def _try_cast_result(self, result, dtype=None):
         return result
 
-    def _unstack(self, unstacker_func, new_columns):
+    def _unstack(self, unstacker_func, new_columns, n_rows, fill_value):
         """Return a list of unstacked blocks of self
 
         Parameters
@@ -1745,6 +1749,10 @@ def _unstack(self, unstacker_func, new_columns):
             Partially applied unstacker.
         new_columns : Index
             All columns of the unstacked BlockManager.
+        n_rows : int
+            Only used in ExtensionBlock._unstack
+        fill_value : Any
+            Only used in ExtensionBlock._unstack
 
         Returns
         -------
@@ -1756,11 +1764,11 @@ def _unstack(self, unstacker_func, new_columns):
         # NonConsolidatable blocks can have a single item only, so we return
         # one block per item
         unstacker = unstacker_func(self.values.T)
-        new_items = unstacker.get_new_columns()
-        new_placement = new_columns.get_indexer(new_items)
-        new_values, mask = unstacker.get_new_values()
 
-        mask = mask.any(0)
+        new_placement, new_values, mask = self._get_unstack_items(
+            unstacker, new_columns
+        )
+
         new_values = new_values.T[mask]
         new_placement = new_placement[mask]
 
@@ -1768,6 +1776,16 @@ def _unstack(self, unstacker_func, new_columns):
                   for vals, place in zip(new_values, new_placement)]
         return blocks, mask
 
+    @staticmethod
+    def _get_unstack_items(unstacker, new_columns):
+        # shared with ExtensionBlock
+        new_items = unstacker.get_new_columns()
+        new_placement = new_columns.get_indexer(new_items)
+        new_values, mask = unstacker.get_new_values()
+
+        mask = mask.any(0)
+        return new_placement, new_values, mask
+
 
 class ExtensionBlock(NonConsolidatableMixIn, Block):
     """Block for holding extension types.
@@ -1955,32 +1973,21 @@ def shift(self, periods, axis=0):
     def _ftype(self):
         return getattr(self.values, '_pandas_ftype', Block._ftype)
 
-    def _unstack(self, unstacker_func, new_columns):
-        # I wonder if this is supported
-        fill_value = unstacker_func.keywords['fill_value']
-        unstacker_func = copy.deepcopy(unstacker_func)
-        unstacker_func.keywords['fill_value'] = -1
-
-        # just get the index. Can maybe avoid this?
-        dummy_unstacker = unstacker_func(np.empty((0, 0)))
-
-        dummy_arr = np.arange(len(dummy_unstacker.index))
+    def _unstack(self, unstacker_func, new_columns, n_rows, fill_value):
+        dummy_arr = np.arange(n_rows)
+        dummy_unstacker = functools.partial(unstacker_func, fill_value=-1)
+        unstacker = dummy_unstacker(dummy_arr)
 
-        unstacker = unstacker_func(dummy_arr)
-        new_items = unstacker.get_new_columns()
-        new_placement = new_columns.get_indexer(new_items)
-        new_values, mask = unstacker.get_new_values()
-        mask = mask.any(0)
-
-        new_values = [
-            self.values.take(indices, allow_fill=True,
-                             fill_value=fill_value)
-            for indices in new_values.T
-        ]
+        new_placement, new_values, mask = self._get_unstack_items(
+            unstacker, new_columns
+        )
 
         blocks = [
-            self.make_block_same_class(vals, [place])
-            for vals, place in zip(new_values, new_placement)
+            self.make_block_same_class(
+                self.values.take(indices, allow_fill=True,
+                                 fill_value=fill_value),
+                [place])
+            for indices, place in zip(new_values.T, new_placement)
         ]
         return blocks, mask
 
diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
index fc3a12a9da82a..0519c5e5abe33 100644
--- a/pandas/core/internals/managers.py
+++ b/pandas/core/internals/managers.py
@@ -1405,18 +1405,21 @@ def canonicalize(block):
         return all(block.equals(oblock)
                    for block, oblock in zip(self_blocks, other_blocks))
 
-    def unstack(self, unstacker_func):
+    def unstack(self, unstacker_func, fill_value):
         """Return a blockmanager with all blocks unstacked.
 
         Parameters
         ----------
         unstacker_func : callable
             A (partially-applied) ``pd.core.reshape._Unstacker`` class.
+        fill_value : Any
+            fill_value for newly introduced missing values.
 
         Returns
         -------
         unstacked : BlockManager
         """
+        n_rows = self.shape[-1]
         dummy = unstacker_func(np.empty((0, 0)), value_columns=self.items)
         new_columns = dummy.get_new_columns()
         new_index = dummy.get_new_index()
@@ -1427,7 +1430,10 @@ def unstack(self, unstacker_func):
             blocks, mask = blk._unstack(
                 partial(unstacker_func,
                         value_columns=self.items[blk.mgr_locs.indexer]),
-                new_columns)
+                new_columns,
+                n_rows,
+                fill_value
+            )
 
             new_blocks.extend(blocks)
             columns_mask.extend(mask)
diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py
index aa85be4bdbc02..9f2e0e783d7d6 100644
--- a/pandas/core/reshape/reshape.py
+++ b/pandas/core/reshape/reshape.py
@@ -409,17 +409,11 @@ def unstack(obj, level, fill_value=None):
 
 
 def _unstack_frame(obj, level, fill_value=None):
-    from pandas.core.reshape.concat import concat
-
-    if (obj._is_homogeneous_type and
-            is_extension_array_dtype(obj.dtypes.iloc[0])):
-        frames = [ser.unstack(level=level, fill_value=fill_value)
-                  for name, ser in obj.iteritems()]
-        return concat(frames, axis=1, keys=obj.columns)
     if obj._is_mixed_type:
         unstacker = partial(_Unstacker, index=obj.index,
                             level=level, fill_value=fill_value)
-        blocks = obj._data.unstack(unstacker)
+        blocks = obj._data.unstack(unstacker,
+                                   fill_value=fill_value)
         return obj._constructor(blocks)
     else:
         unstacker = _Unstacker(obj.values, obj.index, level=level,

From a75806ade3dccc29139b9d35ecd026da061a0746 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Fri, 26 Oct 2018 07:39:19 -0500
Subject: [PATCH 15/23] docs, comments

---
 pandas/core/internals/blocks.py | 34 ++++++++++++++++++++++++++++++---
 1 file changed, 31 insertions(+), 3 deletions(-)

diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
index 19e832ef63c99..ba8ad8b8a817d 100644
--- a/pandas/core/internals/blocks.py
+++ b/pandas/core/internals/blocks.py
@@ -635,7 +635,7 @@ def _astype(self, dtype, copy=False, errors='raise', values=None,
 
         if klass is None:
             if is_sparse(self.values):
-                # Series[Sparse].astype(object) is sparse.
+                # special case sparse, Series[Sparse].astype(object) is sparse
                 klass = ExtensionBlock
             elif is_object_dtype(dtype):
                 klass = ObjectBlock
@@ -1776,8 +1776,30 @@ def _unstack(self, unstacker_func, new_columns, n_rows, fill_value):
                   for vals, place in zip(new_values, new_placement)]
         return blocks, mask
 
-    @staticmethod
-    def _get_unstack_items(unstacker, new_columns):
+    def _get_unstack_items(self, unstacker, new_columns):
+        """
+        Get the placement, values, and mask for a Block unstack.
+
+        This is shared between ObjectBlock and ExtensionBlock. They
+        differ in that ObjectBlock passes the values, while ExtensionBlock
+        passes the dummy ndarray of positions to be used by a take
+        later.
+
+        Parameters
+        ----------
+        unstacker : pandas.core.reshape.reshape._Unstacker
+        new_columns : Index
+            All columns of the unstacked BlockManager.
+
+        Returns
+        -------
+        new_placement : ndarray[int]
+            The placement of the new columns in `new_columns`.
+        new_values : Union[ndarray, ExtensionArray]
+            The first return value from _Unstacker.get_new_values.
+        mask : ndarray[bool]
+            The second return value from _Unstacker.get_new_values.
+        """
         # shared with ExtensionBlock
         new_items = unstacker.get_new_columns()
         new_placement = new_columns.get_indexer(new_items)
@@ -1974,6 +1996,12 @@ def _ftype(self):
         return getattr(self.values, '_pandas_ftype', Block._ftype)
 
     def _unstack(self, unstacker_func, new_columns, n_rows, fill_value):
+        # ExtensionArray-safe unstack.
+        # We override ObjectBlock._unstack, which unstacks directly on the
+        # values of the array. For EA-backed blocks, this would require
+        # converting to a 2-D ndarray of objects.
+        # Instead, we unstack an ndarray of integer positions, followed by
+        # a `take` on the actual values.
         dummy_arr = np.arange(n_rows)
         dummy_unstacker = functools.partial(unstacker_func, fill_value=-1)
         unstacker = dummy_unstacker(dummy_arr)

From 8ed7c73d27707f97ecfc44bba154f2059027f9d8 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Fri, 26 Oct 2018 07:42:15 -0500
Subject: [PATCH 16/23] unxfail test

---
 pandas/tests/frame/test_reshape.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/pandas/tests/frame/test_reshape.py b/pandas/tests/frame/test_reshape.py
index 23489098614cd..54511df4effad 100644
--- a/pandas/tests/frame/test_reshape.py
+++ b/pandas/tests/frame/test_reshape.py
@@ -277,8 +277,6 @@ def test_unstack_fill_frame_timedelta(self):
                              index=['x', 'y', 'z'])
         assert_frame_equal(result, expected)
 
-    @pytest.mark.xfail(reason="GH-23077",
-                       strict=True)
     def test_unstack_fill_frame_period(self):
 
         # Test unstacking with period

From b23234c35af4bb41a74e24f9e867a9cac856e41f Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Fri, 26 Oct 2018 07:54:50 -0500
Subject: [PATCH 17/23] added benchmark

---
 asv_bench/benchmarks/reshape.py | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/asv_bench/benchmarks/reshape.py b/asv_bench/benchmarks/reshape.py
index bda486dba3b0f..1d7dc58aca5ed 100644
--- a/asv_bench/benchmarks/reshape.py
+++ b/asv_bench/benchmarks/reshape.py
@@ -49,21 +49,28 @@ def time_unstack(self):
 
 class Unstack(object):
 
-    def setup(self):
+    params = ['int', 'category']
+
+    def setup(self, dtype):
         m = 100
         n = 1000
 
         levels = np.arange(m)
         index = MultiIndex.from_product([levels] * 2)
         columns = np.arange(n)
-        values = np.arange(m * m * n).reshape(m * m, n)
+        if dtype == 'int':
+            values = np.arange(m * m * n).reshape(m * m, n)
+        else:
+            indices = np.random.randint(0, 52, size=(m * m, n))
+            values = np.take(list(string.ascii_letters), indices)
+
         self.df = DataFrame(values, index, columns)
         self.df2 = self.df.iloc[:-1]
 
-    def time_full_product(self):
+    def time_full_product(self, dtype):
         self.df.unstack()
 
-    def time_without_last_row(self):
+    def time_without_last_row(self, dtype):
         self.df2.unstack()
 
 

From 19b7cfa90c61352968ffea6fe58e4ba27168f5ed Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Mon, 29 Oct 2018 15:40:43 -0500
Subject: [PATCH 18/23] fix asv

---
 asv_bench/benchmarks/reshape.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/asv_bench/benchmarks/reshape.py b/asv_bench/benchmarks/reshape.py
index 1d7dc58aca5ed..a337968419afb 100644
--- a/asv_bench/benchmarks/reshape.py
+++ b/asv_bench/benchmarks/reshape.py
@@ -63,6 +63,7 @@ def setup(self, dtype):
         else:
             indices = np.random.randint(0, 52, size=(m * m, n))
             values = np.take(list(string.ascii_letters), indices)
+            values = [pd.Categorical(v) for v in values.T]
 
         self.df = DataFrame(values, index, columns)
         self.df2 = self.df.iloc[:-1]

From 2d78d42c7ab7ce8c5ef02b6e4cff2388b8f159a8 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Mon, 5 Nov 2018 11:52:13 -0600
Subject: [PATCH 19/23] CLN: remove dead code

---
 pandas/core/reshape/reshape.py | 29 ++++-------------------------
 1 file changed, 4 insertions(+), 25 deletions(-)

diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py
index 933621b58be44..2dca7cf0e6aa3 100644
--- a/pandas/core/reshape/reshape.py
+++ b/pandas/core/reshape/reshape.py
@@ -12,12 +12,12 @@
 from pandas.core.dtypes.cast import maybe_promote
 from pandas.core.dtypes.common import (
     ensure_platform_int, is_bool_dtype, is_extension_array_dtype, is_list_like,
-    is_object_dtype, is_sparse, needs_i8_conversion)
+    is_object_dtype, needs_i8_conversion)
 from pandas.core.dtypes.missing import notna
 
 from pandas import compat
 import pandas.core.algorithms as algos
-from pandas.core.arrays import Categorical, SparseArray
+from pandas.core.arrays import SparseArray
 from pandas.core.arrays.categorical import _factorize_from_iterable
 from pandas.core.frame import DataFrame
 from pandas.core.index import Index, MultiIndex
@@ -82,28 +82,15 @@ class _Unstacker(object):
     def __init__(self, values, index, level=-1, value_columns=None,
                  fill_value=None, constructor=None):
 
-        self.is_categorical = None
-        self.is_sparse = is_sparse(values)
         if values.ndim == 1:
-            if isinstance(values, Categorical):
-                self.is_categorical = values
-                values = np.array(values)
-            elif self.is_sparse:
-                # XXX: Makes SparseArray *dense*, but it's supposedly
-                # a single column at a time, so it's "doable"
-                values = values.values
             values = values[:, np.newaxis]
         self.values = values
         self.value_columns = value_columns
         self.fill_value = fill_value
 
         if constructor is None:
-            if self.is_sparse:
-                self.constructor = SparseDataFrame
-            else:
-                self.constructor = DataFrame
-        else:
-            self.constructor = constructor
+            constructor = DataFrame
+        self.constructor = constructor
 
         if value_columns is None and values.shape[1] != 1:  # pragma: no cover
             raise ValueError('must pass column labels for multi-column data')
@@ -174,14 +161,6 @@ def get_result(self):
         columns = self.get_new_columns()
         index = self.get_new_index()
 
-        # may need to coerce categoricals here
-        if self.is_categorical is not None:
-            categories = self.is_categorical.categories
-            ordered = self.is_categorical.ordered
-            values = [Categorical(values[:, i], categories=categories,
-                                  ordered=ordered)
-                      for i in range(values.shape[-1])]
-
         return self.constructor(values, index=index, columns=columns)
 
     def get_new_values(self):

From a9e6263ecedf572eff7e3db90abd387e69b9fa67 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Mon, 5 Nov 2018 12:50:23 -0600
Subject: [PATCH 20/23] faster asv

---
 asv_bench/benchmarks/reshape.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/asv_bench/benchmarks/reshape.py b/asv_bench/benchmarks/reshape.py
index a337968419afb..67fdfb82e72c0 100644
--- a/asv_bench/benchmarks/reshape.py
+++ b/asv_bench/benchmarks/reshape.py
@@ -61,6 +61,10 @@ def setup(self, dtype):
         if dtype == 'int':
             values = np.arange(m * m * n).reshape(m * m, n)
         else:
+            # the category branch is ~20x slower than int. So we
+            # cut down the size a bit. Now it's only ~3x slower.
+            n = 50
+            columns = columns[:n]
             indices = np.random.randint(0, 52, size=(m * m, n))
             values = np.take(list(string.ascii_letters), indices)
             values = [pd.Categorical(v) for v in values.T]

From 967c674a3ca65cf901a77fd910f33e9a1737850c Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Tue, 6 Nov 2018 07:53:05 -0600
Subject: [PATCH 21/23] API: decimal nan is na

---
 doc/source/whatsnew/v0.24.0.txt     |  1 +
 pandas/_libs/missing.pyx            |  5 ++++
 pandas/tests/dtypes/test_missing.py | 38 +++++++++++++++++++++++++++++
 3 files changed, 44 insertions(+)

diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt
index f449ca532ae74..c8c5db61160dd 100644
--- a/doc/source/whatsnew/v0.24.0.txt
+++ b/doc/source/whatsnew/v0.24.0.txt
@@ -1227,6 +1227,7 @@ Missing
 - Bug in :func:`Series.hasnans` that could be incorrectly cached and return incorrect answers if null elements are introduced after an initial call (:issue:`19700`)
 - :func:`Series.isin` now treats all NaN-floats as equal also for `np.object`-dtype. This behavior is consistent with the behavior for float64 (:issue:`22119`)
 - :func:`unique` no longer mangles NaN-floats and the ``NaT``-object for `np.object`-dtype, i.e. ``NaT`` is no longer coerced to a NaN-value and is treated as a different entity. (:issue:`22295`)
+- :meth:`isna` now considers ``decimal.Decimal('NaN')`` a missing value (:issue:`23284`).
 
 
 MultiIndex
diff --git a/pandas/_libs/missing.pyx b/pandas/_libs/missing.pyx
index b8791359241ad..4fa96f652adaf 100644
--- a/pandas/_libs/missing.pyx
+++ b/pandas/_libs/missing.pyx
@@ -1,6 +1,7 @@
 # -*- coding: utf-8 -*-
 
 import cython
+import decimal
 from cython import Py_ssize_t
 
 import numpy as np
@@ -33,6 +34,8 @@ cdef inline bint _check_all_nulls(object val):
         res = get_datetime64_value(val) == NPY_NAT
     elif util.is_timedelta64_object(val):
         res = get_timedelta64_value(val) == NPY_NAT
+    elif isinstance(val, decimal.Decimal):
+        return val.is_nan()
     else:
         res = 0
     return res
@@ -71,6 +74,8 @@ cpdef bint checknull(object val):
         return get_timedelta64_value(val) == NPY_NAT
     elif util.is_array(val):
         return False
+    elif isinstance(val, decimal.Decimal):
+        return val.is_nan()
     else:
         return val is None or util.is_nan(val)
 
diff --git a/pandas/tests/dtypes/test_missing.py b/pandas/tests/dtypes/test_missing.py
index 8f82db69a9213..0fa7388931ec2 100644
--- a/pandas/tests/dtypes/test_missing.py
+++ b/pandas/tests/dtypes/test_missing.py
@@ -1,5 +1,6 @@
 # -*- coding: utf-8 -*-
 
+import decimal
 import pytest
 from warnings import catch_warnings, simplefilter
 import numpy as np
@@ -248,6 +249,43 @@ def test_period(self):
         tm.assert_series_equal(isna(s), exp)
         tm.assert_series_equal(notna(s), ~exp)
 
+    def test_decimal(self):
+        # scalars
+        a = decimal.Decimal(1.0)
+        assert pd.isna(a) is False
+        assert pd.notna(a) is True
+
+        b = decimal.Decimal('NaN')
+        assert pd.isna(b) is True
+        assert pd.notna(b) is False
+
+        # array
+        arr = np.array([a, b])
+        expected = np.array([False, True])
+        result = pd.isna(arr)
+        tm.assert_numpy_array_equal(result, expected)
+
+        result = pd.notna(arr)
+        tm.assert_numpy_array_equal(result, ~expected)
+
+        # series
+        ser = pd.Series(arr)
+        expected = pd.Series(expected)
+        result = pd.isna(ser)
+        tm.assert_series_equal(result, expected)
+
+        result = pd.notna(ser)
+        tm.assert_series_equal(result, ~expected)
+
+        # index
+        idx = pd.Index(arr)
+        expected = np.array([False, True])
+        result = pd.isna(idx)
+        tm.assert_numpy_array_equal(result, expected)
+
+        result = pd.notna(idx)
+        tm.assert_numpy_array_equal(result, ~expected)
+
 
 def test_array_equivalent():
     assert array_equivalent(np.array([np.nan, np.nan]),

From 32bc3deac394a2c6fb9d5d792980eb73550ee51d Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Tue, 6 Nov 2018 09:31:23 -0600
Subject: [PATCH 22/23] Revert "API: decimal nan is na"

This reverts commit 967c674a3ca65cf901a77fd910f33e9a1737850c.
---
 doc/source/whatsnew/v0.24.0.txt     |  1 -
 pandas/_libs/missing.pyx            |  5 ----
 pandas/tests/dtypes/test_missing.py | 38 -----------------------------
 3 files changed, 44 deletions(-)

diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt
index c40403509dd69..f6b619defc435 100644
--- a/doc/source/whatsnew/v0.24.0.txt
+++ b/doc/source/whatsnew/v0.24.0.txt
@@ -1229,7 +1229,6 @@ Missing
 - Bug in :func:`Series.hasnans` that could be incorrectly cached and return incorrect answers if null elements are introduced after an initial call (:issue:`19700`)
 - :func:`Series.isin` now treats all NaN-floats as equal also for `np.object`-dtype. This behavior is consistent with the behavior for float64 (:issue:`22119`)
 - :func:`unique` no longer mangles NaN-floats and the ``NaT``-object for `np.object`-dtype, i.e. ``NaT`` is no longer coerced to a NaN-value and is treated as a different entity. (:issue:`22295`)
-- :meth:`isna` now considers ``decimal.Decimal('NaN')`` a missing value (:issue:`23284`).
 
 
 MultiIndex
diff --git a/pandas/_libs/missing.pyx b/pandas/_libs/missing.pyx
index 4fa96f652adaf..b8791359241ad 100644
--- a/pandas/_libs/missing.pyx
+++ b/pandas/_libs/missing.pyx
@@ -1,7 +1,6 @@
 # -*- coding: utf-8 -*-
 
 import cython
-import decimal
 from cython import Py_ssize_t
 
 import numpy as np
@@ -34,8 +33,6 @@ cdef inline bint _check_all_nulls(object val):
         res = get_datetime64_value(val) == NPY_NAT
     elif util.is_timedelta64_object(val):
         res = get_timedelta64_value(val) == NPY_NAT
-    elif isinstance(val, decimal.Decimal):
-        return val.is_nan()
     else:
         res = 0
     return res
@@ -74,8 +71,6 @@ cpdef bint checknull(object val):
         return get_timedelta64_value(val) == NPY_NAT
     elif util.is_array(val):
         return False
-    elif isinstance(val, decimal.Decimal):
-        return val.is_nan()
     else:
         return val is None or util.is_nan(val)
 
diff --git a/pandas/tests/dtypes/test_missing.py b/pandas/tests/dtypes/test_missing.py
index 0fa7388931ec2..8f82db69a9213 100644
--- a/pandas/tests/dtypes/test_missing.py
+++ b/pandas/tests/dtypes/test_missing.py
@@ -1,6 +1,5 @@
 # -*- coding: utf-8 -*-
 
-import decimal
 import pytest
 from warnings import catch_warnings, simplefilter
 import numpy as np
@@ -249,43 +248,6 @@ def test_period(self):
         tm.assert_series_equal(isna(s), exp)
         tm.assert_series_equal(notna(s), ~exp)
 
-    def test_decimal(self):
-        # scalars
-        a = decimal.Decimal(1.0)
-        assert pd.isna(a) is False
-        assert pd.notna(a) is True
-
-        b = decimal.Decimal('NaN')
-        assert pd.isna(b) is True
-        assert pd.notna(b) is False
-
-        # array
-        arr = np.array([a, b])
-        expected = np.array([False, True])
-        result = pd.isna(arr)
-        tm.assert_numpy_array_equal(result, expected)
-
-        result = pd.notna(arr)
-        tm.assert_numpy_array_equal(result, ~expected)
-
-        # series
-        ser = pd.Series(arr)
-        expected = pd.Series(expected)
-        result = pd.isna(ser)
-        tm.assert_series_equal(result, expected)
-
-        result = pd.notna(ser)
-        tm.assert_series_equal(result, ~expected)
-
-        # index
-        idx = pd.Index(arr)
-        expected = np.array([False, True])
-        result = pd.isna(idx)
-        tm.assert_numpy_array_equal(result, expected)
-
-        result = pd.notna(idx)
-        tm.assert_numpy_array_equal(result, ~expected)
-
 
 def test_array_equivalent():
     assert array_equivalent(np.array([np.nan, np.nan]),

From 56e5f2fc31669e6708ffb15340da91d6e9e696e3 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Tue, 6 Nov 2018 11:10:28 -0600
Subject: [PATCH 23/23] Fixed sparse test

---
 pandas/tests/sparse/test_pivot.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pandas/tests/sparse/test_pivot.py b/pandas/tests/sparse/test_pivot.py
index e7eba63e4e0b3..0e71048f51177 100644
--- a/pandas/tests/sparse/test_pivot.py
+++ b/pandas/tests/sparse/test_pivot.py
@@ -47,4 +47,5 @@ def test_pivot_table_multi(self):
                                     values=['D', 'E'])
         res_dense = pd.pivot_table(self.dense, index='A', columns='B',
                                    values=['D', 'E'])
+        res_dense = res_dense.apply(lambda x: x.astype("Sparse[float64]"))
         tm.assert_frame_equal(res_sparse, res_dense)