From 574facc76c898377a11956f1a7051de34965efbe Mon Sep 17 00:00:00 2001
From: jreback <jeff@reback.net>
Date: Thu, 9 Oct 2014 13:10:45 -0400
Subject: [PATCH] BUG/REGR: bool-like Indexes not properly coercing to object
 (GH8522)

---
 doc/source/v0.15.0.txt      |  2 +-
 pandas/core/base.py         |  2 +-
 pandas/core/index.py        | 18 ++++++++++------
 pandas/tests/test_base.py   | 42 +++++++++++++++++++++++++++++++------
 pandas/tests/test_index.py  |  8 +++++++
 pandas/tests/test_series.py |  9 +++++++-
 pandas/util/testing.py      |  7 +++++++
 7 files changed, 73 insertions(+), 15 deletions(-)

diff --git a/doc/source/v0.15.0.txt b/doc/source/v0.15.0.txt
index eec424f619bde..d972edeb2bbb3 100644
--- a/doc/source/v0.15.0.txt
+++ b/doc/source/v0.15.0.txt
@@ -642,7 +642,7 @@ Internal Refactoring
 
 In 0.15.0 ``Index`` has internally been refactored to no longer sub-class ``ndarray``
 but instead subclass ``PandasObject``, similarly to the rest of the pandas objects. This change allows very easy sub-classing and creation of new index types. This should be
-a transparent change with only very limited API implications (:issue:`5080`, :issue:`7439`, :issue:`7796`, :issue:`8024`, :issue:`8367`, :issue:`7997`)
+a transparent change with only very limited API implications (:issue:`5080`, :issue:`7439`, :issue:`7796`, :issue:`8024`, :issue:`8367`, :issue:`7997`, :issue:`8522`)
 
 - you may need to unpickle pandas version < 0.15.0 pickles using ``pd.read_pickle`` rather than ``pickle.load``. See :ref:`pickle docs <io.pickle>`
 - when plotting with a ``PeriodIndex``. The ``matplotlib`` internal axes will now be arrays of ``Period`` rather than a ``PeriodIndex``. (this is similar to how a ``DatetimeIndex`` passes arrays of ``datetimes`` now)
diff --git a/pandas/core/base.py b/pandas/core/base.py
index 794c05db082c7..5d6f39e1792c3 100644
--- a/pandas/core/base.py
+++ b/pandas/core/base.py
@@ -499,7 +499,7 @@ def searchsorted(self, key, side='left'):
     @Appender(_shared_docs['drop_duplicates'] % _indexops_doc_kwargs)
     def drop_duplicates(self, take_last=False, inplace=False):
         duplicated = self.duplicated(take_last=take_last)
-        result = self[~duplicated.values]
+        result = self[~(duplicated.values).astype(bool)]
         if inplace:
             return self._update_inplace(result)
         else:
diff --git a/pandas/core/index.py b/pandas/core/index.py
index 99f1682b133c3..f87b7e982b332 100644
--- a/pandas/core/index.py
+++ b/pandas/core/index.py
@@ -148,16 +148,16 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, fastpath=False,
                     data = np.array(data, dtype=dtype, copy=copy)
                 except TypeError:
                     pass
-            elif isinstance(data, PeriodIndex):
-                return PeriodIndex(data, copy=copy, name=name, **kwargs)
 
+            # maybe coerce to a sub-class
+            if isinstance(data, PeriodIndex):
+                return PeriodIndex(data, copy=copy, name=name, **kwargs)
             if issubclass(data.dtype.type, np.integer):
                 return Int64Index(data, copy=copy, dtype=dtype, name=name)
-            if issubclass(data.dtype.type, np.floating):
+            elif issubclass(data.dtype.type, np.floating):
                 return Float64Index(data, copy=copy, dtype=dtype, name=name)
-
-            if com.is_bool_dtype(data):
-                subarr = data
+            elif issubclass(data.dtype.type, np.bool_) or com.is_bool_dtype(data):
+                subarr = data.astype('object')  # bool-like data is held as object dtype (GH8522)
             else:
                 subarr = com._asarray_tuplesafe(data, dtype=object)
 
@@ -583,6 +583,9 @@ def is_unique(self):
         """ return if the index has unique values """
         return self._engine.is_unique
 
+    def is_boolean(self):
+        return self.inferred_type in ['boolean']  # checks inferred_type, not dtype
+
     def is_integer(self):
         return self.inferred_type in ['integer']
 
@@ -592,6 +595,9 @@ def is_floating(self):
     def is_numeric(self):
         return self.inferred_type in ['integer', 'floating']
 
+    def is_object(self):
+        return self.dtype == np.object_  # dtype check only; bool-like indexes are object too (GH8522)
+
     def is_mixed(self):
         return 'mixed' in self.inferred_type
 
diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py
index f508b8915da1c..814da043d0319 100644
--- a/pandas/tests/test_base.py
+++ b/pandas/tests/test_base.py
@@ -180,6 +180,7 @@ def f():
 
 class Ops(tm.TestCase):
     def setUp(self):
+        self.bool_index    = tm.makeBoolIndex(10)
         self.int_index     = tm.makeIntIndex(10)
         self.float_index   = tm.makeFloatIndex(10)
         self.dt_index      = tm.makeDateIndex(10)
@@ -189,14 +190,15 @@ def setUp(self):
 
         arr = np.random.randn(10)
         self.int_series    = Series(arr, index=self.int_index)
-        self.float_series  = Series(arr, index=self.int_index)
+        self.float_series  = Series(arr, index=self.float_index)
         self.dt_series     = Series(arr, index=self.dt_index)
         self.dt_tz_series  = self.dt_tz_index.to_series(keep_tz=True)
         self.period_series = Series(arr, index=self.period_index)
         self.string_series = Series(arr, index=self.string_index)
 
-        types = ['int','float','dt', 'dt_tz', 'period','string']
-        self.objs = [ getattr(self,"{0}_{1}".format(t,f)) for t in types for f in ['index','series'] ]
+        types = ['bool','int','float','dt', 'dt_tz', 'period','string']
+        fmts = [ "{0}_{1}".format(t,f) for t in types for f in ['index','series'] ]
+        self.objs = [ getattr(self,f) for f in fmts if getattr(self,f,None) is not None ]
 
     def check_ops_properties(self, props, filter=None, ignore_failures=False):
         for op in props:
@@ -340,6 +342,9 @@ def test_value_counts_unique_nunique(self):
                 # freq must be specified because repeat makes freq ambiguous
                 expected_index = o[::-1]
                 o = klass(np.repeat(values, range(1, len(o) + 1)), freq=o.freq)
+            # don't test boolean
+            elif isinstance(o,Index) and o.is_boolean():
+                continue
             elif isinstance(o, Index):
                 expected_index = values[::-1]
                 o = klass(np.repeat(values, range(1, len(o) + 1)))
@@ -366,6 +371,10 @@ def test_value_counts_unique_nunique(self):
                 klass = type(o)
                 values = o.values
 
+                if isinstance(o,Index) and o.is_boolean():
+                    # don't test boolean
+                    continue
+
                 if ((isinstance(o, Int64Index) and not isinstance(o,
                     (DatetimeIndex, PeriodIndex)))):
                     # skips int64 because it doesn't allow to include nan or None
@@ -537,7 +546,14 @@ def test_value_counts_inferred(self):
 
     def test_factorize(self):
         for o in self.objs:
-            exp_arr = np.array(range(len(o)))
+
+            if isinstance(o,Index) and o.is_boolean():
+                # bool index is [False, True] + [False] * 8: only two uniques
+                exp_arr = np.array([0,1] + [0] * 8)
+                exp_uniques = Index([False,True])
+            else:
+                exp_arr = np.array(range(len(o)))
+                exp_uniques = o
             labels, uniques = o.factorize()
 
             self.assert_numpy_array_equal(labels, exp_arr)
@@ -545,16 +561,22 @@ def test_factorize(self):
                 expected = Index(o.values)
                 self.assert_numpy_array_equal(uniques, expected)
             else:
-                self.assertTrue(uniques.equals(o))
+                self.assertTrue(uniques.equals(exp_uniques))
 
         for o in self.objs:
+
+            # don't test boolean
+            if isinstance(o,Index) and o.is_boolean():
+                continue
+
             # sort by value, and create duplicates
             if isinstance(o, Series):
                 o.sort()
+                n = o.iloc[5:].append(o)
             else:
                 indexer = o.argsort()
                 o = o.take(indexer)
-            n = o[5:].append(o)
+                n = o[5:].append(o)
 
             exp_arr = np.array([5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
             labels, uniques = n.factorize(sort=True)
@@ -582,6 +604,14 @@ def test_duplicated_drop_duplicates(self):
         for original in self.objs:
 
             if isinstance(original, Index):
+
+                # special case
+                if original.is_boolean():
+                    result = original.drop_duplicates()
+                    expected = Index([False,True])
+                    tm.assert_index_equal(result, expected)
+                    continue
+
                 # original doesn't have duplicates
                 expected = Index([False] * len(original))
                 tm.assert_index_equal(original.duplicated(), expected)
diff --git a/pandas/tests/test_index.py b/pandas/tests/test_index.py
index 9984ad30612db..a8c4548f462ac 100644
--- a/pandas/tests/test_index.py
+++ b/pandas/tests/test_index.py
@@ -94,6 +94,7 @@ def setUp(self):
             dateIndex = tm.makeDateIndex(100),
             intIndex = tm.makeIntIndex(100),
             floatIndex = tm.makeFloatIndex(100),
+            boolIndex = Index([True,False]),
             empty = Index([]),
             tuples = MultiIndex.from_tuples(lzip(['foo', 'bar', 'baz'],
                                                  [1, 2, 3]))
@@ -732,6 +733,13 @@ def test_is_numeric(self):
         self.assertTrue(self.intIndex.is_numeric())
         self.assertTrue(self.floatIndex.is_numeric())
 
+    def test_is_object(self):
+        self.assertTrue(self.strIndex.is_object())
+        self.assertTrue(self.boolIndex.is_object())  # bool-like Index is coerced to object (GH8522)
+        self.assertFalse(self.intIndex.is_object())
+        self.assertFalse(self.dateIndex.is_object())
+        self.assertFalse(self.floatIndex.is_object())
+
     def test_is_all_dates(self):
         self.assertTrue(self.dateIndex.is_all_dates)
         self.assertFalse(self.strIndex.is_all_dates)
diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py
index d3f7414289053..29bdb2c983d61 100644
--- a/pandas/tests/test_series.py
+++ b/pandas/tests/test_series.py
@@ -1222,7 +1222,7 @@ def test_getitem_dups(self):
         expected = Series([3,4],index=['C','C'],dtype=np.int64)
         result = s['C']
         assert_series_equal(result, expected)
-        
+
     def test_getitem_dataframe(self):
         rng = list(range(10))
         s   = pd.Series(10, index=rng)
@@ -1817,6 +1817,13 @@ def test_drop(self):
         # bad axis
         self.assertRaises(ValueError, s.drop, 'one', axis='columns')
 
+        # GH 8522
+        s = Series([2,3], index=[True, False])
+        self.assertTrue(s.index.is_object())
+        result = s.drop(True)
+        expected = Series([3],index=[False])
+        assert_series_equal(result,expected)
+
     def test_ix_setitem(self):
         inds = self.series.index[[3, 4, 7]]
 
diff --git a/pandas/util/testing.py b/pandas/util/testing.py
index 977d445f917a8..d8cc39908a31f 100644
--- a/pandas/util/testing.py
+++ b/pandas/util/testing.py
@@ -738,6 +738,13 @@ def makeStringIndex(k=10):
 def makeUnicodeIndex(k=10):
     return Index([randu(10) for _ in range(k)])
 
+def makeBoolIndex(k=10):
+    """Return a length-k bool Index: [True] if k == 1, else [False, True, False, ...]."""
+    if k == 1:
+        return Index([True])
+    # slice so that k <= 0 yields an empty Index instead of a 2-element one
+    return Index(([False,True] + [False]*(k-2))[:max(k, 0)])
+
 def makeIntIndex(k=10):
     return Index(lrange(k))