diff --git a/doc/source/whatsnew/v0.18.1.txt b/doc/source/whatsnew/v0.18.1.txt index dbe446f0a7b4f..7e01690958457 100644 --- a/doc/source/whatsnew/v0.18.1.txt +++ b/doc/source/whatsnew/v0.18.1.txt @@ -89,3 +89,6 @@ Bug Fixes ~~~~~~~~~ - Bug in ``value_counts`` when ``normalize=True`` and ``dropna=True`` where nulls still contributed to the normalized count (:issue:`12558`) + +- Bug in ``CategoricalIndex.get_loc`` where the result differed from that of a + regular ``Index`` (:issue:`12531`) diff --git a/pandas/indexes/category.py b/pandas/indexes/category.py index 4ead02e5bd022..cc25b5fc6fb8c 100644 --- a/pandas/indexes/category.py +++ b/pandas/indexes/category.py @@ -287,11 +287,7 @@ def get_loc(self, key, method=None): codes = self.categories.get_loc(key) if (codes == -1): raise KeyError(key) - indexer, _ = self._engine.get_indexer_non_unique(np.array([codes])) - if (indexer == -1).any(): - raise KeyError(key) - - return indexer + return self._engine.get_loc(codes) def _can_reindex(self, indexer): """ always allow reindexing """ diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py index 78016c0f0b5f7..d929c5df99e4a 100644 --- a/pandas/tests/indexes/test_category.py +++ b/pandas/tests/indexes/test_category.py @@ -363,6 +363,50 @@ def test_get_indexer(self): self.assertRaises(NotImplementedError, lambda: idx2.get_indexer(idx1, method='nearest')) + def test_get_loc(self): + # GH 12531 + cidx1 = CategoricalIndex(list('abcde'), categories=list('edabc')) + idx1 = Index(list('abcde')) + self.assertEqual(cidx1.get_loc('a'), idx1.get_loc('a')) + self.assertEqual(cidx1.get_loc('e'), idx1.get_loc('e')) + + for i in [cidx1, idx1]: + with tm.assertRaises(KeyError): + i.get_loc('NOT-EXIST') + + # non-unique + cidx2 = CategoricalIndex(list('aacded'), categories=list('edabc')) + idx2 = Index(list('aacded')) + # results in bool array + res = cidx2.get_loc('d') + self.assert_numpy_array_equal(res, idx2.get_loc('d')) + self.assert_numpy_array_equal(res, 
np.array([False, False, False, + True, False, True])) + # unique element results in scalar + res = cidx2.get_loc('e') + self.assertEqual(res, idx2.get_loc('e')) + self.assertEqual(res, 4) + + for i in [cidx2, idx2]: + with tm.assertRaises(KeyError): + i.get_loc('NOT-EXIST') + + # non-unique, slicable + cidx3 = CategoricalIndex(list('aabbb'), categories=list('abc')) + idx3 = Index(list('aabbb')) + # results in slice + res = cidx3.get_loc('a') + self.assertEqual(res, idx3.get_loc('a')) + self.assertEqual(res, slice(0, 2, None)) + + res = cidx3.get_loc('b') + self.assertEqual(res, idx3.get_loc('b')) + self.assertEqual(res, slice(2, 5, None)) + + for i in [cidx3, idx3]: + with tm.assertRaises(KeyError): + i.get_loc('c') + def test_repr_roundtrip(self): ci = CategoricalIndex(['a', 'b'], categories=['a', 'b'], ordered=True) diff --git a/pandas/tests/indexing/test_categorical.py b/pandas/tests/indexing/test_categorical.py index 4e31fb350f6ee..5abff801fccec 100644 --- a/pandas/tests/indexing/test_categorical.py +++ b/pandas/tests/indexing/test_categorical.py @@ -180,6 +180,50 @@ def test_loc_listlike_dtypes(self): 'that are in the categories'): df.loc[['a', 'x']] + def test_ix_categorical_index(self): + df = pd.DataFrame(np.random.randn(3, 3), + index=list('ABC'), columns=list('XYZ')) + cdf = df.copy() + cdf.index = pd.CategoricalIndex(df.index) + cdf.columns = pd.CategoricalIndex(df.columns) + + expect = pd.Series(df.ix['A', :], index=cdf.columns, name='A') + assert_series_equal(cdf.ix['A', :], expect) + + expect = pd.Series(df.ix[:, 'X'], index=cdf.index, name='X') + assert_series_equal(cdf.ix[:, 'X'], expect) + + expect = pd.DataFrame(df.ix[['A', 'B'], :], columns=cdf.columns, + index=pd.CategoricalIndex(list('AB'))) + assert_frame_equal(cdf.ix[['A', 'B'], :], expect) + + expect = pd.DataFrame(df.ix[:, ['X', 'Y']], index=cdf.index, + columns=pd.CategoricalIndex(list('XY'))) + assert_frame_equal(cdf.ix[:, ['X', 'Y']], expect) + + # non-unique + df = 
pd.DataFrame(np.random.randn(3, 3), + index=list('ABA'), columns=list('XYX')) + cdf = df.copy() + cdf.index = pd.CategoricalIndex(df.index) + cdf.columns = pd.CategoricalIndex(df.columns) + + expect = pd.DataFrame(df.ix['A', :], columns=cdf.columns, + index=pd.CategoricalIndex(list('AA'))) + assert_frame_equal(cdf.ix['A', :], expect) + + expect = pd.DataFrame(df.ix[:, 'X'], index=cdf.index, + columns=pd.CategoricalIndex(list('XX'))) + assert_frame_equal(cdf.ix[:, 'X'], expect) + + expect = pd.DataFrame(df.ix[['A', 'B'], :], columns=cdf.columns, + index=pd.CategoricalIndex(list('AAB'))) + assert_frame_equal(cdf.ix[['A', 'B'], :], expect) + + expect = pd.DataFrame(df.ix[:, ['X', 'Y']], index=cdf.index, + columns=pd.CategoricalIndex(list('XXY'))) + assert_frame_equal(cdf.ix[:, ['X', 'Y']], expect) + def test_read_only_source(self): # GH 10043 rw_array = np.eye(10)