diff --git a/doc/source/v0.15.0.txt b/doc/source/v0.15.0.txt index e23f3ea6ff53a..1d9acadb68e58 100644 --- a/doc/source/v0.15.0.txt +++ b/doc/source/v0.15.0.txt @@ -1012,3 +1012,4 @@ Bug Fixes - Bug in masked series assignment where mismatching types would break alignment (:issue:`8387`) - Bug in NDFrame.equals gives false negatives with dtype=object (:issue:`8437`) - Bug in assignment with indexer where type diversity would break alignment (:issue:`8258`) +- Bug in ``NDFrame.loc`` indexing when row/column names were lost when target was a list/ndarray (:issue:`6552`) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 2194e8c36a9e3..5cfb2affe5a7b 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2260,8 +2260,7 @@ def _reindex_axes(self, axes, level, limit, method, fill_value, copy): def _reindex_index(self, new_index, method, copy, level, fill_value=NA, limit=None): new_index, indexer = self.index.reindex(new_index, method, level, - limit=limit, - copy_if_needed=True) + limit=limit) return self._reindex_with_indexers({0: [new_index, indexer]}, copy=copy, fill_value=fill_value, allow_dups=False) @@ -2269,8 +2268,7 @@ def _reindex_index(self, new_index, method, copy, level, fill_value=NA, def _reindex_columns(self, new_columns, copy, level, fill_value=NA, limit=None): new_columns, indexer = self.columns.reindex(new_columns, level=level, - limit=limit, - copy_if_needed=True) + limit=limit) return self._reindex_with_indexers({1: [new_columns, indexer]}, copy=copy, fill_value=fill_value, allow_dups=False) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 9f9b543f0fa7d..f2ff44bb5214c 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1776,8 +1776,8 @@ def reindex_axis(self, labels, axis=0, method=None, level=None, copy=True, axis_name = self._get_axis_name(axis) axis_values = self._get_axis(axis_name) method = com._clean_fill_method(method) - new_index, indexer = axis_values.reindex( - labels, method, level, limit=limit, copy_if_needed=True) + new_index, indexer = axis_values.reindex(labels, method, level, + limit=limit) return self._reindex_with_indexers( {axis: [new_index, indexer]}, method=method, fill_value=fill_value, limit=limit, copy=copy) diff --git a/pandas/core/index.py b/pandas/core/index.py index da8edf13ff18f..2048081573308 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -1578,17 +1578,22 @@ def _get_method(self, method): } return aliases.get(method, method) - def reindex(self, target, method=None, level=None, limit=None, - copy_if_needed=False): + def reindex(self, target, method=None, level=None, limit=None): """ - For Index, simply returns the new index and the results of - get_indexer. Provided here to enable an interface that is amenable for - subclasses of Index whose internals are different (like MultiIndex) + Create index with target's values (move/add/delete values as necessary) Returns ------- - (new_index, indexer, mask) : tuple + new_index : pd.Index + Resulting index + indexer : np.ndarray or None + Indices of output values in original index + """ + # GH6552: preserve names when reindexing to non-named target + # (i.e. neither Index nor Series). + preserve_names = not hasattr(target, 'name') + target = _ensure_index(target) if level is not None: if method is not None: @@ -1596,16 +1601,8 @@ def reindex(self, target, method=None, level=None, limit=None, _, indexer, _ = self._join_level(target, level, how='right', return_indexers=True) else: - if self.equals(target): indexer = None - - # to avoid aliasing an existing index - if (copy_if_needed and target.name != self.name and - self.name is not None): - if target.name is None: - target = self.copy() - else: if self.is_unique: indexer = self.get_indexer(target, method=method, @@ -1616,6 +1613,10 @@ def reindex(self, target, method=None, level=None, limit=None, "with a method or limit") indexer, missing = self.get_indexer_non_unique(target) + if preserve_names and target.nlevels == 1 and target.name != self.name: + target = target.copy() + target.name = self.name + return target, indexer def join(self, other, how='left', level=None, return_indexers=False): @@ -3686,17 +3687,21 @@ def get_indexer(self, target, method=None, limit=None): return com._ensure_platform_int(indexer) - def reindex(self, target, method=None, level=None, limit=None, - copy_if_needed=False): + def reindex(self, target, method=None, level=None, limit=None): """ - Performs any necessary conversion on the input index and calls - get_indexer. This method is here so MultiIndex and an Index of - like-labeled tuples can play nice together + Create index with target's values (move/add/delete values as necessary) Returns ------- - (new_index, indexer, mask) : (MultiIndex, ndarray, ndarray) + new_index : pd.MultiIndex + Resulting index + indexer : np.ndarray or None + Indices of output values in original index + """ + # GH6552: preserve names when reindexing to non-named target + # (i.e. neither Index nor Series). + preserve_names = not hasattr(target, 'names') if level is not None: if method is not None: @@ -3724,6 +3729,11 @@ def reindex(self, target, method=None, level=None, limit=None, # hopefully? target = MultiIndex.from_tuples(target) + if (preserve_names and target.nlevels == self.nlevels and + target.names != self.names): + target = target.copy(deep=False) + target.names = self.names + return target, indexer @cache_readonly diff --git a/pandas/core/internals.py b/pandas/core/internals.py index a94e627952b75..c88d799a54fed 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -3088,7 +3088,7 @@ def reindex_axis(self, new_index, axis, method=None, limit=None, """ new_index = _ensure_index(new_index) new_index, indexer = self.axes[axis].reindex( - new_index, method=method, limit=limit, copy_if_needed=True) + new_index, method=method, limit=limit) return self.reindex_indexer(new_index, indexer, axis=axis, fill_value=fill_value, copy=copy) diff --git a/pandas/tests/test_index.py b/pandas/tests/test_index.py index c86797c7ac90c..ec9193d67151b 100644 --- a/pandas/tests/test_index.py +++ b/pandas/tests/test_index.py @@ -1019,6 +1019,36 @@ def test_nan_first_take_datetime(self): exp = Index([idx[-1], idx[0], idx[1]]) tm.assert_index_equal(res, exp) + def test_reindex_preserves_name_if_target_is_list_or_ndarray(self): + # GH6552 + idx = pd.Index([0, 1, 2]) + + dt_idx = pd.date_range('20130101', periods=3) + + idx.name = None + self.assertEqual(idx.reindex([])[0].name, None) + self.assertEqual(idx.reindex(np.array([]))[0].name, None) + self.assertEqual(idx.reindex(idx.tolist())[0].name, None) + self.assertEqual(idx.reindex(idx.tolist()[:-1])[0].name, None) + self.assertEqual(idx.reindex(idx.values)[0].name, None) + self.assertEqual(idx.reindex(idx.values[:-1])[0].name, None) + + # Must preserve name even if dtype changes. + self.assertEqual(idx.reindex(dt_idx.values)[0].name, None) + self.assertEqual(idx.reindex(dt_idx.tolist())[0].name, None) + + idx.name = 'foobar' + self.assertEqual(idx.reindex([])[0].name, 'foobar') + self.assertEqual(idx.reindex(np.array([]))[0].name, 'foobar') + self.assertEqual(idx.reindex(idx.tolist())[0].name, 'foobar') + self.assertEqual(idx.reindex(idx.tolist()[:-1])[0].name, 'foobar') + self.assertEqual(idx.reindex(idx.values)[0].name, 'foobar') + self.assertEqual(idx.reindex(idx.values[:-1])[0].name, 'foobar') + + # Must preserve name even if dtype changes. + self.assertEqual(idx.reindex(dt_idx.values)[0].name, 'foobar') + self.assertEqual(idx.reindex(dt_idx.tolist())[0].name, 'foobar') + class Numeric(Base): @@ -3267,6 +3297,30 @@ def test_isin_level_kwarg(self): self.assertRaises(KeyError, idx.isin, vals_1, level='C') + def test_reindex_preserves_names_when_target_is_list_or_ndarray(self): + # GH6552 + idx = self.index.copy() + target = idx.copy() + idx.names = target.names = [None, None] + + other_dtype = pd.MultiIndex.from_product([[1, 2], [3, 4]]) + + # list & ndarray cases + self.assertEqual(idx.reindex([])[0].names, [None, None]) + self.assertEqual(idx.reindex(np.array([]))[0].names, [None, None]) + self.assertEqual(idx.reindex(target.tolist())[0].names, [None, None]) + self.assertEqual(idx.reindex(target.values)[0].names, [None, None]) + self.assertEqual(idx.reindex(other_dtype.tolist())[0].names, [None, None]) + self.assertEqual(idx.reindex(other_dtype.values)[0].names, [None, None]) + + idx.names = ['foo', 'bar'] + self.assertEqual(idx.reindex([])[0].names, ['foo', 'bar']) + self.assertEqual(idx.reindex(np.array([]))[0].names, ['foo', 'bar']) + self.assertEqual(idx.reindex(target.tolist())[0].names, ['foo', 'bar']) + self.assertEqual(idx.reindex(target.values)[0].names, ['foo', 'bar']) + self.assertEqual(idx.reindex(other_dtype.tolist())[0].names, ['foo', 'bar']) + self.assertEqual(idx.reindex(other_dtype.values)[0].names, ['foo', 'bar']) + def test_get_combined_index(): from pandas.core.index import _get_combined_index diff --git a/pandas/tests/test_indexing.py b/pandas/tests/test_indexing.py index 7213aaafd1376..97ebca39aae5a 100644 --- a/pandas/tests/test_indexing.py +++ b/pandas/tests/test_indexing.py @@ -3836,9 +3836,7 @@ def test_iloc_empty_list_indexer_is_ok(self): assert_frame_equal(df.iloc[[],:], df.iloc[:0, :]) # horizontal empty assert_frame_equal(df.iloc[[]], df.iloc[:0, :]) # horizontal empty - # FIXME: fix loc & xs def test_loc_empty_list_indexer_is_ok(self): - raise nose.SkipTest('loc discards columns names') from pandas.util.testing import makeCustomDataframe as mkdf df = mkdf(5, 2) assert_frame_equal(df.loc[:,[]], df.iloc[:, :0]) # vertical empty @@ -3846,7 +3844,6 @@ def test_loc_empty_list_indexer_is_ok(self): assert_frame_equal(df.loc[[]], df.iloc[:0, :]) # horizontal empty def test_ix_empty_list_indexer_is_ok(self): - raise nose.SkipTest('ix discards columns names') from pandas.util.testing import makeCustomDataframe as mkdf df = mkdf(5, 2) assert_frame_equal(df.ix[:,[]], df.iloc[:, :0]) # vertical empty