
Commit cc1a791

Merge pull request #5849 from jreback/dup_selection

BUG: Bug in selection with missing values via .ix from a duplicate indexed DataFrame failing

2 parents 96b1cad + 10c0943

3 files changed, +63 -27 lines changed

doc/source/release.rst

+1

@@ -73,6 +73,7 @@ Bug Fixes
 ~~~~~~~~~
 - Bug in Series replace with timestamp dict (:issue:`5797`)
 - read_csv/read_table now respects the `prefix` kwarg (:issue:`5732`).
+- Bug in selection with missing values via ``.ix`` from a duplicate indexed DataFrame failing (:issue:`5835`)
 
 pandas 0.13.0
 -------------

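For context, the scenario this note covers can be reproduced with a short script. This is a sketch only (it assumes a pandas build of this era, where .ix is still available) and mirrors the test added in this commit:

import numpy as np
import pandas as pd

# duplicate column labels, plus a requested label ('C') that is absent
df = pd.DataFrame(np.random.randn(5, 5), columns=['A', 'B', 'B', 'B', 'A'])

# this selection previously failed; with this change the missing label
# comes back as an all-NaN column alongside the duplicated A/B columns
result = df.ix[:, ['A', 'B', 'C']]
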
pandas/core/internals.py

+54 -27

@@ -3119,6 +3119,9 @@ def reindex_indexer(self, new_axis, indexer, axis=1, fill_value=None,
         if not allow_dups and not self.axes[axis].is_unique:
             raise ValueError("cannot reindex from a duplicate axis")
 
+        if not self.is_consolidated():
+            self = self.consolidate()
+
         if axis == 0:
             return self._reindex_indexer_items(new_axis, indexer, fill_value)
 
@@ -3140,38 +3143,62 @@ def _reindex_indexer_items(self, new_items, indexer, fill_value):
         new_blocks = []
         is_unique = new_items.is_unique
 
+        # we have duplicates in the items and what we are reindexing
+        if not is_unique and not self.items.is_unique:
+
+            rl = self._set_ref_locs(do_refs='force')
+            for i, idx in enumerate(indexer):
+                item = new_items.take([i])
+                if idx >= 0:
+                    blk, lidx = rl[idx]
+                    blk = make_block(_block_shape(blk.iget(lidx)), item,
+                                     new_items, ndim=self.ndim, fastpath=True,
+                                     placement=[i])
+
+                # a missing value
+                else:
+                    blk = self._make_na_block(item,
+                                              new_items,
+                                              placement=[i],
+                                              fill_value=fill_value)
+                new_blocks.append(blk)
+            new_blocks = _consolidate(new_blocks, new_items)
+
+
         # keep track of what items aren't found anywhere
-        l = np.arange(len(item_order))
-        mask = np.zeros(len(item_order), dtype=bool)
-        for blk in self.blocks:
-            blk_indexer = blk.items.get_indexer(item_order)
-            selector = blk_indexer != -1
+        else:
+            l = np.arange(len(item_order))
+            mask = np.zeros(len(item_order), dtype=bool)
 
-            # update with observed items
-            mask |= selector
+            for blk in self.blocks:
+                blk_indexer = blk.items.get_indexer(item_order)
+                selector = blk_indexer != -1
+
+                # update with observed items
+                mask |= selector
 
-            if not selector.any():
-                continue
+                if not selector.any():
+                    continue
 
-            new_block_items = new_items.take(selector.nonzero()[0])
-            new_values = com.take_nd(blk.values, blk_indexer[selector], axis=0,
-                                     allow_fill=False)
-            placement = l[selector] if not is_unique else None
-            new_blocks.append(make_block(new_values,
-                                         new_block_items,
+                new_block_items = new_items.take(selector.nonzero()[0])
+                new_values = com.take_nd(blk.values, blk_indexer[selector], axis=0,
+                                         allow_fill=False)
+                placement = l[selector] if not is_unique else None
+                new_blocks.append(make_block(new_values,
+                                             new_block_items,
                                              new_items,
-                                         placement=placement,
-                                         fastpath=True))
-
-        if not mask.all():
-            na_items = new_items[-mask]
-            placement = l[-mask] if not is_unique else None
-            na_block = self._make_na_block(na_items,
-                                           new_items,
-                                           placement=placement,
-                                           fill_value=fill_value)
-            new_blocks.append(na_block)
-            new_blocks = _consolidate(new_blocks, new_items)
+                                             placement=placement,
+                                             fastpath=True))
+
+            if not mask.all():
+                na_items = new_items[-mask]
+                placement = l[-mask] if not is_unique else None
+                na_block = self._make_na_block(na_items,
+                                               new_items,
+                                               placement=placement,
+                                               fill_value=fill_value)
+                new_blocks.append(na_block)
+                new_blocks = _consolidate(new_blocks, new_items)
 
         return self.__class__(new_blocks, new_axes)

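The new duplicate-items branch above works position by position: for each slot of the indexer it either pulls the matching item out of the existing blocks (idx >= 0) or manufactures an all-NA block (idx == -1). The following is a rough standalone illustration of that strategy in plain NumPy; it is not the BlockManager code itself, and the function name reindex_rows is made up for the sketch (a block stores its values with one row per item):

import numpy as np

def reindex_rows(values, indexer, fill_value=np.nan):
    # values: 2-D array with one row per item (one column of the frame)
    # indexer: positions into `values`, with -1 marking a missing label
    n = values.shape[1]
    out = []
    for idx in indexer:
        if idx >= 0:
            # reuse the existing item's values; duplicates are fine
            # because each slot is handled independently
            out.append(values[idx])
        else:
            # a missing value: fill the whole row
            out.append(np.full(n, fill_value))
    return np.vstack(out)

# two items stored as rows; request [first, second, second, missing]
vals = np.arange(6.0).reshape(2, 3)
print(reindex_rows(vals, [0, 1, 1, -1]))

Handling each slot separately is what keeps duplicate target labels unambiguous: every output position carries its own placement, which is what the placement=[i] argument records in the real code.
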
pandas/tests/test_indexing.py

+8

@@ -981,6 +981,14 @@ def test_dups_fancy_indexing(self):
         result = df.ix[['A','A','E']]
         assert_frame_equal(result, expected)
 
+        # GH 5835
+        # dups on index and missing values
+        df = DataFrame(np.random.randn(5,5),columns=['A','B','B','B','A'])
+
+        expected = pd.concat([df.ix[:,['A','B']],DataFrame(np.nan,columns=['C'],index=df.index)],axis=1)
+        result = df.ix[:,['A','B','C']]
+        assert_frame_equal(result, expected)
+
     def test_indexing_mixed_frame_bug(self):
 
         # GH3492

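As a usage note, the structural properties that assert_frame_equal pins down in the new case can also be checked directly (again assuming this era's .ix API; the data are random, so only shape and NaN-ness are asserted):

import numpy as np
import pandas as pd

df = pd.DataFrame(np.random.randn(5, 5), columns=['A', 'B', 'B', 'B', 'A'])
result = df.ix[:, ['A', 'B', 'C']]

# two 'A' columns + three 'B' columns + the manufactured 'C' column
assert result.shape == (5, 6)
# the missing label is filled with NaN rather than raising
assert result['C'].isnull().all()
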
0 commit comments
