Skip to content

BUG: GH3216 Upcast when needed to DataFrame when setitem with indexer #3219

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Mar 31, 2013
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion RELEASE.rst
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,8 @@ pandas 0.11.0
- Handle "ragged" CSV files missing trailing delimiters in rows with missing
fields when also providing explicit list of column names (so the parser
knows how many columns to expect in the result) (GH2981_)
- On a mixed DataFrame, allow setting with indexers with ndarray/DataFrame
on rhs (GH3216_)

**API Changes**

Expand Down Expand Up @@ -249,9 +251,11 @@ pandas 0.11.0
- Add comparison operators to Period object (GH2781_)
- Fix bug when concatenating two Series into a DataFrame when they have the
same name (GH2797_)
- fix automatic color cycling when plotting consecutive timeseries
- Fix automatic color cycling when plotting consecutive timeseries
without color arguments (GH2816_)
- Fix bug in the pickling of PeriodIndex (GH2891_)
- Upcast/split blocks when needed in a mixed DataFrame when setitem
with an indexer (GH3216_)

.. _GH622: https://github.com/pydata/pandas/issues/622
.. _GH797: https://github.com/pydata/pandas/issues/797
Expand Down Expand Up @@ -340,6 +344,7 @@ pandas 0.11.0
.. _GH2751: https://github.com/pydata/pandas/issues/2751
.. _GH2747: https://github.com/pydata/pandas/issues/2747
.. _GH2816: https://github.com/pydata/pandas/issues/2816
.. _GH3216: https://github.com/pydata/pandas/issues/3216

pandas 0.10.1
=============
Expand Down
35 changes: 34 additions & 1 deletion pandas/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -694,6 +694,11 @@ def _maybe_promote(dtype, fill_value=np.nan):
if issubclass(fill_value.dtype.type, (np.datetime64,np.timedelta64)):
fill_value = tslib.iNaT
else:

# we need to change to object type as our
# fill_value is of object type
if fill_value.dtype == np.object_:
dtype = np.dtype(np.object_)
fill_value = np.nan

# returns tuple of (dtype, fill_value)
Expand Down Expand Up @@ -763,7 +768,7 @@ def changeit():
if change is not None:
change.dtype = r.dtype
change[:] = r

return r, True

# we want to decide whether putmask will work
Expand Down Expand Up @@ -792,6 +797,34 @@ def changeit():

return result, False

def _maybe_upcast_indexer(result, indexer, other, dtype=None):
    """ a safe version of setitem that (potentially) upcasts the result

    Parameters
    ----------
    result : array-like exposing ``dtype`` and supporting item assignment
    indexer : key used for the assignment ``result[indexer] = other``
    other : the value to set
    dtype : optional dtype, forwarded unchanged to ``_maybe_upcast``

    Returns
    -------
    (values, changed) : tuple
        ``(result, False)`` when ``other`` could be set directly into
        ``result``; otherwise ``(r, True)`` where ``r`` is the array
        produced by ``_maybe_upcast(..., copy=True)`` with the assignment
        applied to it.
    """

    def changeit():
        # our type is wrong here, need to upcast
        r, fill_value = _maybe_upcast(result, fill_value=other, dtype=dtype,
                                      copy=True)
        try:
            r[indexer] = other
        except Exception:
            # if we hit this then we still have an incompatible type;
            # fall back to the fill value handed back by _maybe_upcast.
            # (Exception rather than a bare except so that
            # KeyboardInterrupt / SystemExit still propagate)
            r[indexer] = fill_value

        return r, True

    # only the promoted dtype matters for this decision; the fill value
    # returned alongside it is not used here
    new_dtype, _ = _maybe_promote(result.dtype, other)
    if new_dtype != result.dtype:
        return changeit()

    try:
        result[indexer] = other
    except Exception:
        # the direct assignment failed, so retry on an upcast copy
        return changeit()

    return result, False

def _maybe_upcast(values, fill_value=np.nan, dtype=None, copy=False):
""" provide explicty type promotion and coercion

Expand Down
64 changes: 47 additions & 17 deletions pandas/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,24 +119,54 @@ def _setitem_with_indexer(self, indexer, value):
plane_indexer = indexer[:het_axis] + indexer[het_axis + 1:]
item_labels = self.obj._get_axis(het_axis)

if isinstance(value, (np.ndarray, DataFrame)) and value.ndim > 1:
raise ValueError('Setting mixed-type DataFrames with '
'array/DataFrame pieces not yet supported')
def setter(item, v):
data = self.obj[item]
values = data.values
if np.prod(values.shape):
result, changed = com._maybe_upcast_indexer(values,plane_indexer,v,dtype=getattr(data,'dtype',None))
if changed:
self.obj[item] = result

try:
for item in item_labels[het_idx]:
data = self.obj[item]
values = data.values
if np.prod(values.shape):
value = com._possibly_cast_to_datetime(
value, getattr(data, 'dtype', None))
values[plane_indexer] = value
except ValueError:
for item, v in zip(item_labels[het_idx], value):
data = self.obj[item]
values = data.values
if np.prod(values.shape):
values[plane_indexer] = v
labels = item_labels[het_idx]

if _is_list_like(value):

# we have an equal len Frame
if isinstance(value, DataFrame) and value.ndim > 1:

for item in labels:

# align to
if item in value:
v = value[item]
v = v.reindex(self.obj[item].reindex(v.index).dropna().index)
setter(item, v.values)
else:
setter(item, np.nan)

# we have an equal len ndarray
elif isinstance(value, np.ndarray) and value.ndim > 1:
if len(labels) != len(value):
raise ValueError('Must have equal len keys and value when'
' setting with an ndarray')

for i, item in enumerate(labels):
setter(item, value[:,i])

# we have an equal len list/ndarray
elif len(labels) == 1 and len(self.obj[labels[0]]) == len(value):
setter(labels[0], value)

# per label values
else:

for item, v in zip(labels, value):
setter(item, v)
else:

# scalar
for item in labels:
setter(item, value)

else:
if isinstance(indexer, tuple):
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -2065,7 +2065,7 @@ def update(self, other):
"""
other = other.reindex_like(self)
mask = notnull(other)
np.putmask(self.values, mask, other.values)
com._maybe_upcast_putmask(self.values,mask,other,change=self.values)

#----------------------------------------------------------------------
# Reindexing, sorting
Expand Down
49 changes: 45 additions & 4 deletions pandas/tests/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1275,20 +1275,61 @@ def test_setitem_single_column_mixed_datetime(self):
df.ix['d', :] = nan
self.assert_(com.isnull(df.ix['c', :]).all() == False)

# as of GH 3216 this will now work!
# try to set with a list like item
self.assertRaises(
Exception, df.ix.__setitem__, ('d', 'timestamp'), [nan])
#self.assertRaises(
# Exception, df.ix.__setitem__, ('d', 'timestamp'), [nan])

def test_setitem_frame(self):
piece = self.frame.ix[:2, ['A', 'B']]
self.frame.ix[-2:, ['A', 'B']] = piece.values
assert_almost_equal(self.frame.ix[-2:, ['A', 'B']].values,
piece.values)

# GH 3216

# already aligned
f = self.mixed_frame.copy()
piece = DataFrame([[ 1, 2], [3, 4]], index=f.index[0:2],columns=['A', 'B'])
key = (slice(None,2), ['A', 'B'])
f.ix[key] = piece
assert_almost_equal(f.ix[0:2, ['A', 'B']].values,
piece.values)

# rows unaligned
f = self.mixed_frame.copy()
piece = DataFrame([[ 1, 2 ], [3, 4], [5, 6], [7, 8]], index=list(f.index[0:2]) + ['foo','bar'],columns=['A', 'B'])
key = (slice(None,2), ['A', 'B'])
f.ix[key] = piece
assert_almost_equal(f.ix[0:2:, ['A', 'B']].values,
piece.values[0:2])

# key is unaligned with values
f = self.mixed_frame.copy()
piece = f.ix[:2, ['A']]
key = (slice(-2, None), ['A', 'B'])
f.ix[key] = piece
piece['B'] = np.nan
assert_almost_equal(f.ix[-2:, ['A', 'B']].values,
piece.values)

# ndarray
f = self.mixed_frame.copy()
piece = self.mixed_frame.ix[:2, ['A', 'B']]
f = self.mixed_frame.ix.__setitem__
key = (slice(-2, None), ['A', 'B'])
self.assertRaises(ValueError, f, key, piece)
f.ix[key] = piece.values
assert_almost_equal(f.ix[-2:, ['A', 'B']].values,
piece.values)


# needs upcasting
df = DataFrame([[1,2,'foo'],[3,4,'bar']],columns=['A','B','C'])
df2 = df.copy()
df2.ix[:,['A','B']] = df.ix[:,['A','B']]+0.5
expected = df.reindex(columns=['A','B'])
expected += 0.5
expected['C'] = df['C']
assert_frame_equal(df2, expected)

def test_setitem_frame_align(self):
piece = self.frame.ix[:2, ['A', 'B']]
Expand Down
12 changes: 12 additions & 0 deletions pandas/tests/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -724,6 +724,18 @@ def test_xs_multiindex(self):
expected = df.iloc[:,0:2].loc[:,'a']
assert_frame_equal(result,expected)

def test_setitem_dtype_upcast(self):
    """Regression test for GH3216: set a string into a float64 column."""

    frame = DataFrame([{"a": 1}, {"a": 3, "b": 2}])
    frame['c'] = np.nan

    # the newly added all-NaN column is expected to be float64
    column_dtype = frame['c'].dtype
    self.assert_(column_dtype == np.float64)

    # assign a string into that column through the .ix indexer
    frame.ix[0, 'c'] = 'foo'

    first_row = {"a": 1, "c": 'foo'}
    second_row = {"a": 3, "b": 2, "c": np.nan}
    wanted = DataFrame([first_row, second_row])
    assert_frame_equal(frame, wanted)


if __name__ == '__main__':
import nose
nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
Expand Down
7 changes: 7 additions & 0 deletions pandas/tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -2314,6 +2314,13 @@ def test_update(self):
expected = Series([1.5, 3.5, 3., 5., np.nan])
assert_series_equal(s, expected)

# GH 3217
df = DataFrame([{"a": 1}, {"a": 3, "b": 2}])
df['c'] = np.nan

# this will fail as long as series is a sub-class of ndarray
##### df['c'].update(Series(['foo'],index=[0])) #####

def test_corr(self):
_skip_if_no_scipy()

Expand Down