Skip to content

Commit f89a380

Browse files
committed
BUG: Fix wrong SparseBlock initialization in where method
BUG: Fix wrong SparseBlock initialization in quantile method BUG: Fix make_sparse mask generation not to cast when dtype is object BUG: Add SparseArray.all method BUG: Add copy parameter to prevent reinterpret cast of sparse Revert and fix astype parameters BUG: Create SparseBlock.__init__ to set type information of SparseArray BUG: Override SparseBlock._can_hold_element Revert changes in Block.where BUG: Override SparseBlock.make_block with fill_value argument BUG: Set fill_value and ndim parameter in make_block when generating SparseBlock from result BUG: Override SparseBlock._try_coerce_result to make the result flat and sparse BUG: Change from _can_hold_na to _can_hold_element for supporting non NA fill value BUG: Fix 1D check statement SparseDataFrame.where passes (1, n)-shape SparseBlock, but the actual values are an n-length SparseArray BUG: Adjust cond shape to SparseBlock SparseDataFrame.where passes (1, n)-shape SparseBlock and condition block to Block.where, but it compares n-length SparseArray held by the SparseBlock and (1, n)-shape condition block. BUG: Override SparseDataFrame.where method to set _default_fill_value
1 parent 96a5274 commit f89a380

File tree

3 files changed

+119
-16
lines changed

3 files changed

+119
-16
lines changed

pandas/core/internals.py

Lines changed: 106 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
is_bool_dtype,
3030
is_object_dtype,
3131
is_datetimelike_v_numeric,
32+
is_complex_dtype,
3233
is_float_dtype, is_numeric_dtype,
3334
is_numeric_v_string_like, is_extension_type,
3435
is_list_like,
@@ -454,8 +455,11 @@ def make_a_block(nv, ref_loc):
454455
nv = _block_shape(nv, ndim=self.ndim)
455456
except (AttributeError, NotImplementedError):
456457
pass
458+
457459
block = self.make_block(values=nv,
458-
placement=ref_loc, fastpath=True)
460+
placement=ref_loc,
461+
fastpath=True)
462+
459463
return block
460464

461465
# ndim == 1
@@ -1020,7 +1024,7 @@ def f(m, v, i):
10201024

10211025
return [self.make_block(new_values, fastpath=True)]
10221026

1023-
def coerce_to_target_dtype(self, other):
1027+
def coerce_to_target_dtype(self, other, copy=False):
10241028
"""
10251029
coerce the current block to a dtype compat for other
10261030
we will return a block, possibly object, and not raise
@@ -1037,7 +1041,7 @@ def coerce_to_target_dtype(self, other):
10371041

10381042
if self.is_bool or is_object_dtype(dtype) or is_bool_dtype(dtype):
10391043
# we don't upcast to bool
1040-
return self.astype(object)
1044+
return self.astype(object, copy=copy)
10411045

10421046
elif ((self.is_float or self.is_complex) and
10431047
(is_integer_dtype(dtype) or is_float_dtype(dtype))):
@@ -1051,14 +1055,14 @@ def coerce_to_target_dtype(self, other):
10511055
# not a datetime
10521056
if not ((is_datetime64_dtype(dtype) or
10531057
is_datetime64tz_dtype(dtype)) and self.is_datetime):
1054-
return self.astype(object)
1058+
return self.astype(object, copy=copy)
10551059

10561060
# don't upcast timezone with different timezone or no timezone
10571061
mytz = getattr(self.dtype, 'tz', None)
10581062
othertz = getattr(dtype, 'tz', None)
10591063

10601064
if str(mytz) != str(othertz):
1061-
return self.astype(object)
1065+
return self.astype(object, copy=copy)
10621066

10631067
raise AssertionError("possible recursion in "
10641068
"coerce_to_target_dtype: {} {}".format(
@@ -1068,18 +1072,18 @@ def coerce_to_target_dtype(self, other):
10681072

10691073
# not a timedelta
10701074
if not (is_timedelta64_dtype(dtype) and self.is_timedelta):
1071-
return self.astype(object)
1075+
return self.astype(object, copy=copy)
10721076

10731077
raise AssertionError("possible recursion in "
10741078
"coerce_to_target_dtype: {} {}".format(
10751079
self, other))
10761080

10771081
try:
1078-
return self.astype(dtype)
1082+
return self.astype(dtype, copy=copy)
10791083
except (ValueError, TypeError):
10801084
pass
10811085

1082-
return self.astype(object)
1086+
return self.astype(object, copy=copy)
10831087

10841088
def interpolate(self, method='pad', axis=0, index=None, values=None,
10851089
inplace=False, limit=None, limit_direction='forward',
@@ -1440,6 +1444,11 @@ def where(self, other, cond, align=True, errors='raise',
14401444
if hasattr(other, 'reindex_axis'):
14411445
other = other.values
14421446

1447+
if is_scalar(other) or is_list_like(other):
1448+
fill_value = other
1449+
else:
1450+
fill_value = None
1451+
14431452
if hasattr(cond, 'reindex_axis'):
14441453
cond = cond.values
14451454

@@ -1452,6 +1461,9 @@ def where(self, other, cond, align=True, errors='raise',
14521461
if not hasattr(cond, 'shape'):
14531462
raise ValueError("where must have a condition that is ndarray "
14541463
"like")
1464+
else:
1465+
if self.is_sparse:
1466+
cond = cond.flatten()
14551467

14561468
# our where function
14571469
def func(cond, values, other):
@@ -1489,7 +1501,7 @@ def func(cond, values, other):
14891501
transpose=transpose)
14901502
return self._maybe_downcast(blocks, 'infer')
14911503

1492-
if self._can_hold_na or self.ndim == 1:
1504+
if self._can_hold_element(fill_value) or values.ndim == 1:
14931505

14941506
if transpose:
14951507
result = result.T
@@ -1498,7 +1510,12 @@ def func(cond, values, other):
14981510
if try_cast:
14991511
result = self._try_cast_result(result)
15001512

1501-
return self.make_block(result)
1513+
if isinstance(result, np.ndarray):
1514+
ndim = result.ndim
1515+
else:
1516+
ndim = None
1517+
1518+
return self.make_block(result, ndim=ndim, fill_value=fill_value)
15021519

15031520
# might need to separate out blocks
15041521
axis = cond.ndim - 1
@@ -1512,7 +1529,8 @@ def func(cond, values, other):
15121529
r = self._try_cast_result(result.take(m.nonzero()[0],
15131530
axis=axis))
15141531
result_blocks.append(
1515-
self.make_block(r.T, placement=self.mgr_locs[m]))
1532+
self.make_block_same_class(r.T,
1533+
placement=self.mgr_locs[m]))
15161534

15171535
return result_blocks
15181536

@@ -1832,6 +1850,7 @@ class FloatBlock(FloatOrComplexBlock):
18321850
is_float = True
18331851
_downcast_dtype = 'int64'
18341852

1853+
@classmethod
18351854
def _can_hold_element(self, element):
18361855
tipo = maybe_infer_dtype_type(element)
18371856
if tipo is not None:
@@ -1881,6 +1900,7 @@ class ComplexBlock(FloatOrComplexBlock):
18811900
__slots__ = ()
18821901
is_complex = True
18831902

1903+
@classmethod
18841904
def _can_hold_element(self, element):
18851905
tipo = maybe_infer_dtype_type(element)
18861906
if tipo is not None:
@@ -2042,6 +2062,7 @@ class BoolBlock(NumericBlock):
20422062
is_bool = True
20432063
_can_hold_na = False
20442064

2065+
@classmethod
20452066
def _can_hold_element(self, element):
20462067
tipo = maybe_infer_dtype_type(element)
20472068
if tipo is not None:
@@ -2751,11 +2772,63 @@ class SparseBlock(NonConsolidatableMixIn, Block):
27512772
is_sparse = True
27522773
is_numeric = True
27532774
_box_to_block_values = False
2754-
_can_hold_na = True
27552775
_ftype = 'sparse'
27562776
_holder = SparseArray
27572777
_concatenator = staticmethod(_concat._concat_sparse)
27582778

2779+
def __init__(self, values, placement, ndim=None, fastpath=False, **kwargs):
2780+
super(SparseBlock, self).__init__(values, placement,
2781+
ndim, fastpath,
2782+
**kwargs)
2783+
2784+
dtype = self.values.sp_values.dtype
2785+
2786+
if is_float_dtype(dtype):
2787+
self.is_float = True
2788+
self._can_hold_na = True
2789+
elif is_complex_dtype(dtype):
2790+
self.is_complex = True
2791+
self._can_hold_na = True
2792+
elif is_integer_dtype(dtype):
2793+
self.is_integer = True
2794+
self._can_hold_na = False
2795+
elif is_bool_dtype(dtype):
2796+
self.is_bool = True
2797+
self._can_hold_na = False
2798+
elif is_object_dtype(dtype):
2799+
self.is_object = True
2800+
self._can_hold_na = True
2801+
else:
2802+
self._can_hold_na = False
2803+
2804+
def _can_hold_element(self, element):
2805+
""" require the same dtype as ourselves """
2806+
dtype = self.values.sp_values.dtype
2807+
2808+
if is_bool_dtype(dtype):
2809+
return BoolBlock._can_hold_element(element)
2810+
elif is_integer_dtype(dtype):
2811+
if is_list_like(element):
2812+
element = np.array(element)
2813+
tipo = element.dtype.type
2814+
return (issubclass(tipo, np.integer) and
2815+
not issubclass(tipo,
2816+
(np.datetime64,
2817+
np.timedelta64)) and
2818+
dtype.itemsize >= element.dtype.itemsize)
2819+
return is_integer(element)
2820+
elif is_float_dtype(dtype):
2821+
return FloatBlock._can_hold_element(element)
2822+
elif is_complex_dtype(dtype):
2823+
return ComplexBlock._can_hold_element(element)
2824+
elif is_object_dtype(dtype):
2825+
return True
2826+
else:
2827+
return False
2828+
2829+
def coerce_to_target_dtype(self, other, copy=True):
2830+
return super(SparseBlock, self).coerce_to_target_dtype(other, copy)
2831+
27592832
@property
27602833
def shape(self):
27612834
return (len(self.mgr_locs), self.sp_index.length)
@@ -2816,6 +2889,20 @@ def copy(self, deep=True, mgr=None):
28162889
kind=self.kind, copy=deep,
28172890
placement=self.mgr_locs)
28182891

2892+
def make_block(self, values, placement=None,
2893+
ndim=None, fill_value=None, **kwargs):
2894+
"""
2895+
Create a new block, with type inference propagate any values that are
2896+
not specified
2897+
"""
2898+
if fill_value is not None and isinstance(values, SparseArray):
2899+
values = SparseArray(values.to_dense(), fill_value=fill_value,
2900+
kind=values.kind, dtype=values.dtype)
2901+
2902+
return super(SparseBlock, self).make_block(values, placement=placement,
2903+
ndim=ndim, fill_value=None,
2904+
**kwargs)
2905+
28192906
def make_block_same_class(self, values, placement, sparse_index=None,
28202907
kind=None, dtype=None, fill_value=None,
28212908
copy=False, fastpath=True, **kwargs):
@@ -2912,9 +2999,15 @@ def sparse_reindex(self, new_index):
29122999
return self.make_block_same_class(values, sparse_index=new_index,
29133000
placement=self.mgr_locs)
29143001

3002+
def _try_coerce_result(self, result):
3003+
""" reverse of try_coerce_args """
3004+
if isinstance(result, np.ndarray):
3005+
result = SparseArray(result.flatten(), kind=self.kind)
3006+
return result
3007+
29153008

29163009
def make_block(values, placement, klass=None, ndim=None, dtype=None,
2917-
fastpath=False):
3010+
fastpath=False, **kwargs):
29183011
if klass is None:
29193012
dtype = dtype or values.dtype
29203013
vtype = dtype.type

pandas/core/sparse/array.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -248,7 +248,7 @@ def _simple_new(cls, data, sp_index, fill_value):
248248
sp_index.ngaps > 0):
249249
# if float fill_value is being included in dense repr,
250250
# convert values to float
251-
data = data.astype(float)
251+
data = data.astype(float, copy=True)
252252

253253
result = data.view(cls)
254254

pandas/core/sparse/frame.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -321,8 +321,9 @@ def _apply_columns(self, func):
321321
data=new_data, index=self.index, columns=self.columns,
322322
default_fill_value=self.default_fill_value).__finalize__(self)
323323

324-
def astype(self, dtype):
325-
return self._apply_columns(lambda x: x.astype(dtype))
324+
def astype(self, dtype, copy=True, errors='raise', **kwargs):
325+
return self._apply_columns(lambda x: x.astype(dtype, copy,
326+
errors, **kwargs))
326327

327328
def copy(self, deep=True):
328329
"""
@@ -333,6 +334,15 @@ def copy(self, deep=True):
333334
result._default_kind = self._default_kind
334335
return result
335336

337+
def where(self, cond, other=np.nan, inplace=False, axis=None, level=None,
338+
try_cast=False, raise_on_error=True):
339+
result = super(SparseDataFrame, self).where(cond, other,
340+
inplace, axis,
341+
level, try_cast,
342+
raise_on_error)
343+
result._default_fill_value = other
344+
return result
345+
336346
@property
337347
def default_fill_value(self):
338348
return self._default_fill_value

0 commit comments

Comments
 (0)