From 4ca06ee85d671cb5b9c1baf846ae0544c74b7b53 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 15 Jan 2018 22:29:22 -0800 Subject: [PATCH 1/6] remove unused fastpath kwarg from Blocks --- pandas/core/internals.py | 84 ++++++++++++++++++---------------------- 1 file changed, 37 insertions(+), 47 deletions(-) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 3c923133477df..8894b56eb1509 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -105,7 +105,7 @@ class Block(PandasObject): _holder = None _concatenator = staticmethod(np.concatenate) - def __init__(self, values, placement, ndim=None, fastpath=False): + def __init__(self, values, placement, ndim=None): if ndim is None: ndim = values.ndim elif values.ndim != ndim: @@ -216,13 +216,12 @@ def make_block_scalar(self, values, **kwargs): """ return ScalarBlock(values) - def make_block_same_class(self, values, placement=None, fastpath=True, - **kwargs): + def make_block_same_class(self, values, placement=None, **kwargs): """ Wrap given values in a block of same type as self. """ if placement is None: placement = self.mgr_locs return make_block(values, placement=placement, klass=self.__class__, - fastpath=fastpath, **kwargs) + **kwargs) @mgr_locs.setter def mgr_locs(self, new_mgr_locs): @@ -340,7 +339,7 @@ def reindex_axis(self, indexer, method=None, axis=1, fill_value=None, new_values = algos.take_nd(self.values, indexer, axis, fill_value=fill_value, mask_info=mask_info) - return self.make_block(new_values, fastpath=True) + return self.make_block(new_values) def iget(self, i): return self.values[i] @@ -459,7 +458,7 @@ def make_a_block(nv, ref_loc): except (AttributeError, NotImplementedError): pass block = self.make_block(values=nv, - placement=ref_loc, fastpath=True) + placement=ref_loc) return block # ndim == 1 @@ -518,7 +517,7 @@ def downcast(self, dtypes=None, mgr=None): dtypes = 'infer' nv = maybe_downcast_to_dtype(values, dtypes) - return self.make_block(nv, fastpath=True) + return self.make_block(nv) # ndim > 1 if dtypes is None: @@ -910,7 +909,7 @@ def _is_empty_indexer(indexer): # coerce and try to infer the dtypes of the result values = self._try_coerce_and_cast_result(values, dtype) - block = self.make_block(transf(values), fastpath=True) + block = self.make_block(transf(values)) return block def putmask(self, mask, new, align=True, inplace=False, axis=0, @@ -1026,7 +1025,7 @@ def f(m, v, i): if transpose: new_values = new_values.T - return [self.make_block(new_values, fastpath=True)] + return [self.make_block(new_values)] def coerce_to_target_dtype(self, other): """ @@ -1161,7 +1160,7 @@ def _interpolate_with_fill(self, method='pad', axis=0, inplace=False, dtype=self.dtype) values = self._try_coerce_result(values) - blocks = [self.make_block(values, klass=self.__class__, fastpath=True)] + blocks = [self.make_block(values, klass=self.__class__)] return self._maybe_downcast(blocks, downcast) def _interpolate(self, method=None, index=None, values=None, @@ -1201,8 +1200,7 @@ def func(x): # interp each column independently interp_values = np.apply_along_axis(func, axis, data) - blocks = [self.make_block(interp_values, klass=self.__class__, - fastpath=True)] + blocks = [self.make_block(interp_values, klass=self.__class__)] return self._maybe_downcast(blocks, downcast) def take_nd(self, indexer, axis, new_mgr_locs=None, fill_tuple=None): @@ -1246,7 +1244,7 @@ def take_nd(self, indexer, axis, new_mgr_locs=None, fill_tuple=None): def diff(self, n, axis=1, mgr=None): """ return block for the diff of the values """ new_values = algos.diff(self.values, n, axis=axis) - return [self.make_block(values=new_values, fastpath=True)] + return [self.make_block(values=new_values)] def shift(self, periods, axis=0, mgr=None): """ shift the block by periods, possibly upcast """ @@ -1276,7 +1274,7 @@ def shift(self, periods, axis=0, mgr=None): if f_ordered: new_values = new_values.T - return [self.make_block(new_values, fastpath=True)] + return [self.make_block(new_values)] def eval(self, func, other, errors='raise', try_cast=False, mgr=None): """ @@ -1416,7 +1414,7 @@ def handle_error(): result = self._try_cast_result(result) result = _block_shape(result, ndim=self.ndim) - return [self.make_block(result, fastpath=True, )] + return [self.make_block(result)] def where(self, other, cond, align=True, errors='raise', try_cast=False, axis=0, transpose=False, mgr=None): @@ -1696,7 +1694,7 @@ class NonConsolidatableMixIn(object): _validate_ndim = False _holder = None - def __init__(self, values, placement, ndim=None, fastpath=False, **kwargs): + def __init__(self, values, placement, ndim=None, **kwargs): # Placement must be converted to BlockPlacement via property setter # before ndim logic, because placement may be a slice which doesn't @@ -1954,11 +1952,11 @@ class TimeDeltaBlock(DatetimeLikeBlockMixin, IntBlock): _can_hold_na = True is_numeric = False - def __init__(self, values, placement, fastpath=False, **kwargs): + def __init__(self, values, placement, **kwargs): if values.dtype != _TD_DTYPE: values = conversion.ensure_timedelta64ns(values) - super(TimeDeltaBlock, self).__init__(values, fastpath=True, + super(TimeDeltaBlock, self).__init__(values, placement=placement, **kwargs) @property @@ -2092,12 +2090,12 @@ class ObjectBlock(Block): is_object = True _can_hold_na = True - def __init__(self, values, ndim=2, fastpath=False, placement=None, + def __init__(self, values, ndim=2, placement=None, **kwargs): if issubclass(values.dtype.type, compat.string_types): values = np.array(values, dtype=object) - super(ObjectBlock, self).__init__(values, ndim=ndim, fastpath=fastpath, + super(ObjectBlock, self).__init__(values, ndim=ndim, placement=placement, **kwargs) @property @@ -2345,11 +2343,10 @@ class CategoricalBlock(NonConsolidatableMixIn, ObjectBlock): _holder = Categorical _concatenator = staticmethod(_concat._concat_categorical) - def __init__(self, values, placement, fastpath=False, **kwargs): + def __init__(self, values, placement, **kwargs): # coerce to categorical if we can super(CategoricalBlock, self).__init__(_maybe_to_categorical(values), - fastpath=True, placement=placement, **kwargs) @property @@ -2467,11 +2464,11 @@ class DatetimeBlock(DatetimeLikeBlockMixin, Block): is_datetime = True _can_hold_na = True - def __init__(self, values, placement, fastpath=False, **kwargs): + def __init__(self, values, placement, **kwargs): if values.dtype != _NS_DTYPE: values = conversion.ensure_datetime64ns(values) - super(DatetimeBlock, self).__init__(values, fastpath=True, + super(DatetimeBlock, self).__init__(values, placement=placement, **kwargs) def _astype(self, dtype, mgr=None, **kwargs): @@ -2825,7 +2822,7 @@ def copy(self, deep=True, mgr=None): def make_block_same_class(self, values, placement, sparse_index=None, kind=None, dtype=None, fill_value=None, - copy=False, fastpath=True, **kwargs): + copy=False, **kwargs): """ return a new block """ if dtype is None: dtype = values.dtype @@ -2844,8 +2841,7 @@ def make_block_same_class(self, values, placement, sparse_index=None, # won't take space since there's 0 items, plus it will preserve # the dtype. return self.make_block(np.empty(values.shape, dtype=dtype), - placement, - fastpath=True) + placement) elif nitems > 1: raise ValueError("Only 1-item 2d sparse blocks are supported") else: @@ -2854,7 +2850,7 @@ def make_block_same_class(self, values, placement, sparse_index=None, new_values = SparseArray(values, sparse_index=sparse_index, kind=kind or self.kind, dtype=dtype, fill_value=fill_value, copy=copy) - return self.make_block(new_values, fastpath=fastpath, + return self.make_block(new_values, placement=placement) def interpolate(self, method='pad', axis=0, inplace=False, limit=None, @@ -2921,8 +2917,7 @@ def sparse_reindex(self, new_index): placement=self.mgr_locs) -def make_block(values, placement, klass=None, ndim=None, dtype=None, - fastpath=False): +def make_block(values, placement, klass=None, ndim=None, dtype=None): if klass is None: dtype = dtype or values.dtype vtype = dtype.type @@ -2952,10 +2947,10 @@ def make_block(values, placement, klass=None, ndim=None, dtype=None, klass = ObjectBlock elif klass is DatetimeTZBlock and not is_datetimetz(values): - return klass(values, ndim=ndim, fastpath=fastpath, + return klass(values, ndim=ndim, placement=placement, dtype=dtype) - return klass(values, ndim=ndim, fastpath=fastpath, placement=placement) + return klass(values, ndim=ndim, placement=placement) # TODO: flexible with index=None and/or items=None @@ -3015,7 +3010,7 @@ class BlockManager(PandasObject): __slots__ = ['axes', 'blocks', '_ndim', '_shape', '_known_consolidated', '_is_consolidated', '_blknos', '_blklocs'] - def __init__(self, blocks, axes, do_integrity_check=True, fastpath=True): + def __init__(self, blocks, axes, do_integrity_check=True): self.axes = [_ensure_index(ax) for ax in axes] self.blocks = tuple(blocks) @@ -3626,8 +3621,7 @@ def get_slice(self, slobj, axis=0): new_axes = list(self.axes) new_axes[axis] = new_axes[axis][slobj] - bm = self.__class__(new_blocks, new_axes, do_integrity_check=False, - fastpath=True) + bm = self.__class__(new_blocks, new_axes, do_integrity_check=False) bm._consolidate_inplace() return bm @@ -3782,7 +3776,7 @@ def xs(self, key, axis=1, copy=True, takeable=False): # we must copy here as we are mixed type for blk in self.blocks: newb = make_block(values=blk.values[slicer], - klass=blk.__class__, fastpath=True, + klass=blk.__class__, placement=blk.mgr_locs) new_blocks.append(newb) elif len(self.blocks) == 1: @@ -3792,8 +3786,7 @@ def xs(self, key, axis=1, copy=True, takeable=False): vals = vals.copy() new_blocks = [make_block(values=vals, placement=block.mgr_locs, - klass=block.__class__, - fastpath=True, )] + klass=block.__class__)] return self.__class__(new_blocks, new_axes) @@ -3896,7 +3889,7 @@ def iget(self, i, fastpath=True): return SingleBlockManager( [block.make_block_same_class(values, placement=slice(0, len(values)), - ndim=1, fastpath=True)], + ndim=1)], self.axes[1]) def get_scalar(self, tup): @@ -4418,8 +4411,7 @@ def __init__(self, block, axis, do_integrity_check=False, fastpath=False): block = block[0] if not isinstance(block, Block): - block = make_block(block, placement=slice(0, len(axis)), ndim=1, - fastpath=True) + block = make_block(block, placement=slice(0, len(axis)), ndim=1) self.blocks = [block] @@ -4734,8 +4726,7 @@ def form_blocks(arrays, names, axes): if len(datetime_tz_items): dttz_blocks = [make_block(array, klass=DatetimeTZBlock, - fastpath=True, - placement=[i], ) + placement=[i]) for i, _, array in datetime_tz_items] blocks.extend(dttz_blocks) @@ -4752,7 +4743,7 @@ def form_blocks(arrays, names, axes): blocks.extend(sparse_blocks) if len(cat_items) > 0: - cat_blocks = [make_block(array, klass=CategoricalBlock, fastpath=True, + cat_blocks = [make_block(array, klass=CategoricalBlock, placement=[i]) for i, _, array in cat_items] blocks.extend(cat_blocks) @@ -4809,8 +4800,7 @@ def _sparse_blockify(tuples, dtype=None): new_blocks = [] for i, names, array in tuples: array = _maybe_to_sparse(array) - block = make_block(array, klass=SparseBlock, fastpath=True, - placement=[i]) + block = make_block(array, klass=SparseBlock, placement=[i]) new_blocks.append(block) return new_blocks @@ -4894,7 +4884,7 @@ def _merge_blocks(blocks, dtype=None, _can_consolidate=True): new_values = new_values[argsort] new_mgr_locs = new_mgr_locs[argsort] - return make_block(new_values, fastpath=True, placement=new_mgr_locs) + return make_block(new_values, placement=new_mgr_locs) # no merge return blocks From 5a0e5f2d132db85a20f6f2eaa21ed3d547057bbd Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 16 Jan 2018 10:53:01 -0800 Subject: [PATCH 2/6] restore fastpath to internals.make_block for downstream compat remove kwargs from Block.__init__ methods reorder placement and ndim kwargs in ObjectBlock.__init__ to match others --- pandas/core/internals.py | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 8894b56eb1509..29998055e70b7 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -210,7 +210,7 @@ def make_block(self, values, placement=None, ndim=None, **kwargs): return make_block(values, placement=placement, ndim=ndim, **kwargs) - def make_block_scalar(self, values, **kwargs): + def make_block_scalar(self, values): """ Create a ScalarBlock """ @@ -1694,7 +1694,7 @@ class NonConsolidatableMixIn(object): _validate_ndim = False _holder = None - def __init__(self, values, placement, ndim=None, **kwargs): + def __init__(self, values, placement, ndim=None): # Placement must be converted to BlockPlacement via property setter # before ndim logic, because placement may be a slice which doesn't @@ -1952,12 +1952,12 @@ class TimeDeltaBlock(DatetimeLikeBlockMixin, IntBlock): _can_hold_na = True is_numeric = False - def __init__(self, values, placement, **kwargs): + def __init__(self, values, placement, ndim=None): if values.dtype != _TD_DTYPE: values = conversion.ensure_timedelta64ns(values) super(TimeDeltaBlock, self).__init__(values, - placement=placement, **kwargs) + placement=placement, ndim=ndim) @property def _box_func(self): @@ -2090,13 +2090,12 @@ class ObjectBlock(Block): is_object = True _can_hold_na = True - def __init__(self, values, ndim=2, placement=None, - **kwargs): + def __init__(self, values, placement=None, ndim=2): if issubclass(values.dtype.type, compat.string_types): values = np.array(values, dtype=object) super(ObjectBlock, self).__init__(values, ndim=ndim, - placement=placement, **kwargs) + placement=placement) @property def is_bool(self): @@ -2343,11 +2342,11 @@ class CategoricalBlock(NonConsolidatableMixIn, ObjectBlock): _holder = Categorical _concatenator = staticmethod(_concat._concat_categorical) - def __init__(self, values, placement, **kwargs): + def __init__(self, values, placement, ndim=None): # coerce to categorical if we can super(CategoricalBlock, self).__init__(_maybe_to_categorical(values), - placement=placement, **kwargs) + placement=placement, ndim=ndim) @property def is_view(self): @@ -2464,12 +2463,12 @@ class DatetimeBlock(DatetimeLikeBlockMixin, Block): is_datetime = True _can_hold_na = True - def __init__(self, values, placement, **kwargs): + def __init__(self, values, placement, ndim=None): if values.dtype != _NS_DTYPE: values = conversion.ensure_datetime64ns(values) super(DatetimeBlock, self).__init__(values, - placement=placement, **kwargs) + placement=placement, ndim=ndim) def _astype(self, dtype, mgr=None, **kwargs): """ @@ -2600,13 +2599,11 @@ class DatetimeTZBlock(NonConsolidatableMixIn, DatetimeBlock): _concatenator = staticmethod(_concat._concat_datetime) is_datetimetz = True - def __init__(self, values, placement, ndim=2, **kwargs): + def __init__(self, values, placement, ndim=2, dtype=None): if not isinstance(values, self._holder): values = self._holder(values) - dtype = kwargs.pop('dtype', None) - if dtype is not None: if isinstance(dtype, compat.string_types): dtype = DatetimeTZDtype.construct_from_string(dtype) @@ -2616,7 +2613,7 @@ def __init__(self, values, placement, ndim=2, **kwargs): raise ValueError("cannot create a DatetimeTZBlock without a tz") super(DatetimeTZBlock, self).__init__(values, placement=placement, - ndim=ndim, **kwargs) + ndim=ndim) def copy(self, deep=True, mgr=None): """ copy constructor """ @@ -2917,7 +2914,9 @@ def sparse_reindex(self, new_index): placement=self.mgr_locs) -def make_block(values, placement, klass=None, ndim=None, dtype=None): +# TODO: deprecate `fastpath` kwarg after getting pyarrow to stop passing it +def make_block(values, placement, klass=None, ndim=None, dtype=None, + fastpath=False): if klass is None: dtype = dtype or values.dtype vtype = dtype.type From 028ce18c2947f92022d8fa07a24a5cee7f9add7c Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 16 Jan 2018 22:32:33 -0800 Subject: [PATCH 3/6] remove more unused kwargs, DeprecationWarning for make_block fastpath --- pandas/core/internals.py | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 29998055e70b7..20d9ed622ee54 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -198,7 +198,7 @@ def array_dtype(self): """ return self.dtype - def make_block(self, values, placement=None, ndim=None, **kwargs): + def make_block(self, values, placement=None, ndim=None): """ Create a new block, with type inference propagate any values that are not specified @@ -208,7 +208,7 @@ def make_block(self, values, placement=None, ndim=None, **kwargs): if ndim is None: ndim = self.ndim - return make_block(values, placement=placement, ndim=ndim, **kwargs) + return make_block(values, placement=placement, ndim=ndim) def make_block_scalar(self, values): """ @@ -216,12 +216,13 @@ def make_block_scalar(self, values): """ return ScalarBlock(values) - def make_block_same_class(self, values, placement=None, **kwargs): + def make_block_same_class(self, values, placement=None, ndim=None): """ Wrap given values in a block of same type as self. """ if placement is None: placement = self.mgr_locs - return make_block(values, placement=placement, klass=self.__class__, - **kwargs) + # TODO: Why not set default for ndim like we do for self.make_block? + return make_block(values, placement=placement, ndim=ndim, + klass=self.__class__) @mgr_locs.setter def mgr_locs(self, new_mgr_locs): @@ -1160,7 +1161,7 @@ def _interpolate_with_fill(self, method='pad', axis=0, inplace=False, dtype=self.dtype) values = self._try_coerce_result(values) - blocks = [self.make_block(values, klass=self.__class__)] + blocks = [self.make_block_same_class(values, ndim=self.ndim)] return self._maybe_downcast(blocks, downcast) def _interpolate(self, method=None, index=None, values=None, @@ -1200,7 +1201,7 @@ def func(x): # interp each column independently interp_values = np.apply_along_axis(func, axis, data) - blocks = [self.make_block(interp_values, klass=self.__class__)] + blocks = [self.make_block_same_class(interp_values)] return self._maybe_downcast(blocks, downcast) def take_nd(self, indexer, axis, new_mgr_locs=None, fill_tuple=None): @@ -2819,7 +2820,7 @@ def copy(self, deep=True, mgr=None): def make_block_same_class(self, values, placement, sparse_index=None, kind=None, dtype=None, fill_value=None, - copy=False, **kwargs): + copy=False, ndim=None): """ return a new block """ if dtype is None: dtype = values.dtype @@ -2914,9 +2915,12 @@ def sparse_reindex(self, new_index): placement=self.mgr_locs) -# TODO: deprecate `fastpath` kwarg after getting pyarrow to stop passing it def make_block(values, placement, klass=None, ndim=None, dtype=None, - fastpath=False): + fastpath=None): + if fastpath is not None: + # GH#19265 pyarrow is passing this + warnings.warn("fastpath argument is deprecated, will be removed " + "in a future release.", DeprecationWarning) if klass is None: dtype = dtype or values.dtype vtype = dtype.type From 75a0a7782333467fb487acfba3a58302c62a0118 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 17 Jan 2018 17:15:18 -0800 Subject: [PATCH 4/6] test that deprecationwarning is emitted --- pandas/tests/internals/test_internals.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index 623d2d39607c2..b1f89829c95a5 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -1254,3 +1254,11 @@ def test_binop_other(self, op, value, dtype): result = op(s, e).dtypes expected = op(s, value).dtypes assert_series_equal(result, expected) + + +def test_deprecated_fastpath(): + # GH#19265 + values = np.random.rand(3, 3) + with tm.assert_produces_warning(DeprecationWarning, + check_stacklevel=False): + make_block(values, placement=np.arange(3), fastpath=True) From 3148e5216e41977e54c44dafb6b418d4ec7f4874 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 18 Jan 2018 08:28:00 -0800 Subject: [PATCH 5/6] whitespace cleanup --- pandas/core/internals.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 9b46aa28a721a..90a5fe10f7420 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -4737,8 +4737,7 @@ def form_blocks(arrays, names, axes): blocks.extend(sparse_blocks) if len(items_dict['CategoricalBlock']) > 0: - cat_blocks = [make_block(array, klass=CategoricalBlock, - placement=[i]) + cat_blocks = [make_block(array, klass=CategoricalBlock, placement=[i]) for i, _, array in items_dict['CategoricalBlock']] blocks.extend(cat_blocks) From 1ee79409d2e6843fdcf1971c0ffebb00b7466c44 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 19 Jan 2018 08:50:13 -0800 Subject: [PATCH 6/6] remove unneeded comment --- pandas/core/internals.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 90a5fe10f7420..d616ef441a31b 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -226,7 +226,6 @@ def make_block_same_class(self, values, placement=None, ndim=None): """ Wrap given values in a block of same type as self. """ if placement is None: placement = self.mgr_locs - # TODO: Why not set default for ndim like we do for self.make_block? return make_block(values, placement=placement, ndim=ndim, klass=self.__class__)