diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx index 3a0fda5aed620..9459cd297c758 100644 --- a/pandas/_libs/reduction.pyx +++ b/pandas/_libs/reduction.pyx @@ -1,7 +1,5 @@ from copy import copy -from cython import Py_ssize_t - from libc.stdlib cimport free, malloc import numpy as np @@ -11,14 +9,14 @@ from numpy cimport int64_t, ndarray cnp.import_array() -from pandas._libs cimport util +from pandas._libs.util cimport is_array, set_array_not_contiguous from pandas._libs.lib import is_scalar, maybe_convert_objects cpdef check_result_array(object obj, Py_ssize_t cnt): - if (util.is_array(obj) or + if (is_array(obj) or (isinstance(obj, list) and len(obj) == cnt) or getattr(obj, 'shape', None) == (cnt,)): raise ValueError('Must produce aggregated value') @@ -33,7 +31,7 @@ cdef class _BaseGrouper: if (dummy.dtype != self.arr.dtype and values.dtype != self.arr.dtype): raise ValueError('Dummy array must be same dtype') - if util.is_array(values) and not values.flags.contiguous: + if is_array(values) and not values.flags.contiguous: # e.g. Categorical has no `flags` attribute values = values.copy() index = dummy.index.values @@ -106,7 +104,7 @@ cdef class SeriesBinGrouper(_BaseGrouper): self.f = f values = series.values - if util.is_array(values) and not values.flags.c_contiguous: + if is_array(values) and not values.flags.c_contiguous: # e.g. Categorical has no `flags` attribute values = values.copy('C') self.arr = values @@ -204,7 +202,7 @@ cdef class SeriesGrouper(_BaseGrouper): self.f = f values = series.values - if util.is_array(values) and not values.flags.c_contiguous: + if is_array(values) and not values.flags.c_contiguous: # e.g. Categorical has no `flags` attribute values = values.copy('C') self.arr = values @@ -288,9 +286,9 @@ cpdef inline extract_result(object res, bint squeeze=True): res = res._values if squeeze and res.ndim == 1 and len(res) == 1: res = res[0] - if hasattr(res, 'values') and util.is_array(res.values): + if hasattr(res, 'values') and is_array(res.values): res = res.values - if util.is_array(res): + if is_array(res): if res.ndim == 0: res = res.item() elif squeeze and res.ndim == 1 and len(res) == 1: @@ -304,7 +302,7 @@ cdef class Slider: """ cdef: ndarray values, buf - Py_ssize_t stride, orig_len, orig_stride + Py_ssize_t stride char *orig_data def __init__(self, ndarray values, ndarray buf): @@ -316,11 +314,9 @@ cdef class Slider: self.values = values self.buf = buf - self.stride = values.strides[0] + self.stride = values.strides[0] self.orig_data = self.buf.data - self.orig_len = self.buf.shape[0] - self.orig_stride = self.buf.strides[0] self.buf.data = self.values.data self.buf.strides[0] = self.stride @@ -333,10 +329,8 @@ cdef class Slider: self.buf.shape[0] = end - start cdef reset(self): - - self.buf.shape[0] = self.orig_len self.buf.data = self.orig_data - self.buf.strides[0] = self.orig_stride + self.buf.shape[0] = 0 class InvalidApply(Exception): @@ -408,39 +402,34 @@ cdef class BlockSlider: """ Only capable of sliding on axis=0 """ - - cdef public: - object frame, dummy, index - int nblocks - Slider idx_slider - list blocks - cdef: + object frame, dummy, index, block + list blk_values + ndarray values + Slider idx_slider char **base_ptrs + int nblocks + Py_ssize_t i def __init__(self, object frame): - cdef: - Py_ssize_t i - object b - self.frame = frame self.dummy = frame[:0] self.index = self.dummy.index - self.blocks = [b.values for b in self.dummy._mgr.blocks] + self.blk_values = [block.values for block in self.dummy._mgr.blocks] - for x in self.blocks: - util.set_array_not_contiguous(x) + for values in self.blk_values: + set_array_not_contiguous(values) - self.nblocks = len(self.blocks) + self.nblocks = len(self.blk_values) # See the comment in indexes/base.py about _index_data. # We need this for EA-backed indexes that have a reference to a 1-d # ndarray like datetime / timedelta / period. self.idx_slider = Slider( self.frame.index._index_data, self.dummy.index._index_data) - self.base_ptrs = malloc(sizeof(char*) * len(self.blocks)) - for i, block in enumerate(self.blocks): + self.base_ptrs = malloc(sizeof(char*) * self.nblocks) + for i, block in enumerate(self.blk_values): self.base_ptrs[i] = (block).data def __dealloc__(self): @@ -450,10 +439,9 @@ cdef class BlockSlider: cdef: ndarray arr Py_ssize_t i - # move blocks for i in range(self.nblocks): - arr = self.blocks[i] + arr = self.blk_values[i] # axis=1 is the frame's axis=0 arr.data = self.base_ptrs[i] + arr.strides[1] * start @@ -470,10 +458,8 @@ cdef class BlockSlider: cdef: ndarray arr Py_ssize_t i - - # reset blocks for i in range(self.nblocks): - arr = self.blocks[i] + arr = self.blk_values[i] # axis=1 is the frame's axis=0 arr.data = self.base_ptrs[i]