diff --git a/pandas/core/common.py b/pandas/core/common.py index 134e43bcd006a..e895c8ed0cf2d 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -134,7 +134,7 @@ def _isnull_new(obj): elif isinstance(obj, (ABCSeries, np.ndarray)): return _isnull_ndarraylike(obj) elif isinstance(obj, ABCGeneric): - return obj._constructor(obj._data.apply(lambda x: isnull(x.values))) + return obj._constructor(obj._data.isnull(func=isnull)) elif isinstance(obj, list) or hasattr(obj, '__array__'): return _isnull_ndarraylike(np.asarray(obj)) else: @@ -160,8 +160,7 @@ def _isnull_old(obj): elif isinstance(obj, (ABCSeries, np.ndarray)): return _isnull_ndarraylike_old(obj) elif isinstance(obj, ABCGeneric): - return obj._constructor(obj._data.apply( - lambda x: _isnull_old(x.values))) + return obj._constructor(obj._data.isnull(func=_isnull_old)) elif isinstance(obj, list) or hasattr(obj, '__array__'): return _isnull_ndarraylike_old(np.asarray(obj)) else: @@ -1540,14 +1539,7 @@ def _maybe_box(indexer, values, obj, key): # return the value return values - -def _values_from_object(o): - """ return my values or the object if we are say an ndarray """ - f = getattr(o, 'get_values', None) - if f is not None: - o = f() - return o - +_values_from_object = lib.values_from_object def _possibly_convert_objects(values, convert_dates=True, convert_numeric=True, @@ -2036,20 +2028,16 @@ def _maybe_make_list(obj): return obj -def is_bool(obj): - return isinstance(obj, (bool, np.bool_)) +is_bool = lib.is_bool -def is_integer(obj): - return isinstance(obj, (numbers.Integral, np.integer)) +is_integer = lib.is_integer -def is_float(obj): - return isinstance(obj, (float, np.floating)) +is_float = lib.is_float -def is_complex(obj): - return isinstance(obj, (numbers.Complex, np.complexfloating)) +is_complex = lib.is_complex def is_iterator(obj): diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 4c19831c6cbe3..de8bac05f211f 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ 
-2825,14 +2825,14 @@ def _combine_match_columns(self, other, func, fill_value=None): fill_value) new_data = left._data.eval( - func, right, axes=[left.columns, self.index]) + func=func, other=right, axes=[left.columns, self.index]) return self._constructor(new_data) def _combine_const(self, other, func, raise_on_error=True): if self.empty: return self - new_data = self._data.eval(func, other, raise_on_error=raise_on_error) + new_data = self._data.eval(func=func, other=other, raise_on_error=raise_on_error) return self._constructor(new_data) def _compare_frame_evaluate(self, other, func, str_rep): @@ -3228,7 +3228,7 @@ def diff(self, periods=1): ------- diffed : DataFrame """ - new_data = self._data.diff(periods) + new_data = self._data.diff(n=periods) return self._constructor(new_data) #---------------------------------------------------------------------- diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 2ee96d660eb87..0a0cfe94409f9 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -128,7 +128,7 @@ def _init_mgr(self, mgr, axes=None, dtype=None, copy=False): elif dtype is not None: # avoid copy if we can if len(mgr.blocks) > 1 or mgr.blocks[0].values.dtype != dtype: - mgr = mgr.astype(dtype) + mgr = mgr.astype(dtype=dtype) return mgr #---------------------------------------------------------------------- @@ -2011,7 +2011,7 @@ def astype(self, dtype, copy=True, raise_on_error=True): """ mgr = self._data.astype( - dtype, copy=copy, raise_on_error=raise_on_error) + dtype=dtype, copy=copy, raise_on_error=raise_on_error) return self._constructor(mgr).__finalize__(self) def copy(self, deep=True): @@ -2153,7 +2153,7 @@ def fillna(self, value=None, method=None, axis=0, inplace=False, from pandas import Series value = Series(value) - new_data = self._data.fillna(value, inplace=inplace, + new_data = self._data.fillna(value=value, inplace=inplace, downcast=downcast) elif isinstance(value, (dict, com.ABCSeries)): @@ -2170,7 +2170,7 @@ def 
fillna(self, value=None, method=None, axis=0, inplace=False, obj.fillna(v, inplace=True) return result else: - new_data = self._data.fillna(value, inplace=inplace, + new_data = self._data.fillna(value=value, inplace=inplace, downcast=downcast) if inplace: @@ -2355,7 +2355,8 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, new_data = self._data for c, src in compat.iteritems(to_replace): if c in value and c in self: - new_data = new_data.replace(src, value[c], + new_data = new_data.replace(to_replace=src, + value=value[c], filter=[c], inplace=inplace, regex=regex) @@ -2365,7 +2366,8 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, new_data = self._data for k, src in compat.iteritems(to_replace): if k in self: - new_data = new_data.replace(src, value, + new_data = new_data.replace(to_replace=src, + value=value, filter=[k], inplace=inplace, regex=regex) @@ -2380,13 +2382,16 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, 'in length. 
Expecting %d got %d ' % (len(to_replace), len(value))) - new_data = self._data.replace_list(to_replace, value, + new_data = self._data.replace_list(src_list=to_replace, + dest_list=value, inplace=inplace, regex=regex) else: # [NA, ''] -> 0 - new_data = self._data.replace(to_replace, value, - inplace=inplace, regex=regex) + new_data = self._data.replace(to_replace=to_replace, + value=value, + inplace=inplace, + regex=regex) elif to_replace is None: if not (com.is_re_compilable(regex) or com.is_list_like(regex) or @@ -2406,13 +2411,14 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, for k, v in compat.iteritems(value): if k in self: - new_data = new_data.replace(to_replace, v, + new_data = new_data.replace(to_replace=to_replace, + value=v, filter=[k], inplace=inplace, regex=regex) elif not com.is_list_like(value): # NA -> 0 - new_data = self._data.replace(to_replace, value, + new_data = self._data.replace(to_replace=to_replace, value=value, inplace=inplace, regex=regex) else: msg = ('Invalid "to_replace" type: ' @@ -3116,12 +3122,12 @@ def where(self, cond, other=np.nan, inplace=False, axis=None, level=None, if inplace: # we may have different type blocks come out of putmask, so # reconstruct the block manager - new_data = self._data.putmask(cond, other, align=axis is None, + new_data = self._data.putmask(mask=cond, new=other, align=axis is None, inplace=True) self._update_inplace(new_data) else: - new_data = self._data.where(other, cond, align=axis is None, + new_data = self._data.where(other=other, cond=cond, align=axis is None, raise_on_error=raise_on_error, try_cast=try_cast) @@ -3168,7 +3174,7 @@ def shift(self, periods=1, freq=None, axis=0, **kwds): if freq is None and not len(kwds): block_axis = self._get_block_manager_axis(axis) indexer = com._shift_indexer(len(self), periods) - new_data = self._data.shift(indexer, periods, axis=block_axis) + new_data = self._data.shift(indexer=indexer, periods=periods, axis=block_axis) else: 
return self.tshift(periods, freq, **kwds) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 780ad57ed8f13..d2f538decd576 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -321,7 +321,7 @@ def _setitem_with_indexer(self, indexer, value): # as we select a slice indexer on the mi idx = index._convert_slice_indexer(idx) obj = obj.copy() - obj._data = obj._data.setitem(tuple([idx]), value) + obj._data = obj._data.setitem(indexer=tuple([idx]), value=value) self.obj[item] = obj return @@ -341,7 +341,7 @@ def setter(item, v): # set the item, possibly having a dtype change s = s.copy() - s._data = s._data.setitem(pi, v) + s._data = s._data.setitem(indexer=pi, value=v) s._maybe_update_cacher(clear=True) self.obj[item] = s @@ -419,7 +419,7 @@ def can_do_equal_len(): value = self._align_panel(indexer, value) # actually do the set - self.obj._data = self.obj._data.setitem(indexer, value) + self.obj._data = self.obj._data.setitem(indexer=indexer, value=value) self.obj._maybe_update_cacher(clear=True) def _align_series(self, indexer, ser): diff --git a/pandas/core/internals.py b/pandas/core/internals.py index e88f1972b5f7d..d0a8e1c06fd28 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -215,6 +215,14 @@ def dtype(self): def ftype(self): return "%s:%s" % (self.dtype, self._ftype) + def as_block(self, result): + """ if we are not a block, then wrap as a block, must have compatible shape """ + if not isinstance(result, Block): + result = make_block(result, + self.items, + self.ref_items) + return result + def merge(self, other): if not self.ref_items.equals(other.ref_items): raise AssertionError('Merge operands must have same ref_items') @@ -346,6 +354,10 @@ def split_block_at(self, item): klass=self.__class__, fastpath=True) + def apply(self, func, **kwargs): + """ apply the function to my values; return a block if we are not one """ + return self.as_block(func(self.values)) + def fillna(self, value, inplace=False, 
downcast=None): if not self._can_hold_na: if inplace: @@ -2342,38 +2354,32 @@ def _verify_integrity(self): 'tot_items: {1}'.format(len(self.items), tot_items)) - def apply(self, f, *args, **kwargs): - """ iterate over the blocks, collect and create a new block manager + def apply(self, f, axes=None, filter=None, do_integrity_check=False, **kwargs): + """ + iterate over the blocks, collect and create a new block manager Parameters ---------- f : the callable or function name to operate on at the block level axes : optional (if not supplied, use self.axes) filter : list, if supplied, only call the block if the filter is in - the block + the block + do_integrity_check : boolean, default False. Do the block manager integrity check + + Returns + ------- + Block Manager (new object) + """ - axes = kwargs.pop('axes', None) - filter = kwargs.get('filter') - do_integrity_check = kwargs.pop('do_integrity_check', False) result_blocks = [] for blk in self.blocks: if filter is not None: - kwargs['filter'] = set(kwargs['filter']) + kwargs['filter'] = set(filter) if not blk.items.isin(filter).any(): result_blocks.append(blk) continue - if callable(f): - applied = f(blk, *args, **kwargs) - - # if we are no a block, try to coerce - if not isinstance(applied, Block): - applied = make_block(applied, - blk.items, - blk.ref_items) - - else: - applied = getattr(blk, f)(*args, **kwargs) + applied = getattr(blk, f)(**kwargs) if isinstance(applied, list): result_blocks.extend(applied) @@ -2386,43 +2392,46 @@ def apply(self, f, *args, **kwargs): bm._consolidate_inplace() return bm - def where(self, *args, **kwargs): - return self.apply('where', *args, **kwargs) + def isnull(self, **kwargs): + return self.apply('apply', **kwargs) + + def where(self, **kwargs): + return self.apply('where', **kwargs) - def eval(self, *args, **kwargs): - return self.apply('eval', *args, **kwargs) + def eval(self, **kwargs): + return self.apply('eval', **kwargs) - def setitem(self, *args, **kwargs): - return 
self.apply('setitem', *args, **kwargs) + def setitem(self, **kwargs): + return self.apply('setitem', **kwargs) - def putmask(self, *args, **kwargs): - return self.apply('putmask', *args, **kwargs) + def putmask(self, **kwargs): + return self.apply('putmask', **kwargs) - def diff(self, *args, **kwargs): - return self.apply('diff', *args, **kwargs) + def diff(self, **kwargs): + return self.apply('diff', **kwargs) - def interpolate(self, *args, **kwargs): - return self.apply('interpolate', *args, **kwargs) + def interpolate(self, **kwargs): + return self.apply('interpolate', **kwargs) - def shift(self, *args, **kwargs): - return self.apply('shift', *args, **kwargs) + def shift(self, **kwargs): + return self.apply('shift', **kwargs) - def fillna(self, *args, **kwargs): - return self.apply('fillna', *args, **kwargs) + def fillna(self, **kwargs): + return self.apply('fillna', **kwargs) - def downcast(self, *args, **kwargs): - return self.apply('downcast', *args, **kwargs) + def downcast(self, **kwargs): + return self.apply('downcast', **kwargs) - def astype(self, *args, **kwargs): - return self.apply('astype', *args, **kwargs) + def astype(self, dtype, **kwargs): + return self.apply('astype', dtype=dtype, **kwargs) - def convert(self, *args, **kwargs): - return self.apply('convert', *args, **kwargs) + def convert(self, **kwargs): + return self.apply('convert', **kwargs) - def replace(self, *args, **kwargs): - return self.apply('replace', *args, **kwargs) + def replace(self, **kwargs): + return self.apply('replace', **kwargs) - def replace_list(self, src_lst, dest_lst, inplace=False, regex=False): + def replace_list(self, src_list, dest_list, inplace=False, regex=False): """ do a list replace """ # figure out our mask a-priori to avoid repeated replacements @@ -2432,7 +2441,7 @@ def comp(s): if isnull(s): return isnull(values) return values == getattr(s, 'asm8', s) - masks = [comp(s) for i, s in enumerate(src_lst)] + masks = [comp(s) for i, s in enumerate(src_list)] 
result_blocks = [] for blk in self.blocks: @@ -2440,7 +2449,7 @@ def comp(s): # its possible to get multiple result blocks here # replace ALWAYS will return a list rb = [blk if inplace else blk.copy()] - for i, (s, d) in enumerate(zip(src_lst, dest_lst)): + for i, (s, d) in enumerate(zip(src_list, dest_list)): new_rb = [] for b in rb: if b.dtype == np.object_: @@ -2465,13 +2474,13 @@ def comp(s): bm._consolidate_inplace() return bm - def prepare_for_merge(self, *args, **kwargs): + def prepare_for_merge(self, **kwargs): """ prepare for merging, return a new block manager with Sparse -> Dense """ self._consolidate_inplace() if self._has_sparse: - return self.apply('prepare_for_merge', *args, **kwargs) + return self.apply('prepare_for_merge', **kwargs) return self def post_merge(self, objs, **kwargs): @@ -3631,6 +3640,18 @@ def shape(self): self._shape = tuple([len(self.axes[0])]) return self._shape + def apply(self, f, axes=None, do_integrity_check=False, **kwargs): + """ + fast path for SingleBlock Manager + + see also BlockManager.apply + """ + applied = getattr(self._block, f)(**kwargs) + bm = self.__class__(applied, axes or self.axes, + do_integrity_check=do_integrity_check) + bm._consolidate_inplace() + return bm + def reindex(self, new_axis, indexer=None, method=None, fill_value=None, limit=None, copy=True): # if we are the same and don't copy, just return @@ -3687,14 +3708,14 @@ def set_ref_items(self, ref_items, maybe_rename=True): def index(self): return self.axes[0] - def convert(self, *args, **kwargs): + def convert(self, **kwargs): """ convert the whole block as one """ kwargs['by_item'] = False - return self.apply('convert', *args, **kwargs) + return self.apply('convert', **kwargs) @property def dtype(self): - return self._block.dtype + return self._values.dtype @property def ftype(self): @@ -3706,7 +3727,7 @@ def values(self): @property def itemsize(self): - return self._block.itemsize + return self._values.itemsize @property def _can_hold_na(self): 
diff --git a/pandas/core/series.py b/pandas/core/series.py index 8873af08cc5f3..300da3dc6834d 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -212,7 +212,7 @@ def __init__(self, data=None, index=None, dtype=None, name=None, # create/copy the manager if isinstance(data, SingleBlockManager): if dtype is not None: - data = data.astype(dtype, raise_on_error=False) + data = data.astype(dtype=dtype, raise_on_error=False) elif copy: data = data.copy() else: @@ -281,23 +281,23 @@ def _set_subtyp(self, is_all_dates): # ndarray compatibility def item(self): - return self.values.item() + return self._data.values.item() @property def data(self): - return self.values.data + return self._data.values.data @property def strides(self): - return self.values.strides + return self._data.values.strides @property def size(self): - return self.values.size + return self._data.values.size @property def flags(self): - return self.values.flags + return self._data.values.flags @property def dtype(self): @@ -694,7 +694,7 @@ def _set_labels(self, key, value): def _set_values(self, key, value): if isinstance(key, Series): key = key.values - self._data = self._data.setitem(key, value) + self._data = self._data.setitem(indexer=key, value=value) # help out SparseSeries _get_val_at = ndarray.__getitem__ @@ -1643,7 +1643,7 @@ def update(self, other): other = other.reindex_like(self) mask = notnull(other) - self._data = self._data.putmask(mask, other, inplace=True) + self._data = self._data.putmask(mask=mask, new=other, inplace=True) self._maybe_update_cacher() #---------------------------------------------------------------------- diff --git a/pandas/lib.pyx b/pandas/lib.pyx index ea5071eab976c..dccc68ab59ad3 100644 --- a/pandas/lib.pyx +++ b/pandas/lib.pyx @@ -59,6 +59,16 @@ PyDateTime_IMPORT import_array() import_ufunc() +def values_from_object(object o): + """ return my values or the object if we are say an ndarray """ + cdef f + + f = getattr(o, 'get_values', None) + if f is not 
None: + o = f() + + return o + cpdef map_indices_list(list index): ''' Produce a dict mapping the values of the input array to their respective diff --git a/pandas/src/inference.pyx b/pandas/src/inference.pyx index c048b2786bd91..046fa3887a32d 100644 --- a/pandas/src/inference.pyx +++ b/pandas/src/inference.pyx @@ -3,6 +3,19 @@ from tslib import NaT from datetime import datetime, timedelta iNaT = util.get_nat() +# core.common import for fast inference checks +def is_float(object obj): + return util.is_float_object(obj) + +def is_integer(object obj): + return util.is_integer_object(obj) + +def is_bool(object obj): + return util.is_bool_object(obj) + +def is_complex(object obj): + return util.is_complex_object(obj) + _TYPE_MAP = { np.int8: 'integer', np.int16: 'integer', diff --git a/pandas/src/reduce.pyx b/pandas/src/reduce.pyx index 50fff7f9eb460..a22e7e636d7e4 100644 --- a/pandas/src/reduce.pyx +++ b/pandas/src/reduce.pyx @@ -55,12 +55,6 @@ cdef class Reducer: # in cython, so increment first Py_INCREF(dummy) else: - if dummy.dtype != self.arr.dtype: - raise ValueError('Dummy array must be same dtype') - if len(dummy) != self.chunksize: - raise ValueError('Dummy array must be length %d' % - self.chunksize) - # we passed a series-like if hasattr(dummy,'values'): @@ -68,6 +62,12 @@ cdef class Reducer: index = getattr(dummy,'index',None) dummy = dummy.values + if dummy.dtype != self.arr.dtype: + raise ValueError('Dummy array must be same dtype') + if len(dummy) != self.chunksize: + raise ValueError('Dummy array must be length %d' % + self.chunksize) + return dummy, index def get_result(self): @@ -193,9 +193,9 @@ cdef class SeriesBinGrouper: values = np.empty(0, dtype=self.arr.dtype) index = None else: - if dummy.dtype != self.arr.dtype: - raise ValueError('Dummy array must be same dtype') values = dummy.values + if values.dtype != self.arr.dtype: + raise ValueError('Dummy array must be same dtype') if not values.flags.contiguous: values = values.copy() index = 
dummy.index @@ -318,9 +318,9 @@ cdef class SeriesGrouper: values = np.empty(0, dtype=self.arr.dtype) index = None else: + values = dummy.values if dummy.dtype != self.arr.dtype: raise ValueError('Dummy array must be same dtype') - values = dummy.values if not values.flags.contiguous: values = values.copy() index = dummy.index diff --git a/pandas/tests/test_common.py b/pandas/tests/test_common.py index e28aca3e5ef3a..2c32b5d0310db 100644 --- a/pandas/tests/test_common.py +++ b/pandas/tests/test_common.py @@ -198,7 +198,7 @@ def test_nan_to_nat_conversions(): assert(result == iNaT) s = df['B'].copy() - s._data = s._data.setitem(tuple([slice(8,9)]),np.nan) + s._data = s._data.setitem(indexer=tuple([slice(8,9)]),value=np.nan) assert(isnull(s[8])) # numpy < 1.7.0 is wrong diff --git a/pandas/tests/test_internals.py b/pandas/tests/test_internals.py index f86dec1a99850..eca1eae540920 100644 --- a/pandas/tests/test_internals.py +++ b/pandas/tests/test_internals.py @@ -604,7 +604,7 @@ def test_equals(self): bm1 = BlockManager([block1, block2], [index, np.arange(block1.shape[1])]) bm2 = BlockManager([block2, block1], [index, np.arange(block1.shape[1])]) self.assert_(bm1.equals(bm2)) - + if __name__ == '__main__': import nose nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index 2762f5fca9a97..23181485d3bbb 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -1205,16 +1205,14 @@ def get_value(self, series, key): Fast lookup of value from 1-dimensional ndarray. 
Only use this if you know what you're doing """ - timestamp = None - #if isinstance(key, Timestamp): - # timestamp = key - #el + if isinstance(key, datetime): + # needed to localize naive datetimes - timestamp = Timestamp(key, tz=self.tz) + if self.tz is not None: + key = Timestamp(key, tz=self.tz) - if timestamp: - return self.get_value_maybe_box(series, timestamp) + return self.get_value_maybe_box(series, key) try: return _maybe_box(self, Index.get_value(self, series, key), series, key)