diff --git a/doc/source/advanced.rst b/doc/source/advanced.rst
index d976b0c8c21a5..465fbf483b1cc 100644
--- a/doc/source/advanced.rst
+++ b/doc/source/advanced.rst
@@ -617,10 +617,20 @@ faster than fancy indexing.
    timeit ser.ix[indexer]
    timeit ser.take(indexer)
 
+.. _indexing.index_types:
+
+Index Types
+-----------
+
+We have discussed ``MultiIndex`` in the previous sections pretty extensively. ``DatetimeIndex`` and ``PeriodIndex``
+are shown :ref:`here `. ``TimedeltaIndex`` is shown :ref:`here `.
+
+In the following sub-sections we will highlight some other index types.
+
 .. _indexing.categoricalindex:
 
 CategoricalIndex
-----------------
+~~~~~~~~~~~~~~~~
 
 .. versionadded:: 0.16.1
 
@@ -702,10 +712,21 @@ values NOT in the categories, similarly to how you can reindex ANY pandas index.
 
    In [12]: pd.concat([df2, df3]
    TypeError: categories must match existing categories when appending
 
+.. _indexing.rangeindex:
+
+Int64Index and RangeIndex
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+``Int64Index`` is a fundamental basic index in *pandas*. It is an immutable array implementing an ordered, sliceable set.
+Prior to 0.18.0, the ``Int64Index`` would provide the default index for all ``NDFrame`` objects.
+
+``RangeIndex`` is a sub-class of ``Int64Index`` added in version 0.18.0, now providing the default index for all ``NDFrame`` objects.
+``RangeIndex`` is an optimized version of ``Int64Index`` that can represent a monotonic ordered set. These are analogous to python :ref:`range types `.
+
 .. _indexing.float64index:
 
 Float64Index
-------------
+~~~~~~~~~~~~
 
 .. note::
diff --git a/doc/source/timeseries.rst b/doc/source/timeseries.rst
index b5be9cf395feb..80a4774e02e69 100644
--- a/doc/source/timeseries.rst
+++ b/doc/source/timeseries.rst
@@ -1091,7 +1091,7 @@ An example of how holidays and holiday calendars are defined:
 Using this calendar, creating an index or doing offset arithmetic skips weekends
 and holidays (i.e., Memorial Day/July 4th). For example, the below defines
 a custom business day offset using the ``ExampleCalendar``. Like any other offset,
-it can be used to create a ``DatetimeIndex`` or added to ``datetime``
+it can be used to create a ``DatetimeIndex`` or added to ``datetime`` or ``Timestamp``
 objects.
 
 .. ipython:: python
diff --git a/doc/source/whatsnew/v0.18.0.txt b/doc/source/whatsnew/v0.18.0.txt
index c1f14ce6703a0..05a9d3ac0c861 100644
--- a/doc/source/whatsnew/v0.18.0.txt
+++ b/doc/source/whatsnew/v0.18.0.txt
@@ -19,6 +19,7 @@ Highlights include:
 - Window functions are now methods on ``.groupby`` like objects, see :ref:`here `.
 - ``pd.test()`` top-level nose test runner is available (:issue:`4327`)
+- Adding support for a ``RangeIndex`` as a specialized form of the ``Int64Index`` for memory savings, see :ref:`here `.
 
 Check the :ref:`API Changes ` and :ref:`deprecations ` before updating.
 
@@ -102,6 +103,39 @@ And multiple aggregations
 
    r.agg({'A' : ['mean','std'], 'B' : ['mean','std']})
 
+.. _whatsnew_0180.enhancements.rangeindex:
+
+Range Index
+^^^^^^^^^^^
+
+A ``RangeIndex`` has been added to the ``Int64Index`` sub-classes to support a memory-saving alternative for common use cases. It has a similar implementation to the python ``range`` object (``xrange`` in python 2), in that it only stores the start, stop, and step values for the index. It will transparently interact with the user API, converting to ``Int64Index`` if needed.
+
+This will now be the default constructed index for ``NDFrame`` objects, rather than previously an ``Int64Index``. (:issue:`939`)
+
+Previous Behavior:
+
+.. 
code-block:: python + + In [3]: s = Series(range(1000)) + + In [4]: s.index + Out[4]: + Int64Index([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + ... + 990, 991, 992, 993, 994, 995, 996, 997, 998, 999], dtype='int64', length=1000) + + In [6]: s.index.nbytes + Out[6]: 8000 + + +New Behavior: + +.. ipython:: python + + s = Series(range(1000)) + s.index + s.index.nbytes + .. _whatsnew_0180.enhancements.other: Other enhancements diff --git a/pandas/core/api.py b/pandas/core/api.py index e2ac57e37cba6..0c463d1a201b9 100644 --- a/pandas/core/api.py +++ b/pandas/core/api.py @@ -8,7 +8,8 @@ from pandas.core.categorical import Categorical from pandas.core.groupby import Grouper from pandas.core.format import set_eng_float_format -from pandas.core.index import Index, CategoricalIndex, Int64Index, Float64Index, MultiIndex +from pandas.core.index import (Index, CategoricalIndex, Int64Index, + RangeIndex, Float64Index, MultiIndex) from pandas.core.series import Series, TimeSeries from pandas.core.frame import DataFrame diff --git a/pandas/core/common.py b/pandas/core/common.py index b80b7eecaeb11..7f955002a2c68 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -84,6 +84,8 @@ def _check(cls, inst): ABCIndex = create_pandas_abc_type("ABCIndex", "_typ", ("index", )) ABCInt64Index = create_pandas_abc_type("ABCInt64Index", "_typ", ("int64index", )) +ABCRangeIndex = create_pandas_abc_type("ABCRangeIndex", "_typ", + ("rangeindex", )) ABCFloat64Index = create_pandas_abc_type("ABCFloat64Index", "_typ", ("float64index", )) ABCMultiIndex = create_pandas_abc_type("ABCMultiIndex", "_typ", @@ -97,7 +99,8 @@ def _check(cls, inst): ABCCategoricalIndex = create_pandas_abc_type("ABCCategoricalIndex", "_typ", ("categoricalindex", )) ABCIndexClass = create_pandas_abc_type("ABCIndexClass", "_typ", - ("index", "int64index", "float64index", + ("index", "int64index", "rangeindex", + "float64index", "multiindex", "datetimeindex", "timedeltaindex", "periodindex", "categoricalindex")) @@ -1796,11 +1799,8 @@ def is_bool_indexer(key): def _default_index(n): - from pandas.core.index import Int64Index - values = np.arange(n, dtype=np.int64) - result = Int64Index(values, name=None) - result.is_unique = True - return result + from pandas.core.index import RangeIndex + return RangeIndex(0, n, name=None) def ensure_float(arr): diff --git a/pandas/core/dtypes.py b/pandas/core/dtypes.py index 1e358694de63e..e6adbc8500117 100644 --- a/pandas/core/dtypes.py +++ b/pandas/core/dtypes.py @@ -214,5 +214,6 @@ def __eq__(self, other): if isinstance(other, compat.string_types): return other == self.name - return isinstance(other, DatetimeTZDtype) and self.unit == other.unit \ - and self.tz == other.tz + return isinstance(other, DatetimeTZDtype) and \ + self.unit == other.unit and \ + str(self.tz) == str(other.tz) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 273166db12142..7f53e08b7c38b 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5325,7 +5325,7 @@ def extract_index(data): (lengths[0], len(index))) raise ValueError(msg) else: - index = Index(np.arange(lengths[0])) + index = _default_index(lengths[0]) return _ensure_index(index) @@ -5538,11 +5538,11 @@ def convert(arr): def _get_names_from_index(data): - index = lrange(len(data)) has_some_name = any([getattr(s, 'name', None) is not None for s in data]) if not has_some_name: - return index + return _default_index(len(data)) + index = lrange(len(data)) count = 0 for i, s in enumerate(data): n = getattr(s, 'name', None) diff --git a/pandas/core/index.py 
b/pandas/core/index.py index 3832d0c69ed0e..63b748ada6afa 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -3,6 +3,8 @@ import warnings import operator from functools import partial +from math import ceil, floor + from sys import getsizeof import numpy as np @@ -15,18 +17,23 @@ from pandas.compat import range, zip, lrange, lzip, u, map from pandas import compat from pandas.core import algorithms -from pandas.core.base import PandasObject, FrozenList, FrozenNDArray, IndexOpsMixin, PandasDelegate +from pandas.core.base import (PandasObject, FrozenList, FrozenNDArray, + IndexOpsMixin, PandasDelegate) import pandas.core.base as base from pandas.util.decorators import (Appender, Substitution, cache_readonly, deprecate, deprecate_kwarg) import pandas.core.common as com from pandas.core.missing import _clean_reindex_fill_method -from pandas.core.common import (isnull, array_equivalent, is_dtype_equal, is_object_dtype, - is_datetimetz, ABCSeries, ABCCategorical, ABCPeriodIndex, - _values_from_object, is_float, is_integer, is_iterator, is_categorical_dtype, +from pandas.core.common import (isnull, array_equivalent, is_dtype_equal, + is_object_dtype, is_datetimetz, ABCSeries, + ABCCategorical, ABCPeriodIndex, + _values_from_object, is_float, is_integer, + is_iterator, is_categorical_dtype, _ensure_object, _ensure_int64, is_bool_indexer, - is_list_like, is_bool_dtype, is_null_slice, is_integer_dtype) + is_list_like, is_bool_dtype, is_null_slice, + is_integer_dtype, is_int64_dtype) from pandas.core.strings import StringAccessorMixin + from pandas.core.config import get_option from pandas.io.common import PerformanceWarning @@ -123,22 +130,33 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, fastpath=False, if name is None and hasattr(data, 'name'): name = data.name - # no class inference! 
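# Editorial sketch, not part of the patch: the range dispatch added a few
# lines below means a plain ``Index(...)`` call is expected to hand range
# input straight to ``RangeIndex``. Hypothetical session (assumes
# ``import pandas as pd``):
#
#   >>> pd.Index(range(5))
#   RangeIndex(start=0, stop=5, step=1)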
if fastpath: return cls._simple_new(data, name) + # range + if isinstance(data, RangeIndex): + return RangeIndex(start=data, copy=copy, dtype=dtype, name=name) + elif isinstance(data, range): + return RangeIndex.from_range(data, copy=copy, dtype=dtype, + name=name) + + # categorical if is_categorical_dtype(data) or is_categorical_dtype(dtype): return CategoricalIndex(data, copy=copy, name=name, **kwargs) - if isinstance(data, (np.ndarray, Index, ABCSeries)): + # index-like + elif isinstance(data, (np.ndarray, Index, ABCSeries)): + + if issubclass(data.dtype.type, + np.datetime64) or is_datetimetz(data): - if issubclass(data.dtype.type, np.datetime64) or is_datetimetz(data): from pandas.tseries.index import DatetimeIndex result = DatetimeIndex(data, copy=copy, name=name, **kwargs) if dtype is not None and _o_dtype == dtype: return Index(result.to_pydatetime(), dtype=_o_dtype) else: return result + elif issubclass(data.dtype.type, np.timedelta64): from pandas.tseries.tdi import TimedeltaIndex result = TimedeltaIndex(data, copy=copy, name=name, **kwargs) @@ -327,7 +345,8 @@ def is_(self, other): True if both have same underlying data, False otherwise : bool """ # use something other than None to be clearer - return self._id is getattr(other, '_id', Ellipsis) + return self._id is getattr( + other, '_id', Ellipsis) and self._id is not None def _reset_identity(self): """Initializes or resets ``_id`` attribute with new object""" @@ -455,14 +474,14 @@ def _coerce_scalar_to_index(self, item): """ return Index([item], dtype=self.dtype, **self._get_attributes_dict()) - def copy(self, names=None, name=None, dtype=None, deep=False): - """ + _index_shared_docs['copy'] = """ Make a copy of this object. Name and dtype sets those attributes on the new object. Parameters ---------- name : string, optional + deep : boolean, default False dtype : numpy dtype or pandas type Returns @@ -474,6 +493,10 @@ def copy(self, names=None, name=None, dtype=None, deep=False): In most cases, there should be no functional difference from using ``deep``, but if ``deep`` is passed it will attempt to deepcopy. """ + + @Appender(_index_shared_docs['copy']) + def copy(self, name=None, deep=False, dtype=None, **kwargs): + names = kwargs.get('names') if names is not None and name is not None: raise TypeError("Can only provide one of `names` and `name`") if deep: @@ -1060,9 +1083,9 @@ def _invalid_indexer(self, form, key): """ consistent invalid indexer message """ raise TypeError("cannot do {form} indexing on {klass} with these " "indexers [{key}] of {kind}".format(form=form, - klass=type(self), - key=key, - kind=type(key))) + klass=type(self), + key=key, + kind=type(key))) def get_duplicates(self): from collections import defaultdict @@ -1076,6 +1099,10 @@ def get_duplicates(self): def _cleanup(self): self._engine.clear_mapping() + @cache_readonly + def _constructor(self): + return type(self) + @cache_readonly def _engine(self): # property, for now, slow to look up @@ -1639,7 +1666,7 @@ def union(self, other): def _wrap_union_result(self, other, result): name = self.name if self.name == other.name else None - return self.__class__(data=result, name=name) + return self.__class__(result, name=name) def intersection(self, other): """ @@ -2158,6 +2185,7 @@ def reindex(self, target, method=None, level=None, limit=None, # GH7774: preserve dtype/tz if target is empty and not an Index. 
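# Editorial sketch, not part of the patch: the guard below is what lets an
# empty, non-Index target keep this index's dtype/tz on reindex.
# Hypothetical session (assumes ``import pandas as pd``):
#
#   >>> idx = pd.DatetimeIndex(['2016-01-01'], tz='US/Eastern')
#   >>> new_idx, indexer = idx.reindex([])
#   >>> new_idx.dtype      # dtype/tz preserved despite the empty target
#   datetime64[ns, US/Eastern]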
target = _ensure_has_len(target) # target may be an iterator + if not isinstance(target, Index) and len(target) == 0: attrs = self._get_attributes_dict() attrs.pop('freq', None) # don't preserve freq @@ -2221,9 +2249,9 @@ def _reindex_non_unique(self, target): missing = com._ensure_platform_int(missing) missing_labels = target.take(missing) - missing_indexer = com._ensure_int64(l[~check]) + missing_indexer = _ensure_int64(l[~check]) cur_labels = self.take(indexer[check])._values - cur_indexer = com._ensure_int64(l[check]) + cur_indexer = _ensure_int64(l[check]) new_labels = np.empty(tuple([len(indexer)]), dtype=object) new_labels[cur_indexer] = cur_labels @@ -2442,7 +2470,7 @@ def _get_leaf_sorter(labels): return np.empty(0, dtype='int64') if len(labels) == 1: - lab = com._ensure_int64(labels[0]) + lab = _ensure_int64(labels[0]) sorter, _ = groupsort_indexer(lab, 1 + lab.max()) return sorter @@ -2453,8 +2481,8 @@ def _get_leaf_sorter(labels): tic |= lab[:-1] != lab[1:] starts = np.hstack(([True], tic, [True])).nonzero()[0] - lab = com._ensure_int64(labels[-1]) - return lib.get_level_sorter(lab, com._ensure_int64(starts)) + lab = _ensure_int64(labels[-1]) + return lib.get_level_sorter(lab, _ensure_int64(starts)) if isinstance(self, MultiIndex) and isinstance(other, MultiIndex): raise TypeError('Join on level between two MultiIndex objects ' @@ -2486,7 +2514,7 @@ def _get_leaf_sorter(labels): join_index = left[left_indexer] else: - left_lev_indexer = com._ensure_int64(left_lev_indexer) + left_lev_indexer = _ensure_int64(left_lev_indexer) rev_indexer = lib.get_reverse_indexer(left_lev_indexer, len(old_level)) @@ -2956,6 +2984,7 @@ def invalid_op(self, other=None): invalid_op.__name__ = name return invalid_op + cls.__pow__ = cls.__rpow__ = _make_invalid_op('__pow__') cls.__mul__ = cls.__rmul__ = _make_invalid_op('__mul__') cls.__floordiv__ = cls.__rfloordiv__ = _make_invalid_op('__floordiv__') cls.__truediv__ = cls.__rtruediv__ = _make_invalid_op('__truediv__') @@ -2970,40 +2999,82 @@ def _maybe_update_attributes(self, attrs): """ Update Index attributes (e.g. freq) depending on op """ return attrs + def _validate_for_numeric_unaryop(self, op, opstr): + """ validate if we can perform a numeric unary operation """ + + if not self._is_numeric_dtype: + raise TypeError("cannot evaluate a numeric op " + "{opstr} for type: {typ}".format( + opstr=opstr, + typ=type(self)) + ) + + def _validate_for_numeric_binop(self, other, op, opstr): + """ + return valid other, evaluate or raise TypeError + if we are not of the appropriate type + + internal method called by ops + """ + from pandas.tseries.offsets import DateOffset + + # if we are an inheritor of numeric, + # but not actually numeric (e.g. 
DatetimeIndex/PeriodInde) + if not self._is_numeric_dtype: + raise TypeError("cannot evaluate a numeric op {opstr} " + "for type: {typ}".format( + opstr=opstr, + typ=type(self)) + ) + + if isinstance(other, Index): + if not other._is_numeric_dtype: + raise TypeError("cannot evaluate a numeric op " + "{opstr} with type: {typ}".format( + opstr=type(self), + typ=type(other)) + ) + elif isinstance(other, np.ndarray) and not other.ndim: + other = other.item() + + if isinstance(other, (Index, ABCSeries, np.ndarray)): + if len(self) != len(other): + raise ValueError("cannot evaluate a numeric op with " + "unequal lengths") + other = _values_from_object(other) + if other.dtype.kind not in ['f', 'i']: + raise TypeError("cannot evaluate a numeric op " + "with a non-numeric dtype") + elif isinstance(other, (DateOffset, np.timedelta64, + Timedelta, datetime.timedelta)): + # higher up to handle + pass + elif isinstance(other, (Timestamp, np.datetime64)): + # higher up to handle + pass + else: + if not (is_float(other) or is_integer(other)): + raise TypeError("can only perform ops with scalar values") + + return other + @classmethod - def _add_numeric_methods(cls): + def _add_numeric_methods_binary(cls): """ add in numeric methods """ def _make_evaluate_binop(op, opstr, reversed=False): def _evaluate_numeric_binop(self, other): - import pandas.tseries.offsets as offsets - - # if we are an inheritor of numeric, but not actually numeric (e.g. DatetimeIndex/PeriodInde) - if not self._is_numeric_dtype: - raise TypeError("cannot evaluate a numeric op {opstr} for type: {typ}".format(opstr=opstr, - typ=type(self))) - - if isinstance(other, Index): - if not other._is_numeric_dtype: - raise TypeError("cannot evaluate a numeric op {opstr} with type: {typ}".format(opstr=type(self), - typ=type(other))) - elif isinstance(other, np.ndarray) and not other.ndim: - other = other.item() - if isinstance(other, (Index, ABCSeries, np.ndarray)): - if len(self) != len(other): - raise ValueError("cannot evaluate a numeric op with unequal lengths") - other = _values_from_object(other) - if other.dtype.kind not in ['f','i']: - raise TypeError("cannot evaluate a numeric op with a non-numeric dtype") - elif isinstance(other, (offsets.DateOffset, np.timedelta64, Timedelta, datetime.timedelta)): + from pandas.tseries.offsets import DateOffset + other = self._validate_for_numeric_binop(other, op, opstr) + + # handle time-based others + if isinstance(other, (DateOffset, np.timedelta64, + Timedelta, datetime.timedelta)): return self._evaluate_with_timedelta_like(other, op, opstr) elif isinstance(other, (Timestamp, np.datetime64)): return self._evaluate_with_datetime_like(other, op, opstr) - else: - if not (is_float(other) or is_integer(other)): - raise TypeError("can only perform ops with scalar values") # if we are a reversed non-communative op values = self.values @@ -3016,28 +3087,18 @@ def _evaluate_numeric_binop(self, other): return _evaluate_numeric_binop - def _make_evaluate_unary(op, opstr): - - def _evaluate_numeric_unary(self): - - # if we are an inheritor of numeric, but not actually numeric (e.g. 
DatetimeIndex/PeriodInde) - if not self._is_numeric_dtype: - raise TypeError("cannot evaluate a numeric op {opstr} for type: {typ}".format(opstr=opstr, - typ=type(self))) - attrs = self._get_attributes_dict() - attrs = self._maybe_update_attributes(attrs) - return Index(op(self.values), **attrs) - - return _evaluate_numeric_unary - cls.__add__ = cls.__radd__ = _make_evaluate_binop( operator.add, '__add__') - cls.__sub__ = _make_evaluate_binop(operator.sub, '__sub__') + cls.__sub__ = _make_evaluate_binop( + operator.sub, '__sub__') cls.__rsub__ = _make_evaluate_binop( operator.sub, '__sub__', reversed=True) cls.__mul__ = cls.__rmul__ = _make_evaluate_binop( operator.mul, '__mul__') - cls.__mod__ = _make_evaluate_binop(operator.mod, '__mod__') + cls.__pow__ = cls.__rpow__ = _make_evaluate_binop( + operator.pow, '__pow__') + cls.__mod__ = _make_evaluate_binop( + operator.mod, '__mod__') cls.__floordiv__ = _make_evaluate_binop( operator.floordiv, '__floordiv__') cls.__rfloordiv__ = _make_evaluate_binop( @@ -3051,11 +3112,32 @@ def _evaluate_numeric_unary(self): operator.div, '__div__') cls.__rdiv__ = _make_evaluate_binop( operator.div, '__div__', reversed=True) + + @classmethod + def _add_numeric_methods_unary(cls): + """ add in numeric unary methods """ + + def _make_evaluate_unary(op, opstr): + + def _evaluate_numeric_unary(self): + + self._validate_for_numeric_unaryop(op, opstr) + attrs = self._get_attributes_dict() + attrs = self._maybe_update_attributes(attrs) + return Index(op(self.values), **attrs) + + return _evaluate_numeric_unary + cls.__neg__ = _make_evaluate_unary(lambda x: -x, '__neg__') cls.__pos__ = _make_evaluate_unary(lambda x: x, '__pos__') cls.__abs__ = _make_evaluate_unary(np.abs, '__abs__') cls.__inv__ = _make_evaluate_unary(lambda x: -x, '__inv__') + @classmethod + def _add_numeric_methods(cls): + cls._add_numeric_methods_unary() + cls._add_numeric_methods_binary() + @classmethod def _add_logical_methods(cls): """ add in logical methods """ @@ -3828,6 +3910,560 @@ def _wrap_joined_index(self, joined, other): Int64Index._add_logical_methods() +class RangeIndex(Int64Index): + + """ + Immutable Index implementing a monotonic range. RangeIndex is a + memory-saving special case of Int64Index limited to representing + monotonic ranges. + + Parameters + ---------- + start : int (default: 0) + stop : int (default: 0) + step : int (default: 1) + name : object, optional + Name to be stored in the index + copy : bool, default False + Make a copy of input if its a RangeIndex + + """ + + _typ = 'rangeindex' + _engine_type = _index.Int64Engine + + def __new__(cls, start=None, stop=None, step=None, name=None, dtype=None, + fastpath=False, copy=False, **kwargs): + + if fastpath: + return cls._simple_new(start, stop, step, name=name) + + cls._validate_dtype(dtype) + + # RangeIndex + if isinstance(start, RangeIndex): + if not copy: + return start + if name is None: + name = getattr(start, 'name', None) + start, stop, step = start._start, start._stop, start._step + + # validate the arguments + def _ensure_int(value, field): + try: + new_value = int(value) + except: + new_value = value + + if not is_integer(new_value) or new_value != value: + raise TypeError("RangeIndex(...) 
must be called with integers," + " {value} was passed for {field}".format( + value=type(value).__name__, + field=field) + ) + + return new_value + + if start is None: + start = 0 + else: + start = _ensure_int(start, 'start') + if stop is None: + stop = start + start = 0 + else: + stop = _ensure_int(stop, 'stop') + if step is None: + step = 1 + elif step == 0: + raise ValueError("Step must not be zero") + else: + step = _ensure_int(step, 'step') + + return cls._simple_new(start, stop, step, name) + + @classmethod + def from_range(cls, data, name=None, dtype=None, **kwargs): + """ create RangeIndex from a range (py3), or xrange (py2) object """ + if not isinstance(data, range): + raise TypeError( + '{0}(...) must be called with object coercible to a ' + 'range, {1} was passed'.format(cls.__name__, repr(data))) + + if compat.PY3: + step = data.step + stop = data.stop + start = data.start + else: + # seems we only have indexing ops to infer + # rather than direct accessors + if len(data) > 1: + step = data[1] - data[0] + stop = data[-1] + step + start = data[0] + elif len(data): + start = data[0] + stop = data[0] + 1 + step = 1 + else: + start = stop = 0 + step = 1 + return RangeIndex(start, stop, step, dtype=dtype, name=name, **kwargs) + + @classmethod + def _simple_new(cls, start, stop=None, step=None, name=None, + dtype=None, **kwargs): + result = object.__new__(cls) + + # handle passed None, non-integers + if start is None or not is_integer(start): + try: + return RangeIndex(start, stop, step, name=name, **kwargs) + except TypeError: + return Index(start, stop, step, name=name, **kwargs) + + result._start = start + result._stop = stop or 0 + result._step = step or 1 + result.name = name + for k, v in compat.iteritems(kwargs): + setattr(result, k, v) + + result._reset_identity() + return result + + @staticmethod + def _validate_dtype(dtype): + """ require dtype to be None or int64 """ + if not (dtype is None or is_int64_dtype(dtype)): + raise TypeError('Invalid to pass a non-int64 dtype to RangeIndex') + + @cache_readonly + def _constructor(self): + """ return the class to use for construction """ + return Int64Index + + @cache_readonly + def _data(self): + return np.arange(self._start, self._stop, self._step, dtype=np.int64) + + @cache_readonly + def _int64index(self): + return Int64Index(self._data, name=self.name, fastpath=True) + + def _get_data_as_items(self): + """ return a list of tuples of start, stop, step """ + return [('start', self._start), + ('stop', self._stop), + ('step', self._step)] + + def __reduce__(self): + d = self._get_attributes_dict() + d.update(dict(self._get_data_as_items())) + return _new_Index, (self.__class__, d), None + + def _format_attrs(self): + """ + Return a list of tuples of the (attr, formatted_value) + """ + attrs = self._get_data_as_items() + if self.name is not None: + attrs.append(('name', default_pprint(self.name))) + return attrs + + def _format_data(self): + # we are formatting thru the attributes + return None + + @cache_readonly + def nbytes(self): + """ return the number of bytes in the underlying data """ + return sum([getsizeof(getattr(self, v)) for v in + ['_start', '_stop', '_step']]) + + def memory_usage(self, deep=False): + """ + Memory usage of my values + + Parameters + ---------- + deep : bool + Introspect the data deeply, interrogate + `object` dtypes for system-level memory consumption + + Returns + ------- + bytes used + + Notes + ----- + Memory usage does not include memory consumed by elements that + are not components of the 
array if deep=False + + See Also + -------- + numpy.ndarray.nbytes + """ + return self.nbytes + + @property + def dtype(self): + return np.dtype(np.int64) + + @property + def is_unique(self): + """ return if the index has unique values """ + return True + + @property + def has_duplicates(self): + return False + + def tolist(self): + return lrange(self._start, self._stop, self._step) + + def _shallow_copy(self, values=None, **kwargs): + """ create a new Index, don't copy the data, use the same object attributes + with passed in attributes taking precedence """ + if values is None: + return RangeIndex(name=self.name, fastpath=True, + **dict(self._get_data_as_items())) + else: + kwargs.setdefault('name', self.name) + return self._int64index._shallow_copy(values, **kwargs) + + @Appender(_index_shared_docs['copy']) + def copy(self, name=None, deep=False, dtype=None, **kwargs): + self._validate_dtype(dtype) + if name is None: + name = self.name + return RangeIndex(name=name, fastpath=True, + **dict(self._get_data_as_items())) + + def argsort(self, *args, **kwargs): + """ + return an ndarray indexer of the underlying data + + See also + -------- + numpy.ndarray.argsort + """ + if self._step > 0: + return np.arange(len(self)) + else: + return np.arange(len(self) - 1, -1, -1) + + def equals(self, other): + """ + Determines if two Index objects contain the same elements. + """ + if isinstance(other, RangeIndex): + ls = len(self) + lo = len(other) + return (ls == lo == 0 or + ls == lo == 1 and + self._start == other._start or + ls == lo and + self._start == other._start and + self._step == other._step) + + return super(RangeIndex, self).equals(other) + + def intersection(self, other): + """ + Form the intersection of two Index objects. Sortedness of the result is + not guaranteed + + Parameters + ---------- + other : Index or array-like + + Returns + ------- + intersection : Index + """ + if not isinstance(other, RangeIndex): + return super(RangeIndex, self).intersection(other) + + # check whether intervals intersect + # deals with in- and decreasing ranges + int_low = max(min(self._start, self._stop + 1), + min(other._start, other._stop + 1)) + int_high = min(max(self._stop, self._start + 1), + max(other._stop, other._start + 1)) + if int_high <= int_low: + return RangeIndex() + + # Method hint: linear Diophantine equation + # solve intersection problem + # performance hint: for identical step sizes, could use + # cheaper alternative + gcd, s, t = self._extended_gcd(self._step, other._step) + + # check whether element sets intersect + if (self._start - other._start) % gcd: + return RangeIndex() + + # calculate parameters for the RangeIndex describing the + # intersection disregarding the lower bounds + tmp_start = self._start + (other._start - self._start) * \ + self._step // gcd * s + new_step = self._step * other._step // gcd + new_index = RangeIndex(tmp_start, int_high, new_step, fastpath=True) + + # adjust index to limiting interval + new_index._start = new_index._min_fitting_element(int_low) + return new_index + + def _min_fitting_element(self, lower_limit): + """Returns the value of the smallest element greater than the limit""" + round = ceil if self._step > 0 else floor + no_steps = round((float(lower_limit) - self._start) / self._step) + return self._start + self._step * no_steps + + def _max_fitting_element(self, upper_limit): + """Returns the value of the largest element smaller than the limit""" + round = floor if self._step > 0 else ceil + no_steps = round((float(upper_limit) - 
self._start) / self._step) + return self._start + self._step * no_steps + + def _extended_gcd(self, a, b): + """ + Extended Euclidean algorithms to solve Bezout's identity: + a*x + b*y = gcd(x, y) + Finds one particular solution for x, y: s, t + Returns: gcd, s, t + """ + s, old_s = 0, 1 + t, old_t = 1, 0 + r, old_r = b, a + while r: + quotient = old_r // r + old_r, r = r, old_r - quotient * r + old_s, s = s, old_s - quotient * s + old_t, t = t, old_t - quotient * t + return old_r, old_s, old_t + + def union(self, other): + """ + Form the union of two Index objects and sorts if possible + + Parameters + ---------- + other : Index or array-like + + Returns + ------- + union : Index + """ + # note: could return a RangeIndex in some circumstances + return self._int64index.union(other) + + def join(self, other, how='left', level=None, return_indexers=False): + """ + *this is an internal non-public method* + + Compute join_index and indexers to conform data + structures to the new index. + + Parameters + ---------- + other : Index + how : {'left', 'right', 'inner', 'outer'} + level : int or level name, default None + return_indexers : boolean, default False + + Returns + ------- + join_index, (left_indexer, right_indexer) + """ + if how == 'outer' and self is not other: + # note: could return RangeIndex in more circumstances + return self._int64index.join(other, how, level, return_indexers) + + return super(RangeIndex, self).join(other, how, level, return_indexers) + + def __len__(self): + """ + return the length of the RangeIndex + """ + return max(0, -(-(self._stop - self._start) // self._step)) + + @property + def size(self): + return len(self) + + def __getitem__(self, key): + """ + Conserve RangeIndex type for scalar and slice keys. + """ + super_getitem = super(RangeIndex, self).__getitem__ + + if np.isscalar(key): + n = int(key) + if n != key: + return super_getitem(key) + if n < 0: + n = len(self) + key + if n < 0 or n > len(self) - 1: + raise IndexError("index {key} is out of bounds for axis 0 " + "with size {size}".format(key=key, + size=len(self))) + return self._start + n * self._step + + if isinstance(key, slice): + + # This is basically PySlice_GetIndicesEx, but delegation to our + # super routines if we don't have integers + + l = len(self) + + # complete missing slice information + step = 1 if key.step is None else key.step + if key.start is None: + start = l - 1 if step < 0 else 0 + else: + start = key.start + + if start < 0: + start += l + if start < 0: + start = -1 if step < 0 else 0 + if start >= l: + start = l - 1 if step < 0 else l + + if key.stop is None: + stop = -1 if step < 0 else l + else: + stop = key.stop + + if stop < 0: + stop += l + if stop < 0: + stop = -1 + if stop > l: + stop = l + + # delegate non-integer slices + if (start != int(start) and + stop != int(stop) and + step != int(step)): + return super_getitem(key) + + # convert indexes to values + start = self._start + self._step * start + stop = self._start + self._step * stop + step = self._step * step + + return RangeIndex(start, stop, step, self.name, fastpath=True) + + # fall back to Int64Index + return super_getitem(key) + + @classmethod + def _add_numeric_methods_binary(cls): + """ add in numeric methods, specialized to RangeIndex """ + + def _make_evaluate_binop(op, opstr, reversed=False, step=False): + """ + Parameters + ---------- + op : callable that accepts 2 parms + perform the binary op + opstr : string + string name of ops + reversed : boolean, default False + if this is a reversed op, e.g. 
radd + step : callable, optional, default to False + op to apply to the step parm if not None + if False, use the existing step + """ + + def _evaluate_numeric_binop(self, other): + + other = self._validate_for_numeric_binop(other, op, opstr) + attrs = self._get_attributes_dict() + attrs = self._maybe_update_attributes(attrs) + + if reversed: + self, other = other, self + + try: + # alppy if we have an override + if step: + rstep = step(self._step, other) + + # we don't have a representable op + # so return a base index + if not is_integer(rstep) or not rstep: + raise ValueError + + else: + rstep = self._step + + rstart = op(self._start, other) + rstop = op(self._stop, other) + + result = RangeIndex(rstart, + rstop, + rstep, + **attrs) + + # for compat with numpy / Int64Index + # even if we can represent as a RangeIndex, return + # as a Float64Index if we have float-like descriptors + if not all([is_integer(x) for x in + [rstart, rstop, rstep]]): + result = result.astype('float64') + + return result + + except (ValueError, TypeError, AttributeError): + pass + + # convert to Int64Index ops + if isinstance(self, RangeIndex): + self = self.values + if isinstance(other, RangeIndex): + other = other.values + + return Index(op(self, other), **attrs) + + return _evaluate_numeric_binop + + cls.__add__ = cls.__radd__ = _make_evaluate_binop( + operator.add, '__add__') + cls.__sub__ = _make_evaluate_binop(operator.sub, '__sub__') + cls.__rsub__ = _make_evaluate_binop( + operator.sub, '__sub__', reversed=True) + cls.__mul__ = cls.__rmul__ = _make_evaluate_binop( + operator.mul, + '__mul__', + step=operator.mul) + cls.__truediv__ = _make_evaluate_binop( + operator.truediv, + '__truediv__', + step=operator.truediv) + cls.__rtruediv__ = _make_evaluate_binop( + operator.truediv, + '__truediv__', + reversed=True, + step=operator.truediv) + if not compat.PY3: + cls.__div__ = _make_evaluate_binop( + operator.div, + '__div__', + step=operator.div) + cls.__rdiv__ = _make_evaluate_binop( + operator.div, + '__div__', + reversed=True, + step=operator.div) + +RangeIndex._add_numeric_methods() +RangeIndex._add_logical_methods() + + class Float64Index(NumericIndex): """ @@ -4658,10 +5294,12 @@ def get_level_values(self, level): num = self._get_level_number(level) unique = self.levels[num] # .values labels = self.labels[num] - filled = com.take_1d(unique._values, labels, fill_value=unique._na_value) - values = unique._simple_new(filled, self.names[num], - freq=getattr(unique, 'freq', None), - tz=getattr(unique, 'tz', None)) + filled = com.take_1d(unique.values, labels, + fill_value=unique._na_value) + _simple_new = unique._simple_new + values = _simple_new(filled, self.names[num], + freq=getattr(unique, 'freq', None), + tz=getattr(unique, 'tz', None)) return values def format(self, space=2, sparsify=None, adjoin=True, names=False, @@ -5740,7 +6378,7 @@ def convert_indexer(start, stop, step, indexer=indexer, labels=labels): # a partial date slicer on a DatetimeIndex generates a slice # note that the stop ALREADY includes the stopped point (if # it was a string sliced) - return convert_indexer(start.start,stop.stop,step) + return convert_indexer(start.start, stop.stop, step) elif level > 0 or self.lexsort_depth == 0 or step is not None: # need to have like semantics here to right diff --git a/pandas/core/series.py b/pandas/core/series.py index ed5b9093681f1..73e645039506f 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -110,7 +110,7 @@ class Series(base.IndexOpsMixin, strings.StringAccessorMixin, 
generic.NDFrame,): index : array-like or Index (1d) Values must be unique and hashable, same length as data. Index object (or other iterable of same length as data) Will default to - np.arange(len(data)) if not provided. If both a dict and index + RangeIndex(len(data)) if not provided. If both a dict and index sequence are used, the index will override the keys found in the dict. dtype : numpy.dtype or None @@ -920,7 +920,7 @@ def reset_index(self, level=None, drop=False, name=None, inplace=False): resetted : DataFrame, or Series if drop == True """ if drop: - new_index = np.arange(len(self)) + new_index = _default_index(len(self)) if level is not None and isinstance(self.index, MultiIndex): if not isinstance(level, (tuple, list)): level = [level] @@ -1706,7 +1706,7 @@ def _try_kind_sort(arr): bad = isnull(arr) good = ~bad - idx = np.arange(len(self)) + idx = _default_index(len(self)) argsorted = _try_kind_sort(arr[good]) diff --git a/pandas/io/packers.py b/pandas/io/packers.py index d5c02736a1cf5..0ba1254659540 100644 --- a/pandas/io/packers.py +++ b/pandas/io/packers.py @@ -49,8 +49,8 @@ from pandas.compat import u, PY3 from pandas import ( Timestamp, Period, Series, DataFrame, Panel, Panel4D, - Index, MultiIndex, Int64Index, PeriodIndex, DatetimeIndex, Float64Index, - NaT + Index, MultiIndex, Int64Index, RangeIndex, PeriodIndex, + DatetimeIndex, Float64Index, NaT ) from pandas.sparse.api import SparseSeries, SparseDataFrame, SparsePanel from pandas.sparse.array import BlockIndex, IntIndex @@ -273,7 +273,14 @@ def encode(obj): tobj = type(obj) if isinstance(obj, Index): - if isinstance(obj, PeriodIndex): + if isinstance(obj, RangeIndex): + return {'typ': 'range_index', + 'klass': obj.__class__.__name__, + 'name': getattr(obj, 'name', None), + 'start': getattr(obj, '_start', None), + 'stop': getattr(obj, '_stop', None), + 'step': getattr(obj, '_step', None)} + elif isinstance(obj, PeriodIndex): return {'typ': 'period_index', 'klass': obj.__class__.__name__, 'name': getattr(obj, 'name', None), @@ -464,6 +471,11 @@ def decode(obj): data = unconvert(obj['data'], dtype, obj.get('compress')) return globals()[obj['klass']](data, dtype=dtype, name=obj['name']) + elif typ == 'range_index': + return globals()[obj['klass']](obj['start'], + obj['stop'], + obj['step'], + name=obj['name']) elif typ == 'multi_index': dtype = dtype_for(obj['dtype']) data = unconvert(obj['data'], dtype, diff --git a/pandas/io/tests/test_json/test_pandas.py b/pandas/io/tests/test_json/test_pandas.py index 5f41a803538e6..1690667ef743b 100644 --- a/pandas/io/tests/test_json/test_pandas.py +++ b/pandas/io/tests/test_json/test_pandas.py @@ -729,7 +729,7 @@ def test_misc_example(self): DataFrame\\.index values are different \\(100\\.0 %\\) \\[left\\]: Index\\(\\[u?'a', u?'b'\\], dtype='object'\\) -\\[right\\]: Int64Index\\(\\[0, 1\\], dtype='int64'\\)""" +\\[right\\]: RangeIndex\\(start=0, stop=2, step=1\\)""" with tm.assertRaisesRegexp(AssertionError, error_msg): assert_frame_equal(result, expected, check_index_type=False) diff --git a/pandas/io/tests/test_packers.py b/pandas/io/tests/test_packers.py index 61b24c858b60d..bdbcb9c0d0d3e 100644 --- a/pandas/io/tests/test_packers.py +++ b/pandas/io/tests/test_packers.py @@ -253,6 +253,7 @@ def setUp(self): 'string': tm.makeStringIndex(100), 'date': tm.makeDateIndex(100), 'int': tm.makeIntIndex(100), + 'rng': tm.makeRangeIndex(100), 'float': tm.makeFloatIndex(100), 'empty': Index([]), 'tuple': Index(zip(['foo', 'bar', 'baz'], [1, 2, 3])), diff --git 
a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py index c13afb34dfb84..38f5150516551 100644 --- a/pandas/io/tests/test_pytables.py +++ b/pandas/io/tests/test_pytables.py @@ -10,8 +10,10 @@ import pandas import pandas as pd -from pandas import (Series, DataFrame, Panel, MultiIndex, Categorical, bdate_range, - date_range, timedelta_range, Index, DatetimeIndex, TimedeltaIndex, isnull) +from pandas import (Series, DataFrame, Panel, MultiIndex, Int64Index, + RangeIndex, Categorical, bdate_range, + date_range, timedelta_range, Index, DatetimeIndex, + isnull) from pandas.compat import is_platform_windows, PY3, PY35 from pandas.io.pytables import _tables, TableIterator @@ -1619,34 +1621,51 @@ def test_column_multiindex(self): # GH 4710 # recreate multi-indexes properly - index = MultiIndex.from_tuples([('A','a'), ('A','b'), ('B','a'), ('B','b')], names=['first','second']) - df = DataFrame(np.arange(12).reshape(3,4), columns=index) + index = MultiIndex.from_tuples([('A', 'a'), ('A', 'b'), + ('B', 'a'), ('B', 'b')], + names=['first', 'second']) + df = DataFrame(np.arange(12).reshape(3, 4), columns=index) + expected = df.copy() + if isinstance(expected.index, RangeIndex): + expected.index = Int64Index(expected.index) with ensure_clean_store(self.path) as store: - store.put('df',df) - tm.assert_frame_equal(store['df'],df,check_index_type=True,check_column_type=True) + store.put('df', df) + tm.assert_frame_equal(store['df'], expected, + check_index_type=True, + check_column_type=True) - store.put('df1',df,format='table') - tm.assert_frame_equal(store['df1'],df,check_index_type=True,check_column_type=True) + store.put('df1', df, format='table') + tm.assert_frame_equal(store['df1'], expected, + check_index_type=True, + check_column_type=True) - self.assertRaises(ValueError, store.put, 'df2',df,format='table',data_columns=['A']) - self.assertRaises(ValueError, store.put, 'df3',df,format='table',data_columns=True) + self.assertRaises(ValueError, store.put, 'df2', df, + format='table', data_columns=['A']) + self.assertRaises(ValueError, store.put, 'df3', df, + format='table', data_columns=True) # appending multi-column on existing table (see GH 6167) with ensure_clean_store(self.path) as store: store.append('df2', df) store.append('df2', df) - tm.assert_frame_equal(store['df2'], concat((df,df))) + tm.assert_frame_equal(store['df2'], concat((df, df))) # non_index_axes name - df = DataFrame(np.arange(12).reshape(3,4), columns=Index(list('ABCD'),name='foo')) + df = DataFrame(np.arange(12).reshape(3, 4), + columns=Index(list('ABCD'), name='foo')) + expected = df.copy() + if isinstance(expected.index, RangeIndex): + expected.index = Int64Index(expected.index) with ensure_clean_store(self.path) as store: - store.put('df1',df,format='table') - tm.assert_frame_equal(store['df1'],df,check_index_type=True,check_column_type=True) + store.put('df1', df, format='table') + tm.assert_frame_equal(store['df1'], expected, + check_index_type=True, + check_column_type=True) def test_store_multiindex(self): @@ -2478,11 +2497,6 @@ def test_backwards_compat_without_term_object(self): expected = wp.loc[:, [Timestamp('20000102'), Timestamp('20000103')]] assert_panel_equal(result, expected) - with assert_produces_warning(expected_warning=FutureWarning, - check_stacklevel=False): - result = store.select('wp', [('minor_axis', '=', ['A', 'B'])]) - expected = wp.loc[:, :, ['A', 'B']] - assert_panel_equal(result, expected) def test_same_name_scoping(self): diff --git a/pandas/src/reduce.pyx b/pandas/src/reduce.pyx 
index be6e11ce70c76..892fee77eb177 100644 --- a/pandas/src/reduce.pyx +++ b/pandas/src/reduce.pyx @@ -179,8 +179,8 @@ cdef class SeriesBinGrouper: if not values.flags.c_contiguous: values = values.copy('C') self.arr = values - self.typ = type(series) - self.ityp = type(series.index) + self.typ = series._constructor + self.ityp = series.index._constructor self.index = series.index.values self.name = getattr(series,'name',None) @@ -306,8 +306,8 @@ cdef class SeriesGrouper: if not values.flags.c_contiguous: values = values.copy('C') self.arr = values - self.typ = type(series) - self.ityp = type(series.index) + self.typ = series._constructor + self.ityp = series.index._constructor self.index = series.index.values self.name = getattr(series,'name',None) diff --git a/pandas/tests/frame/test_repr_info.py b/pandas/tests/frame/test_repr_info.py index a458445081be5..c5c005beeb69e 100644 --- a/pandas/tests/frame/test_repr_info.py +++ b/pandas/tests/frame/test_repr_info.py @@ -344,10 +344,13 @@ def test_info_memory_usage(self): data[i] = np.random.randint(2, size=n).astype(dtype) df = DataFrame(data) df.columns = dtypes + # Ensure df size is as expected + # (cols * rows * bytes) + index size df_size = df.memory_usage().sum() - exp_size = (len(dtypes) + 1) * n * 8 # (cols + index) * rows * bytes + exp_size = len(dtypes) * n * 8 + df.index.nbytes self.assertEqual(df_size, exp_size) + # Ensure number of cols in memory_usage is the same as df size_df = np.size(df.columns.values) + 1 # index=True; default self.assertEqual(size_df, np.size(df.memory_usage())) diff --git a/pandas/tests/test_index.py b/pandas/tests/test_index.py index d0c2d2bd15b4e..4dcc390787908 100644 --- a/pandas/tests/test_index.py +++ b/pandas/tests/test_index.py @@ -2,7 +2,10 @@ # pylint: disable=E1101,E1103,W0232 from datetime import datetime, timedelta, time -from pandas.compat import range, lrange, lzip, u, zip, PY3 +from pandas import compat +from pandas.compat import (long, is_platform_windows, range, + lrange, lzip, u, zip, PY3) +from itertools import combinations import operator import re import nose @@ -12,19 +15,18 @@ import numpy as np from pandas import (period_range, date_range, Categorical, Series, - Index, Float64Index, Int64Index, MultiIndex, - CategoricalIndex, DatetimeIndex, TimedeltaIndex, PeriodIndex) -from pandas.core.index import InvalidIndexError, NumericIndex + DataFrame, Index, Float64Index, Int64Index, RangeIndex, + MultiIndex, CategoricalIndex, DatetimeIndex, + TimedeltaIndex, PeriodIndex) +from pandas.core.index import InvalidIndexError from pandas.util.testing import (assert_almost_equal, assertRaisesRegexp, assert_copy) -from pandas import compat -from pandas.compat import long, is_platform_windows + import pandas.util.testing as tm import pandas.core.config as cf from pandas.tseries.index import _to_m8 -import pandas.tseries.offsets as offsets import pandas as pd from pandas.lib import Timestamp @@ -90,33 +92,34 @@ def test_numeric_compat(self): idx = self.create_index() tm.assertRaisesRegexp(TypeError, "cannot perform __mul__", - lambda : idx * 1) + lambda: idx * 1) tm.assertRaisesRegexp(TypeError, "cannot perform __mul__", - lambda : 1 * idx) + lambda: 1 * idx) - div_err = "cannot perform __truediv__" if compat.PY3 else "cannot perform __div__" + div_err = "cannot perform __truediv__" if PY3 \ + else "cannot perform __div__" tm.assertRaisesRegexp(TypeError, div_err, - lambda : idx / 1) + lambda: idx / 1) tm.assertRaisesRegexp(TypeError, div_err, - lambda : 1 / idx) + lambda: 1 / idx) 
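# Editorial sketch, not part of the patch: these assertions encode that a
# non-numeric Index refuses arithmetic ops. Hypothetical session (assumes
# ``import pandas as pd``):
#
#   >>> pd.Index(['a', 'b']) * 1
#   TypeError: cannot perform __mul__ ...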
tm.assertRaisesRegexp(TypeError, "cannot perform __floordiv__", - lambda : idx // 1) + lambda: idx // 1) tm.assertRaisesRegexp(TypeError, "cannot perform __floordiv__", - lambda : 1 // idx) + lambda: 1 // idx) def test_logical_compat(self): idx = self.create_index() tm.assertRaisesRegexp(TypeError, 'cannot perform all', - lambda : idx.all()) + lambda: idx.all()) tm.assertRaisesRegexp(TypeError, 'cannot perform any', - lambda : idx.any()) + lambda: idx.any()) def test_boolean_context_compat(self): @@ -467,6 +470,10 @@ def test_delete_base(self): if not len(idx): continue + if isinstance(idx, RangeIndex): + # tested in class + continue + expected = idx[1:] result = idx.delete(0) self.assertTrue(result.equals(expected)) @@ -673,18 +680,19 @@ class TestIndex(Base, tm.TestCase): def setUp(self): self.indices = dict( - unicodeIndex = tm.makeUnicodeIndex(100), - strIndex = tm.makeStringIndex(100), - dateIndex = tm.makeDateIndex(100), - periodIndex = tm.makePeriodIndex(100), - tdIndex = tm.makeTimedeltaIndex(100), - intIndex = tm.makeIntIndex(100), - floatIndex = tm.makeFloatIndex(100), - boolIndex = Index([True,False]), - catIndex = tm.makeCategoricalIndex(100), - empty = Index([]), - tuples = MultiIndex.from_tuples(lzip(['foo', 'bar', 'baz'], - [1, 2, 3])) + unicodeIndex=tm.makeUnicodeIndex(100), + strIndex=tm.makeStringIndex(100), + dateIndex=tm.makeDateIndex(100), + periodIndex=tm.makePeriodIndex(100), + tdIndex=tm.makeTimedeltaIndex(100), + intIndex=tm.makeIntIndex(100), + rangeIndex=tm.makeIntIndex(100), + floatIndex=tm.makeFloatIndex(100), + boolIndex=Index([True, False]), + catIndex=tm.makeCategoricalIndex(100), + empty=Index([]), + tuples=MultiIndex.from_tuples(lzip(['foo', 'bar', 'baz'], + [1, 2, 3])) ) self.setup_indices() @@ -1065,7 +1073,6 @@ def test_empty_fancy(self): # be tested separately. 
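# Editorial sketch, not part of the patch: the loop below just checks that
# an empty fancy indexer preserves the index type. Hypothetical session
# (assumes ``import pandas as pd``):
#
#   >>> pd.Index(['a', 'b', 'c'])[[]]
#   Index([], dtype='object')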
for idx in [self.strIndex, self.intIndex, self.floatIndex]: empty_idx = idx.__class__([]) - values = idx.values self.assertTrue(idx[[]].identical(empty_idx)) self.assertTrue(idx[empty_iarr].identical(empty_idx)) @@ -2382,18 +2389,18 @@ def test_repr_roundtrip(self): ci = CategoricalIndex(['a', 'b'], categories=['a', 'b'], ordered=True) str(ci) - tm.assert_index_equal(eval(repr(ci)),ci,exact=True) + tm.assert_index_equal(eval(repr(ci)), ci, exact=True) # formatting - if compat.PY3: + if PY3: str(ci) else: compat.text_type(ci) # long format # this is not reprable - ci = CategoricalIndex(np.random.randint(0,5,size=100)) - if compat.PY3: + ci = CategoricalIndex(np.random.randint(0, 5, size=100)) + if PY3: str(ci) else: compat.text_type(ci) @@ -2636,7 +2643,8 @@ def test_fillna_categorical(self): self.assert_index_equal(idx.fillna(1.0), exp) # fill by value not in categories raises ValueError - with tm.assertRaisesRegexp(ValueError, 'fill value must be in categories'): + with tm.assertRaisesRegexp(ValueError, + 'fill value must be in categories'): idx.fillna(2.0) @@ -2644,42 +2652,56 @@ class Numeric(Base): def test_numeric_compat(self): - idx = self._holder(np.arange(5,dtype='int64')) - didx = self._holder(np.arange(5,dtype='int64')**2 - ) + idx = self.create_index() + didx = idx * idx + result = idx * 1 tm.assert_index_equal(result, idx) result = 1 * idx tm.assert_index_equal(result, idx) - result = idx * idx - tm.assert_index_equal(result, didx) + # in general not true for RangeIndex + if not isinstance(idx, RangeIndex): + result = idx * idx + tm.assert_index_equal(result, idx ** 2) + # truediv under PY3 result = idx / 1 - tm.assert_index_equal(result, idx) + expected = idx + if PY3: + expected = expected.astype('float64') + tm.assert_index_equal(result, expected) + + result = idx / 2 + if PY3: + expected = expected.astype('float64') + expected = Index(idx.values / 2) + tm.assert_index_equal(result, expected) result = idx // 1 tm.assert_index_equal(result, idx) - result = idx * np.array(5,dtype='int64') - tm.assert_index_equal(result, self._holder(np.arange(5,dtype='int64')*5)) + result = idx * np.array(5, dtype='int64') + tm.assert_index_equal(result, idx * 5) - result = idx * np.arange(5,dtype='int64') + result = idx * np.arange(5, dtype='int64') tm.assert_index_equal(result, didx) - result = idx * Series(np.arange(5,dtype='int64')) + result = idx * Series(np.arange(5, dtype='int64')) tm.assert_index_equal(result, didx) - result = idx * Series(np.arange(5,dtype='float64')+0.1) - tm.assert_index_equal(result, - Float64Index(np.arange(5,dtype='float64')*(np.arange(5,dtype='float64')+0.1))) + result = idx * Series(np.arange(5, dtype='float64') + 0.1) + expected = Float64Index(np.arange(5, dtype='float64') * ( + np.arange(5, dtype='float64') + 0.1)) + tm.assert_index_equal(result, expected) # invalid - self.assertRaises(TypeError, lambda : idx * date_range('20130101',periods=5)) - self.assertRaises(ValueError, lambda : idx * self._holder(np.arange(3))) - self.assertRaises(ValueError, lambda : idx * np.array([1,2])) - + self.assertRaises(TypeError, lambda: idx * date_range('20130101', + periods=5) + ) + self.assertRaises(ValueError, lambda: idx * idx[0:3]) + self.assertRaises(ValueError, lambda: idx * np.array([1, 2])) def test_explicit_conversions(self): @@ -2942,11 +2964,11 @@ def test_fillna_float64(self): self.assert_index_equal(idx.fillna(0.1), exp) # downcast - exp = Int64Index([1, 2, 3], name='x') + exp = Float64Index([1.0, 2.0, 3.0], name='x') self.assert_index_equal(idx.fillna(2), exp) 
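# Editorial sketch, not part of the patch: the changed expectation above
# reflects that filling a Float64Index with an integer no longer downcasts
# the result to Int64Index. Hypothetical session (assumes
# ``import pandas as pd`` and ``import numpy as np``):
#
#   >>> pd.Float64Index([1.0, np.nan, 3.0]).fillna(2)
#   Float64Index([1.0, 2.0, 3.0], dtype='float64')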
# object - exp = Index([1, 'obj', 3], name='x') + exp = Index([1.0, 'obj', 3.0], name='x') self.assert_index_equal(idx.fillna('obj'), exp) @@ -3358,7 +3380,6 @@ def test_take_preserve_name(self): self.assertEqual(index.name, taken.name) def test_int_name_format(self): - from pandas import Series, DataFrame index = Index(['a', 'b', 'c'], name=0) s = Series(lrange(3), index) df = DataFrame(lrange(3), index=index) @@ -3382,14 +3403,14 @@ def test_repr_roundtrip(self): def test_unicode_string_with_unicode(self): idx = Index(lrange(1000)) - if compat.PY3: + if PY3: str(idx) else: compat.text_type(idx) def test_bytestring_with_unicode(self): idx = Index(lrange(1000)) - if compat.PY3: + if PY3: bytes(idx) else: str(idx) @@ -3399,40 +3420,791 @@ def test_slice_keep_name(self): self.assertEqual(idx.name, idx[1:].name) def test_ufunc_coercions(self): - idx = pd.Int64Index([1, 2, 3, 4, 5], name='x') + idx = Int64Index([1, 2, 3, 4, 5], name='x') result = np.sqrt(idx) tm.assertIsInstance(result, Float64Index) - exp = pd.Float64Index(np.sqrt(np.array([1, 2, 3, 4, 5])), name='x') + exp = Float64Index(np.sqrt(np.array([1, 2, 3, 4, 5])), name='x') tm.assert_index_equal(result, exp) result = np.divide(idx, 2.) tm.assertIsInstance(result, Float64Index) - exp = pd.Float64Index([0.5, 1., 1.5, 2., 2.5], name='x') + exp = Float64Index([0.5, 1., 1.5, 2., 2.5], name='x') tm.assert_index_equal(result, exp) # _evaluate_numeric_binop result = idx + 2. tm.assertIsInstance(result, Float64Index) - exp = pd.Float64Index([3., 4., 5., 6., 7.], name='x') + exp = Float64Index([3., 4., 5., 6., 7.], name='x') tm.assert_index_equal(result, exp) result = idx - 2. tm.assertIsInstance(result, Float64Index) - exp = pd.Float64Index([-1., 0., 1., 2., 3.], name='x') + exp = Float64Index([-1., 0., 1., 2., 3.], name='x') tm.assert_index_equal(result, exp) result = idx * 1. tm.assertIsInstance(result, Float64Index) - exp = pd.Float64Index([1., 2., 3., 4., 5.], name='x') + exp = Float64Index([1., 2., 3., 4., 5.], name='x') tm.assert_index_equal(result, exp) result = idx / 2. 
tm.assertIsInstance(result, Float64Index) - exp = pd.Float64Index([0.5, 1., 1.5, 2., 2.5], name='x') + exp = Float64Index([0.5, 1., 1.5, 2., 2.5], name='x') tm.assert_index_equal(result, exp) +class TestRangeIndex(Numeric, tm.TestCase): + _holder = RangeIndex + _compat_props = ['shape', 'ndim', 'size', 'itemsize'] + + def setUp(self): + self.indices = dict(index=RangeIndex(0, 20, 2, name='foo')) + self.setup_indices() + + def create_index(self): + return RangeIndex(5) + + def test_binops(self): + ops = [operator.add, operator.sub, operator.mul, + operator.floordiv, operator.truediv, pow] + scalars = [-1, 1, 2] + idxs = [RangeIndex(0, 10, 1), + RangeIndex(0, 20, 2), + RangeIndex(-10, 10, 2), + RangeIndex(5, -5, -1)] + for op in ops: + for a, b in combinations(idxs, 2): + result = op(a, b) + expected = op(Int64Index(a), Int64Index(b)) + tm.assert_index_equal(result, expected) + for idx in idxs: + for scalar in scalars: + result = op(idx, scalar) + expected = op(Int64Index(idx), scalar) + tm.assert_index_equal(result, expected) + + def test_too_many_names(self): + def testit(): + self.index.names = ["roger", "harold"] + assertRaisesRegexp(ValueError, "^Length", testit) + + def test_constructor(self): + index = RangeIndex(5) + expected = np.arange(5, dtype=np.int64) + self.assertIsInstance(index, RangeIndex) + self.assertEqual(index._start, 0) + self.assertEqual(index._stop, 5) + self.assertEqual(index._step, 1) + self.assertEqual(index.name, None) + tm.assert_index_equal(Index(expected), index) + + index = RangeIndex(1, 5) + expected = np.arange(1, 5, dtype=np.int64) + self.assertIsInstance(index, RangeIndex) + self.assertEqual(index._start, 1) + tm.assert_index_equal(Index(expected), index) + + index = RangeIndex(1, 5, 2) + expected = np.arange(1, 5, 2, dtype=np.int64) + self.assertIsInstance(index, RangeIndex) + self.assertEqual(index._step, 2) + tm.assert_index_equal(Index(expected), index) + + index = RangeIndex() + expected = np.empty(0, dtype=np.int64) + self.assertIsInstance(index, RangeIndex) + self.assertEqual(index._start, 0) + self.assertEqual(index._stop, 0) + self.assertEqual(index._step, 1) + tm.assert_index_equal(Index(expected), index) + + index = RangeIndex(name='Foo') + self.assertIsInstance(index, RangeIndex) + self.assertEqual(index.name, 'Foo') + + # we don't allow on a bare Index + self.assertRaises(TypeError, lambda: Index(0, 1000)) + + # invalid args + for i in [Index(['a', 'b']), + Series(['a', 'b']), + np.array(['a', 'b']), + [], + 'foo', + datetime(2000, 1, 1, 0, 0), + np.arange(0, 10)]: + self.assertRaises(TypeError, lambda: RangeIndex(i)) + + def test_constructor_same(self): + + # pass thru w and w/o copy + index = RangeIndex(1, 5, 2) + result = RangeIndex(index, copy=False) + self.assertTrue(result.identical(index)) + + result = RangeIndex(index, copy=True) + self.assertTrue(result.equals(index)) + + result = RangeIndex(index) + self.assertTrue(result.equals(index)) + + self.assertRaises(TypeError, + lambda: RangeIndex(index, dtype='float64')) + + def test_constructor_range(self): + + self.assertRaises(TypeError, lambda: RangeIndex(range(1, 5, 2))) + + result = RangeIndex.from_range(range(1, 5, 2)) + expected = RangeIndex(1, 5, 2) + self.assertTrue(result.equals(expected)) + + result = RangeIndex.from_range(range(5, 6)) + expected = RangeIndex(5, 6, 1) + self.assertTrue(result.equals(expected)) + + # an invalid range + result = RangeIndex.from_range(range(5, 1)) + expected = RangeIndex(0, 0, 1) + self.assertTrue(result.equals(expected)) + + result = 
RangeIndex.from_range(range(5)) + expected = RangeIndex(0, 5, 1) + self.assertTrue(result.equals(expected)) + + result = Index(range(1, 5, 2)) + expected = RangeIndex(1, 5, 2) + self.assertTrue(result.equals(expected)) + + self.assertRaises(TypeError, + lambda: Index(range(1, 5, 2), dtype='float64')) + + def test_numeric_compat2(self): + # validate that we are handling the RangeIndex overrides to numeric ops + # and returning RangeIndex where possible + + idx = RangeIndex(0, 10, 2) + + result = idx * 2 + expected = RangeIndex(0, 20, 4) + self.assertTrue(result.equals(expected)) + + result = idx + 2 + expected = RangeIndex(2, 12, 2) + self.assertTrue(result.equals(expected)) + + result = idx - 2 + expected = RangeIndex(-2, 8, 2) + self.assertTrue(result.equals(expected)) + + # truediv under PY3 + result = idx / 2 + if PY3: + expected = RangeIndex(0, 5, 1) + else: + expected = RangeIndex(0, 5, 1).astype('float64') + self.assertTrue(result.equals(expected)) + + result = idx / 4 + expected = RangeIndex(0, 10, 2).values / 4 + self.assertTrue(result.equals(expected)) + + result = idx // 1 + expected = idx._int64index // 1 + tm.assert_index_equal(result, expected, exact=True) + + # __mul__ + result = idx * idx + expected = Index(idx.values * idx.values) + tm.assert_index_equal(result, expected, exact=True) + + # __pow__ + idx = RangeIndex(0, 1000, 2) + result = idx ** 2 + expected = idx._int64index ** 2 + tm.assert_index_equal(Index(result.values), expected, exact=True) + + # __floordiv__ + idx = RangeIndex(0, 1000, 2) + result = idx // 2 + expected = idx._int64index // 2 + tm.assert_index_equal(result, expected, exact=True) + + idx = RangeIndex(0, 1000, 1) + result = idx // 2 + expected = idx._int64index // 2 + tm.assert_index_equal(result, expected, exact=True) + + def test_constructor_corner(self): + arr = np.array([1, 2, 3, 4], dtype=object) + index = RangeIndex(1, 5) + self.assertEqual(index.values.dtype, np.int64) + self.assertTrue(index.equals(arr)) + + # non-int raise Exception + self.assertRaises(TypeError, RangeIndex, '1', '10', '1') + self.assertRaises(TypeError, RangeIndex, 1.1, 10.2, 1.3) + + # invalid passed type + self.assertRaises(TypeError, + lambda: RangeIndex(1, 5, dtype='float64')) + + def test_copy(self): + i = RangeIndex(5, name='Foo') + i_copy = i.copy() + self.assertTrue(i_copy is not i) + self.assertTrue(i_copy.identical(i)) + self.assertEqual(i_copy._start, 0) + self.assertEqual(i_copy._stop, 5) + self.assertEqual(i_copy._step, 1) + self.assertEqual(i_copy.name, 'Foo') + + def test_repr(self): + i = RangeIndex(5, name='Foo') + result = repr(i) + if PY3: + expected = "RangeIndex(start=0, stop=5, step=1, name='Foo')" + else: + expected = "RangeIndex(start=0, stop=5, step=1, name=u'Foo')" + self.assertTrue(result, expected) + + result = eval(result) + self.assertTrue(result.equals(i)) + + i = RangeIndex(5, 0, -1) + result = repr(i) + expected = "RangeIndex(start=5, stop=0, step=-1)" + self.assertEqual(result, expected) + + result = eval(result) + self.assertTrue(result.equals(i)) + + def test_insert(self): + + idx = RangeIndex(5, name='Foo') + result = idx[1:4] + + # test 0th element + self.assertTrue(idx[0:4].equals( + result.insert(0, idx[0]))) + + def test_delete(self): + + idx = RangeIndex(5, name='Foo') + expected = idx[1:].astype(int) + result = idx.delete(0) + self.assertTrue(result.equals(expected)) + self.assertEqual(result.name, expected.name) + + expected = idx[:-1].astype(int) + result = idx.delete(-1) + self.assertTrue(result.equals(expected)) + 
self.assertEqual(result.name, expected.name) + + with tm.assertRaises((IndexError, ValueError)): + # either depending on numpy version + result = idx.delete(len(idx)) + + def test_view(self): + super(TestRangeIndex, self).test_view() + + i = RangeIndex(name='Foo') + i_view = i.view() + self.assertEqual(i_view.name, 'Foo') + + i_view = i.view('i8') + tm.assert_numpy_array_equal(i, i_view) + + i_view = i.view(RangeIndex) + tm.assert_index_equal(i, i_view) + + def test_dtype(self): + self.assertEqual(self.index.dtype, np.int64) + + def test_is_monotonic(self): + self.assertTrue(self.index.is_monotonic) + self.assertTrue(self.index.is_monotonic_increasing) + self.assertFalse(self.index.is_monotonic_decreasing) + + index = RangeIndex(4, 0, -1) + self.assertFalse(index.is_monotonic) + self.assertTrue(index.is_monotonic_decreasing) + + index = RangeIndex(1, 2) + self.assertTrue(index.is_monotonic) + self.assertTrue(index.is_monotonic_increasing) + self.assertTrue(index.is_monotonic_decreasing) + + def test_equals(self): + + equiv_pairs = [(RangeIndex(0, 9, 2), RangeIndex(0, 10, 2)), + (RangeIndex(0), RangeIndex(1, -1, 3)), + (RangeIndex(1, 2, 3), RangeIndex(1, 3, 4)), + (RangeIndex(0, -9, -2), RangeIndex(0, -10, -2))] + for left, right in equiv_pairs: + self.assertTrue(left.equals(right)) + self.assertTrue(right.equals(left)) + + def test_logical_compat(self): + idx = self.create_index() + self.assertEqual(idx.all(), idx.values.all()) + self.assertEqual(idx.any(), idx.values.any()) + + def test_identical(self): + i = Index(self.index.copy()) + self.assertTrue(i.identical(self.index)) + + # we don't allow object dtype for RangeIndex + if isinstance(self.index, RangeIndex): + return + + same_values_different_type = Index(i, dtype=object) + self.assertFalse(i.identical(same_values_different_type)) + + i = self.index.copy(dtype=object) + i = i.rename('foo') + same_values = Index(i, dtype=object) + self.assertTrue(same_values.identical(self.index.copy(dtype=object))) + + self.assertFalse(i.identical(self.index)) + self.assertTrue(Index(same_values, name='foo', dtype=object + ).identical(i)) + + self.assertFalse( + self.index.copy(dtype=object) + .identical(self.index.copy(dtype='int64'))) + + def test_get_indexer(self): + target = RangeIndex(10) + indexer = self.index.get_indexer(target) + expected = np.array([0, -1, 1, -1, 2, -1, 3, -1, 4, -1]) + self.assert_numpy_array_equal(indexer, expected) + + def test_get_indexer_pad(self): + target = RangeIndex(10) + indexer = self.index.get_indexer(target, method='pad') + expected = np.array([0, 0, 1, 1, 2, 2, 3, 3, 4, 4]) + self.assert_numpy_array_equal(indexer, expected) + + def test_get_indexer_backfill(self): + target = RangeIndex(10) + indexer = self.index.get_indexer(target, method='backfill') + expected = np.array([0, 1, 1, 2, 2, 3, 3, 4, 4, 5]) + self.assert_numpy_array_equal(indexer, expected) + + def test_join_outer(self): + # join with Int64Index + other = Int64Index(np.arange(25, 14, -1)) + + res, lidx, ridx = self.index.join(other, how='outer', + return_indexers=True) + noidx_res = self.index.join(other, how='outer') + self.assertTrue(res.equals(noidx_res)) + + eres = Int64Index([0, 2, 4, 6, 8, 10, 12, 14, 15, 16, 17, 18, + 19, 20, 21, 22, 23, 24, 25]) + elidx = np.array([0, 1, 2, 3, 4, 5, 6, 7, -1, 8, -1, 9, + -1, -1, -1, -1, -1, -1, -1], dtype=np.int64) + eridx = np.array([-1, -1, -1, -1, -1, -1, -1, -1, 10, 9, 8, 7, 6, + 5, 4, 3, 2, 1, 0], dtype=np.int64) + + self.assertIsInstance(res, Int64Index) + self.assertFalse(isinstance(res, 
RangeIndex)) + self.assertTrue(res.equals(eres)) + self.assert_numpy_array_equal(lidx, elidx) + self.assert_numpy_array_equal(ridx, eridx) + + # join with RangeIndex + other = RangeIndex(25, 14, -1) + + res, lidx, ridx = self.index.join(other, how='outer', + return_indexers=True) + noidx_res = self.index.join(other, how='outer') + self.assertTrue(res.equals(noidx_res)) + + self.assertIsInstance(res, Int64Index) + self.assertFalse(isinstance(res, RangeIndex)) + self.assertTrue(res.equals(eres)) + self.assert_numpy_array_equal(lidx, elidx) + self.assert_numpy_array_equal(ridx, eridx) + + def test_join_inner(self): + # Join with non-RangeIndex + other = Int64Index(np.arange(25, 14, -1)) + + res, lidx, ridx = self.index.join(other, how='inner', + return_indexers=True) + + # no guarantee of sortedness, so sort for comparison purposes + ind = res.argsort() + res = res.take(ind) + lidx = lidx.take(ind) + ridx = ridx.take(ind) + + eres = Int64Index([16, 18]) + elidx = np.array([8, 9]) + eridx = np.array([9, 7]) + + self.assertIsInstance(res, Int64Index) + self.assertTrue(res.equals(eres)) + self.assert_numpy_array_equal(lidx, elidx) + self.assert_numpy_array_equal(ridx, eridx) + + # Join two RangeIndex + other = RangeIndex(25, 14, -1) + + res, lidx, ridx = self.index.join(other, how='inner', + return_indexers=True) + + self.assertIsInstance(res, RangeIndex) + self.assertTrue(res.equals(eres)) + self.assert_numpy_array_equal(lidx, elidx) + self.assert_numpy_array_equal(ridx, eridx) + + def test_join_left(self): + # Join with Int64Index + other = Int64Index(np.arange(25, 14, -1)) + + res, lidx, ridx = self.index.join(other, how='left', + return_indexers=True) + eres = self.index + eridx = np.array([-1, -1, -1, -1, -1, -1, -1, -1, 9, 7], + dtype=np.int64) + + self.assertIsInstance(res, RangeIndex) + self.assertTrue(res.equals(eres)) + self.assertIsNone(lidx) + self.assert_numpy_array_equal(ridx, eridx) + + # Join withRangeIndex + other = Int64Index(np.arange(25, 14, -1)) + + res, lidx, ridx = self.index.join(other, how='left', + return_indexers=True) + + self.assertIsInstance(res, RangeIndex) + self.assertTrue(res.equals(eres)) + self.assertIsNone(lidx) + self.assert_numpy_array_equal(ridx, eridx) + + def test_join_right(self): + # Join with Int64Index + other = Int64Index(np.arange(25, 14, -1)) + + res, lidx, ridx = self.index.join(other, how='right', + return_indexers=True) + eres = other + elidx = np.array([-1, -1, -1, -1, -1, -1, -1, 9, -1, 8, -1], + dtype=np.int64) + + self.assertIsInstance(other, Int64Index) + self.assertTrue(res.equals(eres)) + self.assert_numpy_array_equal(lidx, elidx) + self.assertIsNone(ridx) + + # Join withRangeIndex + other = RangeIndex(25, 14, -1) + + res, lidx, ridx = self.index.join(other, how='right', + return_indexers=True) + eres = other + + self.assertIsInstance(other, RangeIndex) + self.assertTrue(res.equals(eres)) + self.assert_numpy_array_equal(lidx, elidx) + self.assertIsNone(ridx) + + def test_join_non_int_index(self): + other = Index([3, 6, 7, 8, 10], dtype=object) + + outer = self.index.join(other, how='outer') + outer2 = other.join(self.index, how='outer') + expected = Index([0, 2, 3, 4, 6, 7, 8, 10, 12, 14, + 16, 18], dtype=object) + self.assertTrue(outer.equals(outer2)) + self.assertTrue(outer.equals(expected)) + + inner = self.index.join(other, how='inner') + inner2 = other.join(self.index, how='inner') + expected = Index([6, 8, 10], dtype=object) + self.assertTrue(inner.equals(inner2)) + self.assertTrue(inner.equals(expected)) + + left = 
self.index.join(other, how='left') + self.assertTrue(left.equals(self.index)) + + left2 = other.join(self.index, how='left') + self.assertTrue(left2.equals(other)) + + right = self.index.join(other, how='right') + self.assertTrue(right.equals(other)) + + right2 = other.join(self.index, how='right') + self.assertTrue(right2.equals(self.index)) + + def test_join_non_unique(self): + other = Index([4, 4, 3, 3]) + + res, lidx, ridx = self.index.join(other, return_indexers=True) + + eres = Int64Index([0, 2, 4, 4, 6, 8, 10, 12, 14, 16, 18]) + elidx = np.array([0, 1, 2, 2, 3, 4, 5, 6, 7, 8, 9], dtype=np.int64) + eridx = np.array([-1, -1, 0, 1, -1, -1, -1, -1, -1, -1, -1], + dtype=np.int64) + + self.assertTrue(res.equals(eres)) + self.assert_numpy_array_equal(lidx, elidx) + self.assert_numpy_array_equal(ridx, eridx) + + def test_join_self(self): + kinds = 'outer', 'inner', 'left', 'right' + for kind in kinds: + joined = self.index.join(self.index, how=kind) + self.assertIs(self.index, joined) + + def test_intersection(self): + # intersect with Int64Index + other = Index(np.arange(1, 6)) + result = self.index.intersection(other) + expected = np.sort(np.intersect1d(self.index.values, other.values)) + self.assert_numpy_array_equal(result, expected) + + result = other.intersection(self.index) + expected = np.sort(np.asarray(np.intersect1d(self.index.values, + other.values))) + self.assert_numpy_array_equal(result, expected) + + # intersect with increasing RangeIndex + other = RangeIndex(1, 6) + result = self.index.intersection(other) + expected = np.sort(np.intersect1d(self.index.values, other.values)) + self.assert_numpy_array_equal(result, expected) + + # intersect with decreasing RangeIndex + other = RangeIndex(5, 0, -1) + result = self.index.intersection(other) + expected = np.sort(np.intersect1d(self.index.values, other.values)) + self.assert_numpy_array_equal(result, expected) + + def test_intersect_str_dates(self): + dt_dates = [datetime(2012, 2, 9), datetime(2012, 2, 22)] + + i1 = Index(dt_dates, dtype=object) + i2 = Index(['aa'], dtype=object) + res = i2.intersection(i1) + + self.assertEqual(len(res), 0) + + def test_union_noncomparable(self): + from datetime import datetime, timedelta + # corner case, non-Int64Index + now = datetime.now() + other = Index([now + timedelta(i) for i in range(4)], dtype=object) + result = self.index.union(other) + expected = np.concatenate((self.index, other)) + self.assert_numpy_array_equal(result, expected) + + result = other.union(self.index) + expected = np.concatenate((other, self.index)) + self.assert_numpy_array_equal(result, expected) + + def test_nbytes(self): + + # memory savings vs int index + i = RangeIndex(0, 1000) + self.assertTrue(i.nbytes < i.astype(int).nbytes / 10) + + # constant memory usage + i2 = RangeIndex(0, 10) + self.assertEqual(i.nbytes, i2.nbytes) + + def test_cant_or_shouldnt_cast(self): + # can't + self.assertRaises(TypeError, RangeIndex, 'foo', 'bar', 'baz') + + # shouldn't + self.assertRaises(TypeError, RangeIndex, '0', '1', '2') + + def test_view_Index(self): + self.index.view(Index) + + def test_prevent_casting(self): + result = self.index.astype('O') + self.assertEqual(result.dtype, np.object_) + + def test_take_preserve_name(self): + index = RangeIndex(1, 5, name='foo') + taken = index.take([3, 0, 1]) + self.assertEqual(index.name, taken.name) + + def test_print_unicode_columns(self): + df = pd.DataFrame( + {u("\u05d0"): [1, 2, 3], "\u05d1": [4, 5, 6], "c": [7, 8, 9]}) + repr(df.columns) # should not raise UnicodeDecodeError + + 
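
The ``nbytes`` and arithmetic assertions above all follow from the same design: a ``RangeIndex`` stores only its ``start``, ``stop`` and ``step``, hands back another ``RangeIndex`` whenever an operation still yields an evenly spaced range, and otherwise materializes to an ``Int64Index``. A minimal sketch of that behaviour, assuming a pandas build that ships ``RangeIndex`` (0.18.0 or later; the repr and exact byte counts vary by version):

.. code-block:: python

   import numpy as np
   import pandas as pd

   idx = pd.RangeIndex(0, 10, 2)

   # operations that preserve an even spacing stay lazy
   idx * 2     # RangeIndex(start=0, stop=20, step=4)
   idx + 2     # RangeIndex(start=2, stop=12, step=2)

   # operations that break the spacing fall back to a materialized integer index
   idx * idx   # Int64Index([0, 4, 16, 36, 64], dtype='int64') on 0.18.x

   # the memory footprint does not grow with length
   lazy = pd.RangeIndex(0, 1000000)
   dense = pd.Index(np.arange(1000000, dtype='int64'))
   lazy.nbytes    # a small constant: only start/stop/step are stored
   dense.nbytes   # ~8 MB, one int64 per label
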
def test_repr_roundtrip(self): + tm.assert_index_equal(eval(repr(self.index)), self.index) + + def test_slice_keep_name(self): + idx = RangeIndex(1, 2, name='asdf') + self.assertEqual(idx.name, idx[1:].name) + + def test_explicit_conversions(self): + + # GH 8608 + # add/sub are overriden explicity for Float/Int Index + idx = RangeIndex(5) + + # float conversions + arr = np.arange(5, dtype='int64') * 3.2 + expected = Float64Index(arr) + fidx = idx * 3.2 + tm.assert_index_equal(fidx, expected) + fidx = 3.2 * idx + tm.assert_index_equal(fidx, expected) + + # interops with numpy arrays + expected = Float64Index(arr) + a = np.zeros(5, dtype='float64') + result = fidx - a + tm.assert_index_equal(result, expected) + + expected = Float64Index(-arr) + a = np.zeros(5, dtype='float64') + result = a - fidx + tm.assert_index_equal(result, expected) + + def test_duplicates(self): + for ind in self.indices: + if not len(ind): + continue + idx = self.indices[ind] + self.assertTrue(idx.is_unique) + self.assertFalse(idx.has_duplicates) + + def test_ufunc_compat(self): + idx = RangeIndex(5) + result = np.sin(idx) + expected = Float64Index(np.sin(np.arange(5, dtype='int64'))) + tm.assert_index_equal(result, expected) + + def test_extended_gcd(self): + result = self.index._extended_gcd(6, 10) + self.assertEqual(result[0], result[1] * 6 + result[2] * 10) + self.assertEqual(2, result[0]) + + result = self.index._extended_gcd(10, 6) + self.assertEqual(2, result[1] * 10 + result[2] * 6) + self.assertEqual(2, result[0]) + + def test_min_fitting_element(self): + result = RangeIndex(0, 20, 2)._min_fitting_element(1) + self.assertEqual(2, result) + + result = RangeIndex(1, 6)._min_fitting_element(1) + self.assertEqual(1, result) + + result = RangeIndex(18, -2, -2)._min_fitting_element(1) + self.assertEqual(2, result) + + result = RangeIndex(5, 0, -1)._min_fitting_element(1) + self.assertEqual(1, result) + + def test_max_fitting_element(self): + result = RangeIndex(0, 20, 2)._max_fitting_element(17) + self.assertEqual(16, result) + + result = RangeIndex(1, 6)._max_fitting_element(4) + self.assertEqual(4, result) + + result = RangeIndex(18, -2, -2)._max_fitting_element(17) + self.assertEqual(16, result) + + result = RangeIndex(5, 0, -1)._max_fitting_element(4) + self.assertEqual(4, result) + + def test_pickle_compat_construction(self): + # RangeIndex() is a valid constructor + pass + + def test_slice_specialised(self): + + # scalar indexing + res = self.index[1] + expected = 2 + self.assertEqual(res, expected) + + res = self.index[-1] + expected = 18 + self.assertEqual(res, expected) + + # slicing + # slice value completion + index = self.index[:] + expected = self.index + self.assert_numpy_array_equal(index, expected) + + # positive slice values + index = self.index[7:10:2] + expected = np.array([14, 18]) + self.assert_numpy_array_equal(index, expected) + + # negative slice values + index = self.index[-1:-5:-2] + expected = np.array([18, 14]) + self.assert_numpy_array_equal(index, expected) + + # stop overshoot + index = self.index[2:100:4] + expected = np.array([4, 12]) + self.assert_numpy_array_equal(index, expected) + + # reverse + index = self.index[::-1] + expected = self.index.values[::-1] + self.assert_numpy_array_equal(index, expected) + + index = self.index[-8::-1] + expected = np.array([4, 2, 0]) + self.assert_numpy_array_equal(index, expected) + + index = self.index[-40::-1] + expected = np.array([]) + self.assert_numpy_array_equal(index, expected) + + index = self.index[40::-1] + expected = 
self.index.values[40::-1] + self.assert_numpy_array_equal(index, expected) + + index = self.index[10::-1] + expected = self.index.values[::-1] + self.assert_numpy_array_equal(index, expected) + + def test_len_specialised(self): + + # make sure that our len is the same as + # np.arange calc + + for step in np.arange(1, 6, 1): + + arr = np.arange(0, 5, step) + i = RangeIndex(0, 5, step) + self.assertEqual(len(i), len(arr)) + + i = RangeIndex(5, 0, step) + self.assertEqual(len(i), 0) + + for step in np.arange(-6, -1, 1): + + arr = np.arange(5, 0, step) + i = RangeIndex(5, 0, step) + self.assertEqual(len(i), len(arr)) + + i = RangeIndex(0, 5, step) + self.assertEqual(len(i), 0) + + class DatetimeLike(Base): def test_shift_identity(self): @@ -4122,24 +4894,25 @@ def test_numeric_compat(self): result = idx // 1 tm.assert_index_equal(result, idx) - result = idx * np.array(5,dtype='int64') - tm.assert_index_equal(result, self._holder(np.arange(5,dtype='int64')*5)) + result = idx * np.array(5, dtype='int64') + tm.assert_index_equal(result, + self._holder(np.arange(5, dtype='int64') * 5)) - result = idx * np.arange(5,dtype='int64') + result = idx * np.arange(5, dtype='int64') tm.assert_index_equal(result, didx) - result = idx * Series(np.arange(5,dtype='int64')) + result = idx * Series(np.arange(5, dtype='int64')) tm.assert_index_equal(result, didx) - result = idx * Series(np.arange(5,dtype='float64')+0.1) + result = idx * Series(np.arange(5, dtype='float64') + 0.1) tm.assert_index_equal(result, - Float64Index(np.arange(5,dtype='float64')*(np.arange(5,dtype='float64')+0.1))) - + self._holder(np.arange(5, dtype='float64') * ( + np.arange(5, dtype='float64') + 0.1))) # invalid - self.assertRaises(TypeError, lambda : idx * idx) - self.assertRaises(ValueError, lambda : idx * self._holder(np.arange(3))) - self.assertRaises(ValueError, lambda : idx * np.array([1,2])) + self.assertRaises(TypeError, lambda: idx * idx) + self.assertRaises(ValueError, lambda: idx * self._holder(np.arange(3))) + self.assertRaises(ValueError, lambda: idx * np.array([1, 2])) def test_pickle_compat_construction(self): pass @@ -4842,8 +5615,9 @@ def test_iter(self): self.assertEqual(result, expected) def test_legacy_pickle(self): - if compat.PY3: - raise nose.SkipTest("testing for legacy pickles not support on py3") + if PY3: + raise nose.SkipTest("testing for legacy pickles not " + "support on py3") path = tm.get_data_path('multiindex_v1.pickle') obj = pd.read_pickle(path) @@ -5926,10 +6700,11 @@ def test_repr_with_unicode_data(self): def test_repr_roundtrip(self): - mi = MultiIndex.from_product([list('ab'),range(3)],names=['first','second']) + mi = MultiIndex.from_product([list('ab'), range(3)], + names=['first', 'second']) str(mi) - if compat.PY3: + if PY3: tm.assert_index_equal(eval(repr(mi)), mi, exact=True) else: result = eval(repr(mi)) @@ -5943,16 +6718,17 @@ def test_repr_roundtrip(self): tm.assert_index_equal(result, mi_u, exact=True) # formatting - if compat.PY3: + if PY3: str(mi) else: compat.text_type(mi) # long format - mi = MultiIndex.from_product([list('abcdefg'),range(10)],names=['first','second']) + mi = MultiIndex.from_product([list('abcdefg'), range(10)], + names=['first', 'second']) result = str(mi) - if compat.PY3: + if PY3: tm.assert_index_equal(eval(repr(mi)), mi, exact=True) else: result = eval(repr(mi)) @@ -5973,7 +6749,7 @@ def test_unicode_string_with_unicode(self): d = {"a": [u("\u05d0"), 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]} idx = pd.DataFrame(d).set_index(["a", "b"]).index - if compat.PY3: + if PY3: 
str(idx) else: compat.text_type(idx) @@ -5982,7 +6758,7 @@ def test_bytestring_with_unicode(self): d = {"a": [u("\u05d0"), 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]} idx = pd.DataFrame(d).set_index(["a", "b"]).index - if compat.PY3: + if PY3: bytes(idx) else: str(idx) diff --git a/pandas/tests/test_indexing.py b/pandas/tests/test_indexing.py index c6d80a08ad61a..5c3e4c01a965a 100644 --- a/pandas/tests/test_indexing.py +++ b/pandas/tests/test_indexing.py @@ -4352,25 +4352,29 @@ def check_invalid(index, loc=None, iloc=None, ix=None, getitem=None): # related 236/4850 # trying to access with a float index - s = Series(np.arange(len(index)),index=index) + s = Series(np.arange(len(index)), index=index) if iloc is None: iloc = TypeError - self.assertRaises(iloc, lambda : s.iloc[3.5]) + self.assertRaises(iloc, lambda: s.iloc[3.5]) if loc is None: loc = TypeError - self.assertRaises(loc, lambda : s.loc[3.5]) + self.assertRaises(loc, lambda: s.loc[3.5]) if ix is None: ix = TypeError - self.assertRaises(ix, lambda : s.ix[3.5]) + self.assertRaises(ix, lambda: s.ix[3.5]) if getitem is None: getitem = TypeError - self.assertRaises(getitem, lambda : s[3.5]) + self.assertRaises(getitem, lambda: s[3.5]) - for index in [ tm.makeStringIndex, tm.makeUnicodeIndex, tm.makeIntIndex, - tm.makeDateIndex, tm.makePeriodIndex ]: - check_invalid(index()) - check_invalid(Index(np.arange(5) * 2.5),loc=KeyError, ix=KeyError, getitem=KeyError) + for index in [tm.makeStringIndex, tm.makeUnicodeIndex, + tm.makeIntIndex, tm.makeRangeIndex, + tm.makeDateIndex, tm.makePeriodIndex]: + check_invalid(index()) + check_invalid(Index(np.arange(5) * 2.5), + loc=KeyError, + ix=KeyError, + getitem=KeyError) def check_index(index, error): index = index() @@ -4472,37 +4476,38 @@ def check_slicing_positional(index): ############ # IntIndex # ############ - index = tm.makeIntIndex() - s = Series(np.arange(len(index),dtype='int64')+10,index+5) + for index in [tm.makeIntIndex(), tm.makeRangeIndex()]: - # this is positional - result1 = s[2:5] - result4 = s.iloc[2:5] - assert_series_equal(result1, result4) + s = Series(np.arange(len(index), dtype='int64') + 10, index + 5) - # these are all label based - result2 = s.ix[2:5] - result3 = s.loc[2:5] - assert_series_equal(result2, result3) + # this is positional + result1 = s[2:5] + result4 = s.iloc[2:5] + assert_series_equal(result1, result4) + + # these are all label based + result2 = s.ix[2:5] + result3 = s.loc[2:5] + assert_series_equal(result2, result3) - # float slicers on an int index - expected = Series([11,12,13],index=[6,7,8]) - for method in [lambda x: x.loc, lambda x: x.ix]: - result = method(s)[6.0:8.5] - assert_series_equal(result, expected) + # float slicers on an int index + expected = Series([11, 12, 13], index=[6, 7, 8]) + for method in [lambda x: x.loc, lambda x: x.ix]: + result = method(s)[6.0:8.5] + assert_series_equal(result, expected) - result = method(s)[5.5:8.5] - assert_series_equal(result, expected) + result = method(s)[5.5:8.5] + assert_series_equal(result, expected) - result = method(s)[5.5:8.0] - assert_series_equal(result, expected) + result = method(s)[5.5:8.0] + assert_series_equal(result, expected) - # make all float slicing fail for [] with an int index - self.assertRaises(TypeError, lambda : s[6.0:8]) - self.assertRaises(TypeError, lambda : s[6.0:8.0]) - self.assertRaises(TypeError, lambda : s[6:8.0]) + # make all float slicing fail for [] with an int index + self.assertRaises(TypeError, lambda: s[6.0:8]) + self.assertRaises(TypeError, lambda: s[6.0:8.0]) + 
self.assertRaises(TypeError, lambda: s[6:8.0]) - check_iloc_compat(s) + check_iloc_compat(s) ############## # FloatIndex # @@ -4658,19 +4663,20 @@ def f(): self.assertRaises(FutureWarning, f) # slices - for index in [ tm.makeIntIndex, tm.makeFloatIndex, - tm.makeStringIndex, tm.makeUnicodeIndex, - tm.makeDateIndex, tm.makePeriodIndex ]: + for index in [tm.makeIntIndex, tm.makeRangeIndex, tm.makeFloatIndex, + tm.makeStringIndex, tm.makeUnicodeIndex, + tm.makeDateIndex, tm.makePeriodIndex]: index = index(5) - for s in [ Series(range(5),index=index), DataFrame(np.random.randn(5,2),index=index) ]: + for s in [Series(range(5), index=index), + DataFrame(np.random.randn(5, 2), index=index)]: # getitem - self.assertRaises(FutureWarning, lambda : + self.assertRaises(FutureWarning, lambda: s.iloc[3.0:4]) - self.assertRaises(FutureWarning, lambda : + self.assertRaises(FutureWarning, lambda: s.iloc[3.0:4.0]) - self.assertRaises(FutureWarning, lambda : + self.assertRaises(FutureWarning, lambda: s.iloc[3:4.0]) # setitem diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index d37ac530d02e8..a2b1a84e78f22 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -826,6 +826,9 @@ def test_constructor(self): def test_constructor_empty(self): empty = Series() empty2 = Series([]) + + # the are Index() and RangeIndex() which don't compare type equal + # but are just .equals assert_series_equal(empty, empty2, check_index_type=False) empty = Series(index=lrange(10)) @@ -1226,7 +1229,7 @@ def test_constructor_dict(self): def test_constructor_dict_multiindex(self): check = lambda result, expected: tm.assert_series_equal( - result, expected, check_dtype=True, check_index_type=True, + result, expected, check_dtype=True, check_series_type=True) d = {('a', 'a'): 0., ('b', 'a'): 1., ('b', 'c'): 2.} _d = sorted(d.items()) @@ -7418,6 +7421,7 @@ def test_reindex_nan(self): assert_series_equal(ts.reindex(i), ts.iloc[j]) ts.index = ts.index.astype('object') + # reindex coerces index.dtype to float, loc/iloc doesn't assert_series_equal(ts.reindex(i), ts.iloc[j], check_index_type=False) diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index 0013a6579718a..269d272525ce6 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -1324,7 +1324,7 @@ def test_split_no_pat_with_nonzero_n(self): s = Series(['split once', 'split once too!']) result = s.str.split(n=1) expected = Series({0: ['split', 'once'], 1: ['split', 'once too!']}) - tm.assert_series_equal(expected, result) + tm.assert_series_equal(expected, result, check_index_type=False) def test_split_to_dataframe(self): s = Series(['nosplit', 'alsonosplit']) @@ -1393,7 +1393,7 @@ def test_split_to_dataframe_expand(self): def test_split_to_multiindex_expand(self): idx = Index(['nosplit', 'alsonosplit']) result = idx.str.split('_', expand=True) - exp = Index([np.array(['nosplit']), np.array(['alsonosplit'])]) + exp = idx tm.assert_index_equal(result, exp) self.assertEqual(result.nlevels, 1) @@ -1446,7 +1446,7 @@ def test_rsplit_to_dataframe_expand(self): def test_rsplit_to_multiindex_expand(self): idx = Index(['nosplit', 'alsonosplit']) result = idx.str.rsplit('_', expand=True) - exp = Index([np.array(['nosplit']), np.array(['alsonosplit'])]) + exp = idx tm.assert_index_equal(result, exp) self.assertEqual(result.nlevels, 1) diff --git a/pandas/tests/test_testing.py b/pandas/tests/test_testing.py index 13c0b6a08f6e7..58c4285b8394e 100644 --- a/pandas/tests/test_testing.py +++ 
b/pandas/tests/test_testing.py @@ -283,9 +283,8 @@ def test_index_equal_message(self): \\[right\\]: 2, MultiIndex\\(levels=\\[\\[u?'A', u?'B'\\], \\[1, 2, 3, 4\\]\\], labels=\\[\\[0, 0, 1, 1\\], \\[0, 1, 2, 3\\]\\]\\)""" idx1 = pd.Index([1, 2, 3]) - idx2 = pd.MultiIndex.from_tuples([('A', 1), ('A', 2), ('B', 3), ('B', 4)]) - with assertRaisesRegexp(AssertionError, expected): - assert_index_equal(idx1, idx2) + idx2 = pd.MultiIndex.from_tuples([('A', 1), ('A', 2), + ('B', 3), ('B', 4)]) with assertRaisesRegexp(AssertionError, expected): assert_index_equal(idx1, idx2, exact=False) @@ -471,8 +470,8 @@ def test_series_equal_message(self): expected = """Series are different Series length are different -\\[left\\]: 3, Int64Index\\(\\[0, 1, 2\\], dtype='int64'\\) -\\[right\\]: 4, Int64Index\\(\\[0, 1, 2, 3\\], dtype='int64'\\)""" +\\[left\\]: 3, RangeIndex\\(start=0, stop=3, step=1\\) +\\[right\\]: 4, RangeIndex\\(start=0, stop=4, step=1\\)""" with assertRaisesRegexp(AssertionError, expected): assert_series_equal(pd.Series([1, 2, 3]), pd.Series([1, 2, 3, 4])) @@ -526,12 +525,11 @@ def test_frame_equal_message(self): expected = """DataFrame are different DataFrame shape \\(number of rows\\) are different -\\[left\\]: 3, Int64Index\\(\\[0, 1, 2\\], dtype='int64'\\) -\\[right\\]: 4, Int64Index\\(\\[0, 1, 2, 3\\], dtype='int64'\\)""" +\\[left\\]: 3, RangeIndex\\(start=0, stop=3, step=1\\) +\\[right\\]: 4, RangeIndex\\(start=0, stop=4, step=1\\)""" with assertRaisesRegexp(AssertionError, expected): - assert_frame_equal(pd.DataFrame({'A':[1, 2, 3]}), - pd.DataFrame({'A':[1, 2, 3, 4]})) - + assert_frame_equal(pd.DataFrame({'A': [1, 2, 3]}), + pd.DataFrame({'A': [1, 2, 3, 4]})) expected = """DataFrame are different @@ -539,9 +537,8 @@ def test_frame_equal_message(self): \\[left\\]: 2, Index\\(\\[u?'A', u?'B'\\], dtype='object'\\) \\[right\\]: 1, Index\\(\\[u?'A'\\], dtype='object'\\)""" with assertRaisesRegexp(AssertionError, expected): - assert_frame_equal(pd.DataFrame({'A':[1, 2, 3], 'B':[4, 5, 6]}), - pd.DataFrame({'A':[1, 2, 3]})) - + assert_frame_equal(pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}), + pd.DataFrame({'A': [1, 2, 3]})) expected = """DataFrame\\.index are different @@ -549,10 +546,10 @@ def test_frame_equal_message(self): \\[left\\]: Index\\(\\[u?'a', u?'b', u?'c'\\], dtype='object'\\) \\[right\\]: Index\\(\\[u?'a', u?'b', u?'d'\\], dtype='object'\\)""" with assertRaisesRegexp(AssertionError, expected): - assert_frame_equal(pd.DataFrame({'A':[1, 2, 3], 'B':[4, 5, 6]}, - index=['a', 'b', 'c']), - pd.DataFrame({'A':[1, 2, 3], 'B':[4, 5, 6]}, - index=['a', 'b', 'd'])) + assert_frame_equal(pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}, + index=['a', 'b', 'c']), + pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}, + index=['a', 'b', 'd'])) expected = """DataFrame\\.columns are different @@ -560,11 +557,10 @@ def test_frame_equal_message(self): \\[left\\]: Index\\(\\[u?'A', u?'B'\\], dtype='object'\\) \\[right\\]: Index\\(\\[u?'A', u?'b'\\], dtype='object'\\)""" with assertRaisesRegexp(AssertionError, expected): - assert_frame_equal(pd.DataFrame({'A':[1, 2, 3], 'B':[4, 5, 6]}, - index=['a', 'b', 'c']), - pd.DataFrame({'A':[1, 2, 3], 'b':[4, 5, 6]}, - index=['a', 'b', 'c'])) - + assert_frame_equal(pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}, + index=['a', 'b', 'c']), + pd.DataFrame({'A': [1, 2, 3], 'b': [4, 5, 6]}, + index=['a', 'b', 'c'])) expected = """DataFrame\\.iloc\\[:, 1\\] are different diff --git a/pandas/tseries/tests/test_base.py b/pandas/tseries/tests/test_base.py index 
bf37bd4afe1da..2a1e59154f3d1 100644 --- a/pandas/tseries/tests/test_base.py +++ b/pandas/tseries/tests/test_base.py @@ -731,13 +731,13 @@ def test_ops_compat(self): # multiply for offset in offsets: - self.assertRaises(TypeError, lambda : rng * offset) + self.assertRaises(TypeError, lambda: rng * offset) # divide - expected = Int64Index((np.arange(10)+1)*12,name='foo') + expected = Int64Index((np.arange(10) + 1) * 12, name='foo') for offset in offsets: result = rng / offset - tm.assert_index_equal(result,expected) + tm.assert_index_equal(result, expected, exact=False) # divide with nats rng = TimedeltaIndex(['1 days', pd.NaT, '2 days'], name='foo') diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 1c21863415c62..685d89fee53b5 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -36,8 +36,9 @@ from pandas.computation import expressions as expr -from pandas import (bdate_range, CategoricalIndex, DatetimeIndex, TimedeltaIndex, PeriodIndex, - Index, MultiIndex, Series, DataFrame, Panel, Panel4D) +from pandas import (bdate_range, CategoricalIndex, DatetimeIndex, + TimedeltaIndex, PeriodIndex, RangeIndex, Index, MultiIndex, + Series, DataFrame, Panel, Panel4D) from pandas.util.decorators import deprecate from pandas import _testing from pandas.io.common import urlopen @@ -599,19 +600,22 @@ def assert_equal(a, b, msg=""): ... AssertionError: 5.2 was really a dead parrot: 5.2 != 1.2 """ - assert a == b, "%s: %r != %r" % (msg.format(a,b), a, b) + assert a == b, "%s: %r != %r" % (msg.format(a, b), a, b) -def assert_index_equal(left, right, exact=False, check_names=True, - check_less_precise=False, check_exact=True, obj='Index'): +def assert_index_equal(left, right, exact='equiv', check_names=True, + check_less_precise=False, check_exact=True, + obj='Index'): """Check that left and right Index are equal. Parameters ---------- left : Index right : Index - exact : bool, default False - Whether to check the Index class, dtype and inferred_type are identical. + exact : bool / string {'equiv'}, default False + Whether to check the Index class, dtype and inferred_type + are identical. If 'equiv', then RangeIndex can be substitued for + Int64Index as well check_names : bool, default True Whether to check the names attribute. 
check_less_precise : bool, default False @@ -626,9 +630,19 @@ def assert_index_equal(left, right, exact=False, check_names=True, def _check_types(l, r, obj='Index'): if exact: - if type(l) != type(r): - msg = '{0} classes are different'.format(obj) - raise_assert_detail(obj, msg, l, r) + + if exact == 'equiv': + if type(l) != type(r): + # allow equivalence of Int64Index/RangeIndex + types = set([type(l).__name__, type(r).__name__]) + if len(types - set(['Int64Index', 'RangeIndex'])): + msg = '{0} classes are not equivalent'.format(obj) + raise_assert_detail(obj, msg, l, r) + else: + if type(l) != type(r): + msg = '{0} classes are different'.format(obj) + raise_assert_detail(obj, msg, l, r) + assert_attr_equal('dtype', l, r, obj=obj) # allow string-like to have different inferred_types @@ -642,7 +656,8 @@ def _get_ilevel_values(index, level): unique = index.levels[level] labels = index.labels[level] filled = take_1d(unique.values, labels, fill_value=unique._na_value) - values = unique._simple_new(filled, index.names[level], + values = unique._simple_new(filled, + name=index.names[level], freq=getattr(unique, 'freq', None), tz=getattr(unique, 'tz', None)) return values @@ -652,7 +667,7 @@ def _get_ilevel_values(index, level): assertIsInstance(right, Index, '[index] ') # class / dtype comparison - _check_types(left, right) + _check_types(left, right, obj=obj) # level comparison if left.nlevels != right.nlevels: @@ -876,7 +891,7 @@ def assert_numpy_array_equal(left, right, # This could be refactored to use the NDFrame.equals method def assert_series_equal(left, right, check_dtype=True, - check_index_type=True, + check_index_type='equiv', check_series_type=True, check_less_precise=False, check_names=True, @@ -892,8 +907,9 @@ def assert_series_equal(left, right, check_dtype=True, right : Series check_dtype : bool, default True Whether to check the Series dtype is identical. - check_index_type : bool, default False - Whether to check the Index class, dtype and inferred_type are identical. + check_index_type : bool / string {'equiv'}, default False + Whether to check the Index class, dtype and inferred_type + are identical. check_series_type : bool, default False Whether to check the Series class is identical. check_less_precise : bool, default False @@ -958,8 +974,8 @@ def assert_series_equal(left, right, check_dtype=True, # This could be refactored to use the NDFrame.equals method def assert_frame_equal(left, right, check_dtype=True, - check_index_type=True, - check_column_type=True, + check_index_type='equiv', + check_column_type='equiv', check_frame_type=True, check_less_precise=False, check_names=True, @@ -976,10 +992,12 @@ def assert_frame_equal(left, right, check_dtype=True, right : DataFrame check_dtype : bool, default True Whether to check the DataFrame dtype is identical. - check_index_type : bool, default False - Whether to check the Index class, dtype and inferred_type are identical. - check_column_type : bool, default False - Whether to check the columns class, dtype and inferred_type are identical. + check_index_type : bool / string {'equiv'}, default False + Whether to check the Index class, dtype and inferred_type + are identical. + check_column_type : bool / string {'equiv'}, default False + Whether to check the columns class, dtype and inferred_type + are identical. check_frame_type : bool, default False Whether to check the DataFrame class is identical. 
check_less_precise : bool, default False @@ -1106,6 +1124,7 @@ def assert_copy(iter1, iter2, **eql_kwargs): def getCols(k): return string.ascii_uppercase[:k] + def getArangeMat(): return np.arange(N * K).reshape((N, K)) @@ -1118,38 +1137,50 @@ def makeStringIndex(k=10, name=None): def makeUnicodeIndex(k=10, name=None): return Index(randu_array(nchars=10, size=k)) + def makeCategoricalIndex(k=10, n=3, name=None): """ make a length k index or n categories """ x = rands_array(nchars=4, size=n) - return CategoricalIndex(np.random.choice(x,k), name=name) + return CategoricalIndex(np.random.choice(x, k), name=name) + def makeBoolIndex(k=10, name=None): if k == 1: return Index([True], name=name) elif k == 2: - return Index([False,True], name=name) - return Index([False,True] + [False]*(k-2), name=name) + return Index([False, True], name=name) + return Index([False, True] + [False] * (k - 2), name=name) + def makeIntIndex(k=10, name=None): return Index(lrange(k), name=name) + +def makeRangeIndex(k=10, name=None): + return RangeIndex(0, k, 1, name=name) + + def makeFloatIndex(k=10, name=None): values = sorted(np.random.random_sample(k)) - np.random.random_sample(1) return Index(values * (10 ** np.random.randint(0, 9)), name=name) + def makeDateIndex(k=10, freq='B', name=None): dt = datetime(2000, 1, 1) dr = bdate_range(dt, periods=k, freq=freq, name=name) return DatetimeIndex(dr, name=name) + def makeTimedeltaIndex(k=10, freq='D', name=None): return TimedeltaIndex(start='1 day', periods=k, freq=freq, name=name) + def makePeriodIndex(k=10, name=None): dt = datetime(2000, 1, 1) dr = PeriodIndex(start=dt, periods=k, freq='B', name=name) return dr + def all_index_generator(k=10): """Generator which can be iterated over to get instances of all the various index classes. @@ -1165,6 +1196,7 @@ def all_index_generator(k=10): for make_index_func in all_make_index_funcs: yield make_index_func(k=k) + def all_timeseries_index_generator(k=10): """Generator which can be iterated over to get instances of all the classes which represent time-seires.