Skip to content

ENH: slicing with decreasing monotonic indexes #8680

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Nov 2, 2014
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/source/api.rst
Original file line number Diff line number Diff line change
@@ -1166,6 +1166,8 @@ Attributes

Index.values
Index.is_monotonic
Index.is_monotonic_increasing
Index.is_monotonic_decreasing
Index.is_unique
Index.dtype
Index.inferred_type
28 changes: 26 additions & 2 deletions doc/source/whatsnew/v0.15.1.txt
Original file line number Diff line number Diff line change
@@ -146,6 +146,29 @@ API changes

s.dt.hour

- support for slicing with monotonic decreasing indexes, even if ``start`` or ``stop`` is
not found in the index (:issue:`7860`):

.. ipython:: python

s = pd.Series(['a', 'b', 'c', 'd'], [4, 3, 2, 1])
s

previous behavior:

.. code-block:: python

In [8]: s.loc[3.5:1.5]
KeyError: 3.5

current behavior:

.. ipython:: python

s.loc[3.5:1.5]

- added Index properties ``is_monotonic_increasing`` and ``is_monotonic_decreasing`` (:issue:`8680`).

.. _whatsnew_0151.enhancements:

Enhancements
@@ -208,8 +231,9 @@ Bug Fixes
- Bug in ix/loc block splitting on setitem (manifests with integer-like dtypes, e.g. datetime64) (:issue:`8607`)




- Bug when doing label based indexing with integers not found in the index for
non-unique but monotonic indexes (:issue:`8680`).
- Bug when indexing a Float64Index with ``np.nan`` on numpy 1.7 (:issue:`8980`).



2 changes: 1 addition & 1 deletion pandas/core/generic.py
Original file line number Diff line number Diff line change
@@ -1461,7 +1461,7 @@ def xs(self, key, axis=0, level=None, copy=None, drop_level=True):
name=self.index[loc])

else:
result = self[loc]
result = self.iloc[loc]
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was surprising to encounter (needed to change it to fix some tests), but maybe it was there for a reason? I don't think there is any reason for loc here to do label based rather than integer indexing?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

could be
I have tried not to touch xs recently. It seems OK but fragile, and multi-slicing obviates the need for it anyhow.

result.index = new_index

# this could be a view
43 changes: 28 additions & 15 deletions pandas/core/index.py
Original file line number Diff line number Diff line change
@@ -573,8 +573,22 @@ def _mpl_repr(self):

@property
def is_monotonic(self):
""" return if the index has monotonic (only equaly or increasing) values """
return self._engine.is_monotonic
""" alias for is_monotonic_increasing (deprecated) """
return self._engine.is_monotonic_increasing

@property
def is_monotonic_increasing(self):
""" return True if the index values are monotonic increasing, i.e.
each value is equal to or greater than the previous one, as reported
by the underlying engine
"""
return self._engine.is_monotonic_increasing

@property
def is_monotonic_decreasing(self):
""" return True if the index values are monotonic decreasing, i.e.
each value is equal to or less than the previous one, as reported
by the underlying engine
"""
return self._engine.is_monotonic_decreasing

def is_lexsorted_for_tuple(self, tup):
return True
@@ -1988,16 +2002,12 @@ def _get_slice(starting_value, offset, search_side, slice_property,
slc += offset

except KeyError:
if self.is_monotonic:

# we are duplicated but non-unique
# so if we have an indexer then we are done
# else search for it (GH 7523)
if not is_unique and is_integer(search_value):
slc = search_value
else:
slc = self.searchsorted(search_value,
side=search_side)
if self.is_monotonic_increasing:
slc = self.searchsorted(search_value, side=search_side)
elif self.is_monotonic_decreasing:
search_side = 'right' if search_side == 'left' else 'left'
slc = len(self) - self[::-1].searchsorted(search_value,
side=search_side)
else:
raise
return slc
@@ -2431,10 +2441,13 @@ def __contains__(self, other):
def get_loc(self, key):
try:
if np.all(np.isnan(key)):
nan_idxs = self._nan_idxs
try:
return self._nan_idxs.item()
except ValueError:
return self._nan_idxs
return nan_idxs.item()
except (ValueError, IndexError):
# should only need to catch ValueError here but on numpy
# 1.7 .item() can raise IndexError when NaNs are present
return nan_idxs
except (TypeError, NotImplementedError):
pass
return super(Float64Index, self).get_loc(key)
39 changes: 25 additions & 14 deletions pandas/index.pyx
Original file line number Diff line number Diff line change
@@ -77,7 +77,7 @@ cdef class IndexEngine:
bint over_size_threshold

cdef:
bint unique, monotonic
bint unique, monotonic_inc, monotonic_dec
bint initialized, monotonic_check, unique_check

def __init__(self, vgetter, n):
@@ -89,7 +89,8 @@ cdef class IndexEngine:
self.monotonic_check = 0

self.unique = 0
self.monotonic = 0
self.monotonic_inc = 0
self.monotonic_dec = 0

def __contains__(self, object val):
self._ensure_mapping_populated()
@@ -134,7 +135,7 @@ cdef class IndexEngine:
if is_definitely_invalid_key(val):
raise TypeError

if self.over_size_threshold and self.is_monotonic:
if self.over_size_threshold and self.is_monotonic_increasing:
if not self.is_unique:
return self._get_loc_duplicates(val)
values = self._get_index_values()
@@ -158,7 +159,7 @@ cdef class IndexEngine:
cdef:
Py_ssize_t diff

if self.is_monotonic:
if self.is_monotonic_increasing:
values = self._get_index_values()
left = values.searchsorted(val, side='left')
right = values.searchsorted(val, side='right')
@@ -210,25 +211,35 @@ cdef class IndexEngine:

return self.unique == 1

property is_monotonic:
property is_monotonic_increasing:

def __get__(self):
if not self.monotonic_check:
self._do_monotonic_check()

return self.monotonic == 1
return self.monotonic_inc == 1

# True when the engine's values were found to be monotonic decreasing.
property is_monotonic_decreasing:

def __get__(self):
# run the monotonic check only if it has not been done yet; it fills
# in the flags read below
if not self.monotonic_check:
self._do_monotonic_check()

return self.monotonic_dec == 1

cdef inline _do_monotonic_check(self):
try:
values = self._get_index_values()
self.monotonic, unique = self._call_monotonic(values)
self.monotonic_inc, self.monotonic_dec, unique = \
self._call_monotonic(values)

if unique is not None:
self.unique = unique
self.unique_check = 1

except TypeError:
self.monotonic = 0
self.monotonic_inc = 0
self.monotonic_dec = 0
self.monotonic_check = 1

cdef _get_index_values(self):
@@ -345,7 +356,7 @@ cdef class Int64Engine(IndexEngine):
return _hash.Int64HashTable(n)

def _call_monotonic(self, values):
return algos.is_monotonic_int64(values)
return algos.is_monotonic_int64(values, timelike=False)

def get_pad_indexer(self, other, limit=None):
return algos.pad_int64(self._get_index_values(), other,
@@ -435,7 +446,7 @@ cdef class Float64Engine(IndexEngine):
return result

def _call_monotonic(self, values):
return algos.is_monotonic_float64(values)
return algos.is_monotonic_float64(values, timelike=False)

def get_pad_indexer(self, other, limit=None):
return algos.pad_float64(self._get_index_values(), other,
@@ -489,7 +500,7 @@ cdef class ObjectEngine(IndexEngine):
return _hash.PyObjectHashTable(n)

def _call_monotonic(self, values):
return algos.is_monotonic_object(values)
return algos.is_monotonic_object(values, timelike=False)

def get_pad_indexer(self, other, limit=None):
return algos.pad_object(self._get_index_values(), other,
@@ -506,7 +517,7 @@ cdef class DatetimeEngine(Int64Engine):
return 'M8[ns]'

def __contains__(self, object val):
if self.over_size_threshold and self.is_monotonic:
if self.over_size_threshold and self.is_monotonic_increasing:
if not self.is_unique:
return self._get_loc_duplicates(val)
values = self._get_index_values()
@@ -521,15 +532,15 @@ cdef class DatetimeEngine(Int64Engine):
return self.vgetter().view('i8')

def _call_monotonic(self, values):
return algos.is_monotonic_int64(values)
return algos.is_monotonic_int64(values, timelike=True)

cpdef get_loc(self, object val):
if is_definitely_invalid_key(val):
raise TypeError

# Welcome to the spaghetti factory

if self.over_size_threshold and self.is_monotonic:
if self.over_size_threshold and self.is_monotonic_increasing:
if not self.is_unique:
val = _to_i8(val)
return self._get_loc_duplicates(val)
32 changes: 26 additions & 6 deletions pandas/src/generate_code.py
Original file line number Diff line number Diff line change
@@ -539,31 +539,51 @@ def diff_2d_%(name)s(ndarray[%(c_type)s, ndim=2] arr,

is_monotonic_template = """@cython.boundscheck(False)
@cython.wraparound(False)
def is_monotonic_%(name)s(ndarray[%(c_type)s] arr):
def is_monotonic_%(name)s(ndarray[%(c_type)s] arr, bint timelike):
'''
Returns
-------
is_monotonic, is_unique
is_monotonic_inc, is_monotonic_dec, is_unique
'''
cdef:
Py_ssize_t i, n
%(c_type)s prev, cur
bint is_unique = 1
bint is_monotonic_inc = 1
bint is_monotonic_dec = 1

n = len(arr)

if n < 2:
return True, True
if n == 1:
if arr[0] != arr[0] or (timelike and arr[0] == iNaT):
# single value is NaN
return False, False, True
else:
return True, True, True
elif n < 2:
return True, True, True

if timelike and arr[0] == iNaT:
return False, False, None

prev = arr[0]
for i in range(1, n):
cur = arr[i]
if timelike and cur == iNaT:
return False, False, None
if cur < prev:
return False, None
is_monotonic_inc = 0
elif cur > prev:
is_monotonic_dec = 0
elif cur == prev:
is_unique = 0
else:
# cur or prev is NaN
return False, False, None
if not is_monotonic_inc and not is_monotonic_dec:
return False, False, None
prev = cur
return True, is_unique
return is_monotonic_inc, is_monotonic_dec, is_unique
"""

map_indices_template = """@cython.wraparound(False)
Loading