REF: delegate attrs for CategoricalIndex, IntervalIndex #30605


Merged
merged 1 commit into from Jan 2, 2020

74 changes: 33 additions & 41 deletions pandas/core/indexes/category.py
@@ -10,7 +10,7 @@
from pandas._typing import AnyArrayLike
import pandas.compat as compat
from pandas.compat.numpy import function as nv
from pandas.util._decorators import Appender, Substitution, cache_readonly
from pandas.util._decorators import Appender, cache_readonly

from pandas.core.dtypes.common import (
ensure_platform_int,
@@ -26,7 +26,6 @@
from pandas.core import accessor
from pandas.core.algorithms import take_1d
from pandas.core.arrays.categorical import Categorical, _recode_for_categories, contains
from pandas.core.base import _shared_docs
import pandas.core.common as com
import pandas.core.indexes.base as ibase
from pandas.core.indexes.base import Index, _index_shared_docs, maybe_extract_name
@@ -37,6 +36,12 @@
_index_doc_kwargs.update(dict(target_klass="CategoricalIndex"))


@accessor.delegate_names(
delegate=Categorical,
accessors=["codes", "categories", "ordered"],
typ="property",
overwrite=True,
)
@accessor.delegate_names(
delegate=Categorical,
accessors=[
@@ -50,6 +55,12 @@
"as_unordered",
"min",
"max",
"is_dtype_equal",
"tolist",
"_internal_get_values",
"_reverse_indexer",
"searchsorted",
"argsort",
],
typ="method",
overwrite=True,
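A rough usage sketch of what this decorator stack provides (illustrative only; the example data and reprs below are assumptions, not part of the diff). Each name listed in accessors becomes a generated property or method on CategoricalIndex that routes through _delegate_property_get / _delegate_method to the underlying Categorical:

import pandas as pd

ci = pd.CategoricalIndex(["a", "b", "a"], categories=["a", "b", "c"], ordered=True)

# Properties are now generated by delegate_names rather than hand-written:
ci.codes        # array([0, 1, 0], dtype=int8), straight from the Categorical
ci.categories   # Index(['a', 'b', 'c'], dtype='object')
ci.ordered      # True

# Methods are generated the same way; list-like results are re-wrapped in a
# new CategoricalIndex by _delegate_method unless the name is in _raw_inherit:
ci.as_unordered()   # CategoricalIndex with ordered=False
ci.tolist()         # ['a', 'b', 'a'] -- a plain list, since tolist is in _raw_inherit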
@@ -147,6 +158,20 @@ class CategoricalIndex(Index, accessor.PandasDelegate):

_typ = "categoricalindex"

_raw_inherit = {
"argsort",
"_internal_get_values",
"tolist",
"codes",
"categories",
"ordered",
"_reverse_indexer",
"searchsorted",
}

codes: np.ndarray
categories: Index

@property
def _engine_type(self):
# self.codes can have dtype int8, int16, int32 or int64, so we need
@@ -370,29 +395,6 @@ def _wrap_setop_result(self, other, result):
name = get_op_result_name(self, other)
return self._shallow_copy(result, name=name)

def _internal_get_values(self):
# override base Index version to get the numpy array representation of
# the underlying Categorical
return self._data._internal_get_values()

def tolist(self):
return self._data.tolist()

@property
def codes(self):
return self._data.codes

@property
def categories(self):
return self._data.categories

@property
def ordered(self):
return self._data.ordered

def _reverse_indexer(self):
return self._data._reverse_indexer()

@Appender(_index_shared_docs["contains"] % _index_doc_kwargs)
def __contains__(self, key) -> bool:
# if key is a NaN, check if any NaN is in self.
@@ -429,9 +431,6 @@ def fillna(self, value, downcast=None):
self._assert_can_do_op(value)
return CategoricalIndex(self._data.fillna(value), name=self.name)

def argsort(self, *args, **kwargs):
return self.values.argsort(*args, **kwargs)

@cache_readonly
def _engine(self):
# we are going to look things up with the codes themselves.
@@ -539,11 +538,6 @@ def get_value(self, series: AnyArrayLike, key: Any):
# we might be a positional indexer
return super().get_value(series, key)

@Substitution(klass="CategoricalIndex")
@Appender(_shared_docs["searchsorted"])
def searchsorted(self, value, side="left", sorter=None):
return self._data.searchsorted(value, side=side, sorter=sorter)

@Appender(_index_shared_docs["where"])
def where(self, cond, other=None):
# TODO: Investigate an alternative implementation with
@@ -746,9 +740,6 @@ def take(self, indices, axis=0, allow_fill=True, fill_value=None, **kwargs):
)
return self._create_from_codes(taken)

def is_dtype_equal(self, other):
return self._data.is_dtype_equal(other)

take_nd = take

@Appender(_index_shared_docs["_maybe_cast_slice_bound"])
@@ -882,10 +873,6 @@ def _concat_same_dtype(self, to_concat, name):
result.name = name
return result

def _codes_for_groupby(self, sort, observed):
""" Return a Categorical adjusted for groupby """
return self.values._codes_for_groupby(sort, observed)

@classmethod
def _add_comparison_methods(cls):
""" add in comparison methods """
@@ -911,13 +898,18 @@ def _evaluate_compare(self, other):
cls.__le__ = _make_compare(operator.le)
cls.__ge__ = _make_compare(operator.ge)

def _delegate_property_get(self, name, *args, **kwargs):
""" method delegation to the ._values """
prop = getattr(self._values, name)
return prop # no wrapping for now

def _delegate_method(self, name, *args, **kwargs):
""" method delegation to the ._values """
method = getattr(self._values, name)
if "inplace" in kwargs:
raise ValueError("cannot use inplace with CategoricalIndex")
res = method(*args, **kwargs)
if is_scalar(res):
if is_scalar(res) or name in self._raw_inherit:
Contributor: can you add a comment here on what is happening

Contributor: is_scalar is the heavier check, can you put ‘name in self._raw_inherit’ as the first check.

return res
return CategoricalIndex(res, name=self.name)
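A possible follow-up addressing both review comments above (a sketch, not part of this diff): test the cheap set membership before the heavier is_scalar call, and document why _raw_inherit names skip the re-wrapping:

def _delegate_method(self, name, *args, **kwargs):
    """ method delegation to the ._values """
    method = getattr(self._values, name)
    if "inplace" in kwargs:
        raise ValueError("cannot use inplace with CategoricalIndex")
    res = method(*args, **kwargs)
    # Names in _raw_inherit (argsort, searchsorted, tolist, _reverse_indexer,
    # ...) should return the Categorical's raw result (ndarray, list, dict)
    # rather than be re-wrapped in a CategoricalIndex; scalars also pass
    # through unchanged. The set-membership test is the cheaper check, so it
    # goes first.
    if name in self._raw_inherit or is_scalar(res):
        return res
    return CategoricalIndex(res, name=self.name)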

137 changes: 39 additions & 98 deletions pandas/core/indexes/interval.py
@@ -37,6 +37,7 @@
from pandas.core.dtypes.generic import ABCSeries
from pandas.core.dtypes.missing import isna

from pandas.core import accessor
from pandas.core.algorithms import take_1d
from pandas.core.arrays.interval import IntervalArray, _interval_shared_docs
import pandas.core.common as com
@@ -181,7 +182,28 @@ def func(intvidx_self, other, sort=False):
),
)
)
class IntervalIndex(IntervalMixin, Index):
@accessor.delegate_names(
delegate=IntervalArray,
accessors=[
"_ndarray_values",
"length",
"size",
"left",
"right",
"mid",
"closed",
"dtype",
],
typ="property",
overwrite=True,
)
@accessor.delegate_names(
delegate=IntervalArray,
accessors=["__array__", "overlaps", "contains"],
typ="method",
overwrite=True,
)
class IntervalIndex(IntervalMixin, Index, accessor.PandasDelegate):
_typ = "intervalindex"
_comparables = ["name"]
_attributes = ["name", "closed"]
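An illustrative sketch of what these decorators give IntervalIndex (example data and outputs are assumptions, not part of the diff): the listed properties and methods are generated on the class and resolved on the backing IntervalArray:

import pandas as pd

ii = pd.IntervalIndex.from_tuples([(0, 1), (1, 3), (2, 4)])

# Properties delegated to the IntervalArray via _delegate_property_get:
ii.left      # left endpoints as an Index: [0, 1, 2]
ii.closed    # 'right'
ii.length    # per-interval lengths as an Index: [1, 2, 2]

# Methods listed in _raw_inherit return the IntervalArray's raw result
# (here a boolean ndarray) instead of being re-wrapped in a new IntervalIndex:
ii.overlaps(pd.Interval(0.5, 1.5))   # array([ True,  True, False])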
@@ -192,6 +214,8 @@ class IntervalIndex(IntervalMixin, Index):
# Immutable, so we are able to cache computations like isna in '_mask'
_mask = None

_raw_inherit = {"_ndarray_values", "__array__", "overlaps", "contains"}
Contributor: same as above, should we also add this attribute as an empty dict in Index?

Member (author): I'd rather not have a dummy attr on Index, but I'm building up to implementing ExtensionIndex, which will have it.

Contributor: see my comment below


# --------------------------------------------------------------------
# Constructors

@@ -388,30 +412,6 @@ def to_tuples(self, na_tuple=True):
def _multiindex(self):
return MultiIndex.from_arrays([self.left, self.right], names=["left", "right"])

@property
def left(self):
"""
Return the left endpoints of each Interval in the IntervalIndex as
an Index.
"""
return self._data._left

@property
def right(self):
"""
Return the right endpoints of each Interval in the IntervalIndex as
an Index.
"""
return self._data._right

@property
def closed(self):
"""
Whether the intervals are closed on the left-side, right-side, both or
neither.
"""
return self._data._closed

@Appender(
_interval_shared_docs["set_closed"]
% dict(
@@ -434,25 +434,8 @@
)
)
def set_closed(self, closed):
if closed not in _VALID_CLOSED:
raise ValueError(f"invalid option for 'closed': {closed}")

# return self._shallow_copy(closed=closed)
array = self._data.set_closed(closed)
return self._simple_new(array, self.name)

@property
def length(self):
"""
Return an Index with entries denoting the length of each Interval in
the IntervalIndex.
"""
return self._data.length

@property
def size(self):
# Avoid materializing ndarray[Interval]
return self._data.size
return self._simple_new(array, self.name) # TODO: can we use _shallow_copy?

def __len__(self) -> int:
return len(self.left)
@@ -468,16 +451,6 @@ def values(self):
def _values(self):
return self._data

@cache_readonly
def _ndarray_values(self) -> np.ndarray:
return np.array(self._data)

def __array__(self, result=None):
"""
The array interface, return my values.
"""
return self._ndarray_values

def __array_wrap__(self, result, context=None):
# we don't want the superclass implementation
return result
@@ -506,13 +479,6 @@ def astype(self, dtype, copy=True):
return self._shallow_copy(new_values.left, new_values.right)
return super().astype(dtype, copy=copy)

@cache_readonly
def dtype(self):
"""
Return the dtype object of the underlying data.
"""
return self._data.dtype

@property
def inferred_type(self) -> str:
"""Return a string of the type inferred from the values"""
@@ -1177,44 +1143,6 @@ def equals(self, other) -> bool:
and self.closed == other.closed
)

@Appender(
_interval_shared_docs["contains"]
% dict(
klass="IntervalIndex",
examples=textwrap.dedent(
"""\
>>> intervals = pd.IntervalIndex.from_tuples([(0, 1), (1, 3), (2, 4)])
>>> intervals
IntervalIndex([(0, 1], (1, 3], (2, 4]],
closed='right',
dtype='interval[int64]')
>>> intervals.contains(0.5)
array([ True, False, False])
"""
),
)
)
def contains(self, other):
return self._data.contains(other)

@Appender(
_interval_shared_docs["overlaps"]
% dict(
klass="IntervalIndex",
examples=textwrap.dedent(
"""\
>>> intervals = pd.IntervalIndex.from_tuples([(0, 1), (1, 3), (2, 4)])
Member: It looks like this means that we'll lose any IntervalIndex-specific portions of docstrings? And the docstring will just be whatever the IntervalArray docstring is? I'm fine with this since the usage is basically identical between the two, so I don't think it will cause any confusion. It will allow for some cleanup of the docstrings on the IntervalArray side, but that can be done later.

Member (author), quoting "It looks like this means that we'll lose any IntervalIndex specific portions of docstrings?": Yes

>>> intervals
IntervalIndex([(0, 1], (1, 3], (2, 4]],
closed='right',
dtype='interval[int64]')
"""
),
)
)
def overlaps(self, other):
return self._data.overlaps(other)

@Appender(_index_shared_docs["intersection"])
@SetopCheck(op_name="intersection")
def intersection(
@@ -1314,6 +1242,19 @@ def is_all_dates(self) -> bool:

# TODO: arithmetic operations

def _delegate_property_get(self, name, *args, **kwargs):
""" method delegation to the ._values """
prop = getattr(self._data, name)
return prop # no wrapping for now

def _delegate_method(self, name, *args, **kwargs):
""" method delegation to the ._data """
method = getattr(self._data, name)
res = method(*args, **kwargs)
if is_scalar(res) or name in self._raw_inherit:
Contributor: add a comment here as well. Can we not put these methods on Index?

Member (author): You mean remove them from IntervalIndex? No idea. @jschendel?

Contributor: no, the delegation methods that you added could be on Index itself.

Member (author): I see. Yes, the point of this exercise is to build up to moving as much of it as possible to either ExtensionIndex or even Index. Doing this on II/CI before refactoring is just to limit the scope.

Contributor: it's fine, but then we will have to clean up later.

return res
return type(self)(res, name=self.name)
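Where the discussion above is heading, as a sketch only (the mixin name is hypothetical and nothing like this exists in this PR): the two _delegate_* helpers could eventually live once on a shared base such as ExtensionIndex or Index, rather than being duplicated on CategoricalIndex and IntervalIndex:

from pandas.api.types import is_scalar

class _DelegatedIndexMixin:
    # hypothetical shared base; each subclass supplies its own _raw_inherit
    _raw_inherit: frozenset = frozenset()

    def _delegate_property_get(self, name, *args, **kwargs):
        # attribute lookup on the backing array; no wrapping for now
        # (CategoricalIndex currently reaches it via ._values, IntervalIndex
        # via ._data -- a shared base would have to settle on one)
        return getattr(self._data, name)

    def _delegate_method(self, name, *args, **kwargs):
        method = getattr(self._data, name)
        res = method(*args, **kwargs)
        if name in self._raw_inherit or is_scalar(res):
            return res
        # re-wrap list-like results in the concrete Index subclass
        return type(self)(res, name=self.name)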


IntervalIndex._add_logical_methods_disabled()
