Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions pandas/_libs/indexing.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,8 @@ cdef class _NDFrameIndexerBase:
ndim = self._ndim
if ndim is None:
ndim = self._ndim = self.obj.ndim
if ndim > 2:
msg = ("NDFrameIndexer does not support NDFrame objects with"
" ndim > 2")
raise ValueError(msg)
return ndim
88 changes: 6 additions & 82 deletions pandas/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from pandas.core.dtypes.common import (
ensure_platform_int, is_float, is_integer, is_integer_dtype, is_iterator,
is_list_like, is_numeric_dtype, is_scalar, is_sequence, is_sparse)
from pandas.core.dtypes.generic import ABCDataFrame, ABCPanel, ABCSeries
from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries
from pandas.core.dtypes.missing import _infer_fill_value, isna

import pandas.core.common as com
Expand Down Expand Up @@ -450,10 +450,6 @@ def _setitem_with_indexer(self, indexer, value):
self.obj._maybe_update_cacher(clear=True)
return self.obj

# set using setitem (Panel and > dims)
elif self.ndim >= 3:
return self.obj.__setitem__(indexer, value)

# set
item_labels = self.obj._get_axis(info_axis)

Expand Down Expand Up @@ -642,9 +638,6 @@ def can_do_equal_len():
elif isinstance(value, ABCDataFrame):
value = self._align_frame(indexer, value)

if isinstance(value, ABCPanel):
value = self._align_panel(indexer, value)

# check for chained assignment
self.obj._check_is_chained_assignment_possible()

Expand Down Expand Up @@ -690,7 +683,6 @@ def ravel(i):
sum_aligners = sum(aligners)
single_aligner = sum_aligners == 1
is_frame = self.obj.ndim == 2
is_panel = self.obj.ndim >= 3
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we are going to need a new error if self.obj.ndim > 2 (e.g. a numpy array); we need to check both setting & getting (it *might* already be there, but I'm not sure about tests)

obj = self.obj

# are we a single alignable value on a non-primary
Expand All @@ -702,11 +694,6 @@ def ravel(i):
if is_frame:
single_aligner = single_aligner and aligners[0]

# panel
elif is_panel:
single_aligner = (single_aligner and
(aligners[1] or aligners[2]))

# we have a frame, with multiple indexers on both axes; and a
# series, so need to broadcast (see GH5206)
if (sum_aligners == self.ndim and
Expand Down Expand Up @@ -738,38 +725,14 @@ def ravel(i):
return ser.reindex(new_ix)._values

# 2 dims
elif single_aligner and is_frame:
elif single_aligner:

# reindex along index
ax = self.obj.axes[1]
if ser.index.equals(ax) or not len(ax):
return ser._values.copy()
return ser.reindex(ax)._values

# >2 dims
elif single_aligner:

broadcast = []
for n, labels in enumerate(self.obj._get_plane_axes(i)):

# reindex along the matching dimensions
if len(labels & ser.index):
ser = ser.reindex(labels)
else:
broadcast.append((n, len(labels)))

# broadcast along other dims
ser = ser._values.copy()
for (axis, l) in broadcast:
shape = [-1] * (len(broadcast) + 1)
shape[axis] = l
ser = np.tile(ser, l).reshape(shape)

if self.obj.ndim == 3:
ser = ser.T

return ser

elif is_scalar(indexer):
ax = self.obj._get_axis(1)

Expand All @@ -782,7 +745,6 @@ def ravel(i):

def _align_frame(self, indexer, df):
is_frame = self.obj.ndim == 2
is_panel = self.obj.ndim >= 3

if isinstance(indexer, tuple):

Expand All @@ -802,21 +764,6 @@ def _align_frame(self, indexer, df):
else:
sindexers.append(i)

# panel
if is_panel:

# need to conform to the convention
# as we are not selecting on the items axis
# and we have a single indexer
# GH 7763
if len(sindexers) == 1 and sindexers[0] != 0:
df = df.T

if idx is None:
idx = df.index
if cols is None:
cols = df.columns

if idx is not None and cols is not None:

if df.index.equals(idx) and df.columns.equals(cols):
Expand All @@ -843,24 +790,8 @@ def _align_frame(self, indexer, df):
val = df.reindex(index=ax)._values
return val

elif is_scalar(indexer) and is_panel:
idx = self.obj.axes[1]
cols = self.obj.axes[2]

# by definition we are indexing on the 0th axis
# a passed in dataframe which is actually a transpose
# of what is needed
if idx.equals(df.index) and cols.equals(df.columns):
return df.copy()._values

return df.reindex(idx, columns=cols)._values

raise ValueError('Incompatible indexer with DataFrame')

def _align_panel(self, indexer, df):
raise NotImplementedError("cannot set using an indexer with a Panel "
"yet!")

def _getitem_tuple(self, tup):
try:
return self._getitem_lowerdim(tup)
Expand Down Expand Up @@ -1059,13 +990,6 @@ def _getitem_nested_tuple(self, tup):
# has the dim of the obj changed?
# GH 7199
if obj.ndim < current_ndim:

# GH 7516
# if had a 3 dim and are going to a 2d
# axes are reversed on a DataFrame
if i >= 1 and current_ndim == 3 and obj.ndim == 2:
obj = obj.T

axis -= 1

return obj
Expand Down Expand Up @@ -1562,8 +1486,8 @@ class _LocIndexer(_LocationIndexer):

- A boolean array of the same length as the axis being sliced,
e.g. ``[True, False, True]``.
- A ``callable`` function with one argument (the calling Series, DataFrame
or Panel) and that returns valid output for indexing (one of the above)
- A ``callable`` function with one argument (the calling Series or
DataFrame) and that returns valid output for indexing (one of the above)

See more at :ref:`Selection by Label <indexing.label>`

Expand Down Expand Up @@ -1931,8 +1855,8 @@ class _iLocIndexer(_LocationIndexer):
- A list or array of integers, e.g. ``[4, 3, 0]``.
- A slice object with ints, e.g. ``1:7``.
- A boolean array.
- A ``callable`` function with one argument (the calling Series, DataFrame
or Panel) and that returns valid output for indexing (one of the above).
- A ``callable`` function with one argument (the calling Series or
DataFrame) and that returns valid output for indexing (one of the above).
This is useful in method chains, when you don't have a reference to the
calling object, but would like to base your selection on some value.

Expand Down
113 changes: 113 additions & 0 deletions pandas/tests/indexing/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,14 @@

import pandas as pd
from pandas import DataFrame, Index, NaT, Series
from pandas.core.generic import NDFrame
from pandas.core.indexing import (
_maybe_numeric_slice, _non_reducing_slice, validate_indices)
from pandas.tests.indexing.common import Base, _mklbl
import pandas.util.testing as tm

ignore_ix = pytest.mark.filterwarnings("ignore:\\n.ix:FutureWarning")

# ------------------------------------------------------------------------
# Indexing test cases

Expand Down Expand Up @@ -53,6 +56,93 @@ def test_setitem_ndarray_1d(self):
with pytest.raises(ValueError):
df[2:5] = np.arange(1, 4) * 1j

@pytest.mark.parametrize('index', tm.all_index_generator(5),
ids=lambda x: type(x).__name__)
@pytest.mark.parametrize('obj', [
lambda i: Series(np.arange(len(i)), index=i),
lambda i: DataFrame(
np.random.randn(len(i), len(i)), index=i, columns=i)
], ids=['Series', 'DataFrame'])
@pytest.mark.parametrize('idxr, idxr_id', [
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

might be worth making these a fixture at some point (the iteration over indexers)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the iteration over indexers

yeah. i appreciate we have duplication of parameterization at the moment, and fixtures may be of use. didn't carry on down that route at the moment as i wasn't sure if these tests were what was required.

separately for iteration over the indexes:

xref #25748 (comment). There is an indices fixture in pandas\tests\indexes\conftest.py. Maybe worth promoting that up a level so that it can also be used in pandas\tests\indexing

(lambda x: x, 'getitem'),
(lambda x: x.loc, 'loc'),
(lambda x: x.iloc, 'iloc'),
pytest.param(lambda x: x.ix, 'ix', marks=ignore_ix)
])
def test_getitem_ndarray_3d(self, index, obj, idxr, idxr_id):
# GH 25567
obj = obj(index)
idxr = idxr(obj)
nd3 = np.random.randint(5, size=(2, 2, 2))

msg = (r"Buffer has wrong number of dimensions \(expected 1,"
r" got 3\)|"
"The truth value of an array with more than one element is"
" ambiguous|"
"Cannot index with multidimensional key|"
r"Wrong number of dimensions. values.ndim != ndim \[3 != 1\]|"
"unhashable type: 'numpy.ndarray'" # TypeError
)

if (isinstance(obj, Series) and idxr_id == 'getitem'
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ugh, this is why getitem is so complicated.

and index.inferred_type in [
'string', 'datetime64', 'period', 'timedelta64',
'boolean', 'categorical']):
idxr[nd3]
else:
if (isinstance(obj, DataFrame) and idxr_id == 'getitem'
and index.inferred_type == 'boolean'):
error = TypeError
else:
error = ValueError

with pytest.raises(error, match=msg):
idxr[nd3]

@pytest.mark.parametrize('index', tm.all_index_generator(5),
                         ids=lambda idx: type(idx).__name__)
@pytest.mark.parametrize('obj', [
    lambda idx: Series(np.arange(len(idx)), index=idx),
    lambda idx: DataFrame(
        np.random.randn(len(idx), len(idx)), index=idx, columns=idx),
], ids=['Series', 'DataFrame'])
@pytest.mark.parametrize('idxr, idxr_id', [
    pytest.param(lambda container: container, 'setitem'),
    pytest.param(lambda container: container.loc, 'loc'),
    pytest.param(lambda container: container.iloc, 'iloc'),
    pytest.param(lambda container: container.ix, 'ix', marks=ignore_ix),
])
def test_setitem_ndarray_3d(self, index, obj, idxr, idxr_id):
    """
    Assign through each indexer using a 3-dimensional ndarray as the key.

    Depending on the indexer and the inferred type of the index, the
    assignment is expected either to go through or to raise one of the
    errors whose message is matched below.
    """
    # GH 25567
    obj = obj(index)
    idxr = idxr(obj)
    nd3 = np.random.randint(5, size=(2, 2, 2))

    # one regex alternative per message that the failing paths may emit
    msg = "|".join([
        r"Buffer has wrong number of dimensions \(expected 1, got 3\)",
        "The truth value of an array with more than one element is"
        " ambiguous",
        "Only 1-dimensional input arrays are supported",
        # AttributeError
        "'pandas._libs.interval.IntervalTree' object has no attribute"
        " 'set_value'",
        # TypeError
        "unhashable type: 'numpy.ndarray'",
        # pandas.core.indexing.IndexingError
        r"^\[\[\[",
    ])
    expected_errors = (ValueError, AttributeError, TypeError,
                       pd.core.indexing.IndexingError)

    # work out whether this parameter combination is expected to succeed
    if idxr_id == 'iloc':
        assignment_succeeds = True
    elif idxr_id == 'setitem':
        assignment_succeeds = (
            isinstance(obj, Series)
            and index.inferred_type in (
                'floating', 'string', 'datetime64', 'period',
                'timedelta64', 'boolean', 'categorical'))
    elif idxr_id == 'ix':
        assignment_succeeds = index.inferred_type in (
            'string', 'datetime64', 'period', 'boolean')
    else:
        assignment_succeeds = False

    if assignment_succeeds:
        idxr[nd3] = 0
    else:
        with pytest.raises(expected_errors, match=msg):
            idxr[nd3] = 0

def test_inf_upcast(self):
# GH 16957
# We should be able to use np.inf as a key
Expand Down Expand Up @@ -1015,3 +1105,26 @@ def test_extension_array_cross_section_converts():

result = df.iloc[0]
tm.assert_series_equal(result, expected)


@pytest.mark.parametrize('idxr, error, error_message', [
    pytest.param(
        lambda ndframe: ndframe,
        AttributeError,
        "'numpy.ndarray' object has no attribute 'get'"),
    pytest.param(
        lambda ndframe: ndframe.loc,
        AttributeError,
        "type object 'NDFrame' has no attribute '_AXIS_ALIASES'"),
    pytest.param(
        lambda ndframe: ndframe.iloc,
        AttributeError,
        "type object 'NDFrame' has no attribute '_AXIS_ALIASES'"),
    pytest.param(
        lambda ndframe: ndframe.ix,
        ValueError,
        "NDFrameIndexer does not support NDFrame objects with ndim > 2",
        marks=ignore_ix),
])
def test_ndframe_indexing_raises(idxr, error, error_message):
    """
    Index a bare NDFrame built from a 3-dimensional array.

    Each indexer is expected to raise the parametrized error type with a
    message matching ``error_message``.
    """
    # GH 25567
    values = np.random.randint(5, size=(2, 2, 2))
    frame = NDFrame(values)
    with pytest.raises(error, match=error_message):
        # build the indexer inside the context so that an error raised
        # while obtaining it is captured as well
        indexer = idxr(frame)
        indexer[0]