pandas-dev · jreback · Feb 8, 2014 · Feb 7, 2014
diff --git a/doc/source/10min.rst b/doc/source/10min.rst
@@ -273,25 +273,6 @@ For getting fast access to a scalar (equiv to the prior method)
 
    df.iat[1,1]
 
-There is one signficant departure from standard python/numpy slicing semantics.
-python/numpy allow slicing past the end of an array without an associated
-error.
-
-.. ipython:: python
-
-    # these are allowed in python/numpy.
-    x = list('abcdef')
-    x[4:10]
-    x[8:10]
-
-Pandas will detect this and raise ``IndexError``, rather than return an empty
-structure.
-
-::
-
-    >>> df.iloc[:,8:10]
-    IndexError: out-of-bounds on slice (end)
-
 Boolean Indexing
 ~~~~~~~~~~~~~~~~
 

diff --git a/doc/source/indexing.rst b/doc/source/indexing.rst
@@ -77,8 +77,9 @@ of multi-axis indexing.
   See more at :ref:`Selection by Label <indexing.label>`
 
 - ``.iloc`` is strictly integer position based (from ``0`` to ``length-1`` of
-  the axis), will raise ``IndexError`` when the requested indicies are out of
-  bounds. Allowed inputs are:
+  the axis). It will raise ``IndexError`` if a single index is requested and it
+  is out-of-bounds; otherwise it will conform the bounds to the size of the
+  object. Allowed inputs are:
 
   - An integer e.g. ``5``
   - A list or array of integers ``[4, 3, 0]``
@@ -420,12 +421,19 @@ python/numpy allow slicing past the end of an array without an associated error.
     x[4:10]
     x[8:10]
 
-Pandas will detect this and raise ``IndexError``, rather than return an empty structure.
+- As of v0.14.0, ``iloc`` will accept out-of-bounds indexers, e.g. a value that exceeds the length of the object being
+  indexed. These will be excluded. This will make pandas conform more closely to python/numpy indexing of out-of-bounds
+  values. A single indexer that is out-of-bounds and drops the dimensions of the object will still raise
+  ``IndexError`` (:issue:`6296`). This could result in an empty axis (e.g. an empty DataFrame being returned).
 
-::
+  .. ipython:: python
 
-    >>> df.iloc[:,3:6]
-    IndexError: out-of-bounds on slice (end)
+      df = DataFrame(np.random.randn(5,2),columns=list('AB'))
+      df
+      df.iloc[[4,5,6]]
+      df.iloc[4:6]
+      df.iloc[:,2:3]
+      df.iloc[:,1:3]
 
 .. _indexing.basics.partial_setting:
 

diff --git a/doc/source/release.rst b/doc/source/release.rst
@@ -56,6 +56,10 @@ New features
 API Changes
 ~~~~~~~~~~~
 
+- ``iloc`` will now accept out-of-bounds indexers, e.g. a value that exceeds the length of the object being
+  indexed. These will be excluded. This will make pandas conform more closely to python/numpy indexing of out-of-bounds
+  values. A single indexer that is out-of-bounds and drops the dimensions of the object will still raise
+  ``IndexError`` (:issue:`6296`)
 
 Experimental Features
 ~~~~~~~~~~~~~~~~~~~~~

diff --git a/doc/source/v0.14.0.txt b/doc/source/v0.14.0.txt
@@ -15,6 +15,20 @@ Highlights include:
 API changes
 ~~~~~~~~~~~
 
+- ``iloc`` will now accept out-of-bounds indexers, e.g. a value that exceeds the length of the object being
+  indexed. These will be excluded. This will make pandas conform more closely to python/numpy indexing of out-of-bounds
+  values. A single indexer that is out-of-bounds and drops the dimensions of the object will still raise
+  ``IndexError`` (:issue:`6296`). This could result in an empty axis (e.g. an empty DataFrame being returned).
+
+  .. ipython:: python
+
+      df = DataFrame(np.random.randn(5,2),columns=list('AB'))
+      df
+      df.iloc[[4,5,6]]
+      df.iloc[4:6]
+      df.iloc[:,2:3]
+      df.iloc[:,1:3]
+
 Prior Version Deprecations/Changes
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -1756,10 +1756,6 @@ def head(self, n=5):
         l = len(self)
         if l == 0 or n==0:
             return self
-        if n > l:
-            n = l
-        elif n < -l:
-            n = -l
         return self.iloc[:n]
 
     def tail(self, n=5):
@@ -1769,10 +1765,6 @@ def tail(self, n=5):
         l = len(self)
         if l == 0 or n == 0:
             return self
-        if n > l:
-            n = l
-        elif n < -l:
-            n = -l
         return self.iloc[-n:]
 
     #----------------------------------------------------------------------

diff --git a/pandas/core/index.py b/pandas/core/index.py
@@ -621,9 +621,15 @@ def __getitem__(self, key):
             if com._is_bool_indexer(key):
                 key = np.asarray(key)
 
-            result = arr_idx[key]
-            if result.ndim > 1:
-                return result
+            try:
+                result = arr_idx[key]
+                if result.ndim > 1:
+                    return result
+            except (IndexError):
+                if not len(key):
+                    result = []
+                else:
+                    raise
 
             return Index(result, name=self.name)
 

diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
@@ -73,6 +73,29 @@ def _get_loc(self, key, axis=0):
         return self.obj._ixs(key, axis=axis)
 
     def _slice(self, obj, axis=0, raise_on_error=False, typ=None):
+
+        # make out-of-bounds into bounds of the object
+        if typ == 'iloc':
+            ax = self.obj._get_axis(axis)
+            l = len(ax)
+            start = obj.start
+            stop = obj.stop
+            step = obj.step
+            if start is not None:
+                # degenerate to return nothing
+                if start >= l:
+                    return self._getitem_axis(tuple(),axis=axis)
+
+                # equiv to a null slice
+                elif start <= -l:
+                    start = None
+            if stop is not None:
+                if stop > l:
+                    stop = None
+                elif stop <= -l:
+                    stop = None
+            obj = slice(start,stop,step)
+
         return self.obj._slice(obj, axis=axis, raise_on_error=raise_on_error,
                                typ=typ)
 
@@ -1188,14 +1211,23 @@ def _getitem_tuple(self, tup):
             pass
 
         retval = self.obj
+        axis=0
         for i, key in enumerate(tup):
             if i >= self.obj.ndim:
                 raise IndexingError('Too many indexers')
 
             if _is_null_slice(key):
+                axis += 1
                 continue
 
-            retval = getattr(retval, self.name)._getitem_axis(key, axis=i)
+            retval = getattr(retval, self.name)._getitem_axis(key, axis=axis)
+
+            # if the dim was reduced, then pass a lower-dim the next time
+            if retval.ndim<self.ndim:
+                axis -= 1
+
+            # try to get for the next axis
+            axis += 1
 
         return retval
 
@@ -1224,17 +1256,28 @@ def _getitem_axis(self, key, axis=0):
         # a single integer or a list of integers
         else:
 
+            ax = self.obj._get_axis(axis)
             if _is_list_like(key):
 
+                # coerce the key to not exceed the maximum size of the index
+                arr = np.array(key)
+                l = len(ax)
+                if len(arr) and (arr.max() >= l or arr.min() <= -l):
+                    key = arr[(arr>-l) & (arr<l)]
+
                 # force an actual list
                 key = list(key)
+
             else:
                 key = self._convert_scalar_indexer(key, axis)
 
                 if not com.is_integer(key):
                     raise TypeError("Cannot index by location index with a "
                                     "non-integer key")
 
+                if key > len(ax):
+                    raise IndexError("single indexer is out-of-bounds")
+
             return self._get_loc(key, axis=axis)
 
     def _convert_to_indexer(self, obj, axis=0, is_setter=False):

diff --git a/pandas/core/internals.py b/pandas/core/internals.py
@@ -3246,7 +3246,7 @@ def reindex_indexer(self, new_axis, indexer, axis=1, fill_value=None,
         pandas-indexer with -1's only.
         """
         # trying to reindex on an axis with duplicates
-        if not allow_dups and not self.axes[axis].is_unique:
+        if not allow_dups and not self.axes[axis].is_unique and len(indexer):
             raise ValueError("cannot reindex from a duplicate axis")
 
         if not self.is_consolidated():

diff --git a/pandas/tests/test_generic.py b/pandas/tests/test_generic.py
@@ -873,7 +873,7 @@ def test_equals(self):
 
         s2[0] = 9.9
         self.assert_(not s1.equals(s2))
-        
+
         idx = MultiIndex.from_tuples([(0, 'a'), (1, 'b'), (2, 'c')])
         s1 = Series([1, 2, np.nan], index=idx)
         s2 = s1.copy()
@@ -900,17 +900,17 @@ def test_equals(self):
         # different dtype
         different = df1.copy()
         different['floats'] = different['floats'].astype('float32')
-        self.assert_(not df1.equals(different)) 
+        self.assert_(not df1.equals(different))
 
         # different index
         different_index = -index
         different = df2.set_index(different_index)
-        self.assert_(not df1.equals(different))        
+        self.assert_(not df1.equals(different))
 
         # different columns
         different = df2.copy()
         different.columns = df2.columns[::-1]
-        self.assert_(not df1.equals(different))        
+        self.assert_(not df1.equals(different))
 
         # DatetimeIndex
         index = pd.date_range('2000-1-1', periods=10, freq='T')

diff --git a/pandas/tests/test_indexing.py b/pandas/tests/test_indexing.py
@@ -339,6 +339,72 @@ def test_repeated_getitem_dups(self):
         result = df.loc[:,0].loc['A']
         assert_series_equal(result,expected)
 
+    def test_iloc_exceeds_bounds(self):
+
+        # GH6296
+        # iloc should allow indexers that exceed the bounds
+        df = DataFrame(np.random.random_sample((20,5)), columns=list('ABCDE'))
+        expected = df
+        result = df.iloc[:,[0,1,2,3,4,5]]
+        assert_frame_equal(result,expected)
+
+        result = df.iloc[[1,30]]
+        expected = df.iloc[[1]]
+        assert_frame_equal(result,expected)
+
+        result = df.iloc[[1,-30]]
+        expected = df.iloc[[1]]
+        assert_frame_equal(result,expected)
+
+        result = df.iloc[:,4:10]
+        expected = df.iloc[:,4:]
+        assert_frame_equal(result,expected)
+
+        result = df.iloc[:,-4:-10]
+        expected = df.iloc[:,-4:]
+        assert_frame_equal(result,expected)
+
+        result = df.iloc[[100]]
+        expected = DataFrame(columns=df.columns)
+        assert_frame_equal(result,expected)
+
+        # still raise on a single indexer
+        def f():
+            df.iloc[30]
+        self.assertRaises(IndexError, f)
+
+        s = df['A']
+        result = s.iloc[[100]]
+        expected = Series()
+        assert_series_equal(result,expected)
+
+        result = s.iloc[[-100]]
+        expected = Series()
+        assert_series_equal(result,expected)
+
+        # slice
+        result = s.iloc[18:30]
+        expected = s.iloc[18:]
+        assert_series_equal(result,expected)
+
+        # doc example
+        df = DataFrame(np.random.randn(5,2),columns=list('AB'))
+        result = df.iloc[[4,5,6]]
+        expected = df.iloc[[4]]
+        assert_frame_equal(result,expected)
+
+        result = df.iloc[4:6]
+        expected = df.iloc[[4]]
+        assert_frame_equal(result,expected)
+
+        result = df.iloc[:,2:3]
+        expected = DataFrame(index=df.index)
+        assert_frame_equal(result,expected)
+
+        result = df.iloc[:,1:3]
+        expected = df.iloc[:,[1]]
+        assert_frame_equal(result,expected)
+
     def test_iloc_getitem_int(self):
 
         # integer
@@ -442,14 +508,6 @@ def test_iloc_getitem_multiindex(self):
         xp = df.xs('b',drop_level=False)
         assert_frame_equal(rs,xp)
 
-    def test_iloc_getitem_out_of_bounds(self):
-
-        # out-of-bounds slice
-        self.assertRaises(IndexError, self.frame_ints.iloc.__getitem__, tuple([slice(None),slice(1,5,None)]))
-        self.assertRaises(IndexError, self.frame_ints.iloc.__getitem__, tuple([slice(None),slice(-5,3,None)]))
-        self.assertRaises(IndexError, self.frame_ints.iloc.__getitem__, tuple([slice(1,5,None)]))
-        self.assertRaises(IndexError, self.frame_ints.iloc.__getitem__, tuple([slice(-5,3,None)]))
-
     def test_iloc_setitem(self):
         df = self.frame_ints
 
@@ -738,12 +796,6 @@ def test_iloc_getitem_frame(self):
         expected = df.ix[[2,4,6,8]]
         assert_frame_equal(result, expected)
 
-        # out-of-bounds slice
-        self.assertRaises(IndexError, df.iloc.__getitem__, tuple([slice(None),slice(1,5,None)]))
-        self.assertRaises(IndexError, df.iloc.__getitem__, tuple([slice(None),slice(-5,3,None)]))
-        self.assertRaises(IndexError, df.iloc.__getitem__, tuple([slice(1,11,None)]))
-        self.assertRaises(IndexError, df.iloc.__getitem__, tuple([slice(-11,3,None)]))
-
         # try with labelled frame
         df = DataFrame(np.random.randn(10, 4), index=list('abcdefghij'), columns=list('ABCD'))