Skip to content

Commit 4ffc3ef

Browse files
xflr6 authored and jreback committed
PERF: speed-up DataFrame.itertuples() with namedtuples
1 parent 23ce980 commit 4ffc3ef

File tree

4 files changed

+18
-6
lines changed

4 files changed

+18
-6
lines changed

asv_bench/benchmarks/frame_methods.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -653,6 +653,16 @@ def j(self):
653653
self.df3[0]
654654

655655

656+
class frame_itertuples(object):
657+
658+
def setup(self):
659+
self.df = DataFrame(np.random.randn(50000, 10))
660+
661+
def time_frame_itertuples(self):
662+
for row in self.df.itertuples():
663+
pass
664+
665+
656666
class frame_mask_bools(object):
657667
goal_time = 0.2
658668

doc/source/whatsnew/v0.17.1.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ API changes
108108
- ``Series.sort_index()`` now correctly handles the ``inplace`` option (:issue:`11402`)
109109
- ``SparseArray.__iter__()`` now does not cause ``PendingDeprecationWarning`` in Python 3.5 (:issue:`11622`)
110110

111-
- ``DataFrame.itertuples()`` now returns ``namedtuple`` objects, when possible. (:issue:`11269`)
111+
- ``DataFrame.itertuples()`` now returns ``namedtuple`` objects, when possible. (:issue:`11269`, :issue:`11625`)
112112
- ``Series.ptp`` will now ignore missing values by default (:issue:`11163`)
113113

114114
.. _whatsnew_0171.deprecations:

pandas/core/frame.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@
4343
import pandas.computation.expressions as expressions
4444
from pandas.computation.eval import eval as _eval
4545
from numpy import percentile as _quantile
46-
from pandas.compat import(range, zip, lrange, lmap, lzip, StringIO, u,
46+
from pandas.compat import(range, map, zip, lrange, lmap, lzip, StringIO, u,
4747
OrderedDict, raise_with_traceback)
4848
from pandas import compat
4949
from pandas.sparse.array import SparseArray
@@ -664,7 +664,7 @@ def itertuples(self, index=True, name="Pandas"):
664664
index : boolean, default True
665665
If True, return the index as the first element of the tuple.
666666
name : string, default "Pandas"
667-
The name of the returned namedtuple.
667+
The name of the returned namedtuples or None to return regular tuples.
668668
669669
Notes
670670
-----
@@ -703,13 +703,13 @@ def itertuples(self, index=True, name="Pandas"):
703703

704704
# Python 3 supports at most 255 arguments to constructor, and
705705
# things get slow with this many fields in Python 2
706-
if len(self.columns) + index < 256:
706+
if name is not None and len(self.columns) + index < 256:
707707
# `rename` is unsupported in Python 2.6
708708
try:
709709
itertuple = collections.namedtuple(
710710
name, fields+list(self.columns), rename=True)
711-
return (itertuple(*row) for row in zip(*arrays))
712-
except:
711+
return map(itertuple._make, zip(*arrays))
712+
except Exception:
713713
pass
714714

715715
# fallback to regular tuples

pandas/tests/test_frame.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5554,6 +5554,8 @@ def test_itertuples(self):
55545554
dfaa = df[['a', 'a']]
55555555
self.assertEqual(list(dfaa.itertuples()), [(0, 1, 1), (1, 2, 2), (2, 3, 3)])
55565556

5557+
self.assertEqual(repr(list(df.itertuples(name=None))), '[(0, 1, 4), (1, 2, 5), (2, 3, 6)]')
5558+
55575559
tup = next(df.itertuples(name='TestName'))
55585560

55595561
# no support for field renaming in Python 2.6, regular tuples are returned

0 commit comments

Comments
 (0)