2
2
3
3
4
4
from pandas.core.dtypes.common import is_list_like, is_scalar
5
+ from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries
6
+
5
7
from pandas.core.reshape.concat import concat
6
- from pandas import Series, DataFrame, MultiIndex, Index
8
+ from pandas.core.series import Series
7
9
from pandas.core.groupby import Grouper
8
10
from pandas.core.reshape.util import cartesian_product
9
- from pandas.core.index import _get_combined_index
11
+ from pandas.core.index import Index, _get_combined_index
10
12
from pandas.compat import range, lrange, zip
11
13
from pandas import compat
12
14
import pandas.core.common as com
15
+ from pandas.util._decorators import Appender, Substitution
16
+
17
+ from pandas.core.frame import _shared_docs
18
+ # Note: We need to make sure `frame` is imported before `pivot`, otherwise
19
+ # _shared_docs['pivot_table'] will not yet exist. TODO: Fix this dependency
20
+
13
21
import numpy as np
14
22
15
23
24
+ @Substitution('\ndata : DataFrame')
25
+ @Appender(_shared_docs['pivot_table'], indents=1)
16
26
def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean',
17
27
fill_value=None, margins=False, dropna=True,
18
28
margins_name='All'):
19
- """
20
- Create a spreadsheet-style pivot table as a DataFrame. The levels in the
21
- pivot table will be stored in MultiIndex objects (hierarchical indexes) on
22
- the index and columns of the result DataFrame
23
-
24
- Parameters
25
- ----------
26
- data : DataFrame
27
- values : column to aggregate, optional
28
- index : column, Grouper, array, or list of the previous
29
- If an array is passed, it must be the same length as the data. The list
30
- can contain any of the other types (except list).
31
- Keys to group by on the pivot table index. If an array is passed, it
32
- is used in the same manner as column values.
33
- columns : column, Grouper, array, or list of the previous
34
- If an array is passed, it must be the same length as the data. The list
35
- can contain any of the other types (except list).
36
- Keys to group by on the pivot table column. If an array is passed, it
37
- is used in the same manner as column values.
38
- aggfunc : function or list of functions, default numpy.mean
39
- If list of functions passed, the resulting pivot table will have
40
- hierarchical columns whose top level are the function names (inferred
41
- from the function objects themselves)
42
- fill_value : scalar, default None
43
- Value to replace missing values with
44
- margins : boolean, default False
45
- Add all rows / columns (e.g. for subtotal / grand totals)
46
- dropna : boolean, default True
47
- Do not include columns whose entries are all NaN
48
- margins_name : string, default 'All'
49
- Name of the row / column that will contain the totals
50
- when margins is True.
51
-
52
- Examples
53
- --------
54
- >>> df = pd.DataFrame({"A": ["foo", "foo", "foo", "foo", "foo",
55
- ... "bar", "bar", "bar", "bar"],
56
- ... "B": ["one", "one", "one", "two", "two",
57
- ... "one", "one", "two", "two"],
58
- ... "C": ["small", "large", "large", "small",
59
- ... "small", "large", "small", "small",
60
- ... "large"],
61
- ... "D": [1, 2, 2, 3, 3, 4, 5, 6, 7]})
62
- >>> df
63
- A B C D
64
- 0 foo one small 1
65
- 1 foo one large 2
66
- 2 foo one large 2
67
- 3 foo two small 3
68
- 4 foo two small 3
69
- 5 bar one large 4
70
- 6 bar one small 5
71
- 7 bar two small 6
72
- 8 bar two large 7
73
-
74
- >>> table = pivot_table(df, values='D', index=['A', 'B'],
75
- ... columns=['C'], aggfunc=np.sum)
76
- >>> table
77
- ... # doctest: +NORMALIZE_WHITESPACE
78
- C large small
79
- A B
80
- bar one 4.0 5.0
81
- two 7.0 6.0
82
- foo one 4.0 1.0
83
- two NaN 6.0
84
-
85
- Returns
86
- -------
87
- table : DataFrame
88
-
89
- See also
90
- --------
91
- DataFrame.pivot : pivot without aggregation that can handle
92
- non-numeric data
93
- """
94
29
index = _convert_by(index)
95
30
columns = _convert_by(columns)
96
31
@@ -162,6 +97,7 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean',
162
97
table = agged.unstack(to_unstack)
163
98
164
99
if not dropna:
100
+ from pandas import MultiIndex
165
101
try:
166
102
m = MultiIndex.from_arrays(cartesian_product(table.index.levels),
167
103
names=table.index.names)
@@ -176,7 +112,7 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean',
176
112
except AttributeError:
177
113
pass # it's a single level or a series
178
114
179
- if isinstance(table, DataFrame ):
115
+ if isinstance(table, ABCDataFrame ):
180
116
table = table.sort_index(axis=1)
181
117
182
118
if fill_value is not None:
@@ -197,16 +133,13 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean',
197
133
if len(index) == 0 and len(columns) > 0:
198
134
table = table.T
199
135
200
- # GH 15193 Makse sure empty columns are removed if dropna=True
201
- if isinstance(table, DataFrame ) and dropna:
136
+ # GH 15193 Make sure empty columns are removed if dropna=True
137
+ if isinstance(table, ABCDataFrame ) and dropna:
202
138
table = table.dropna(how='all', axis=1)
203
139
204
140
return table
205
141
206
142
207
- DataFrame.pivot_table = pivot_table
208
-
209
-
210
143
def _add_margins(table, data, values, rows, cols, aggfunc,
211
144
margins_name='All', fill_value=None):
212
145
if not isinstance(margins_name, compat.string_types):
@@ -230,7 +163,7 @@ def _add_margins(table, data, values, rows, cols, aggfunc,
230
163
else:
231
164
key = margins_name
232
165
233
- if not values and isinstance(table, Series ):
166
+ if not values and isinstance(table, ABCSeries ):
234
167
# If there are no values and the table is a series, then there is only
235
168
# one column in the data. Compute grand margin and return it.
236
169
return table.append(Series({key: grand_margin[margins_name]}))
@@ -257,6 +190,7 @@ def _add_margins(table, data, values, rows, cols, aggfunc,
257
190
else:
258
191
row_margin[k] = grand_margin[k[0]]
259
192
193
+ from pandas import DataFrame
260
194
margin_dummy = DataFrame(row_margin, columns=[key]).T
261
195
262
196
row_names = result.index.names
@@ -402,7 +336,7 @@ def _convert_by(by):
402
336
if by is None:
403
337
by = []
404
338
elif (is_scalar(by) or
405
- isinstance(by, (np.ndarray, Index, Series , Grouper)) or
339
+ isinstance(by, (np.ndarray, Index, ABCSeries , Grouper)) or
406
340
hasattr(by, '__call__')):
407
341
by = [by]
408
342
else:
@@ -523,6 +457,7 @@ def crosstab(index, columns, values=None, rownames=None, colnames=None,
523
457
if values is not None and aggfunc is None:
524
458
raise ValueError("values cannot be used without an aggfunc.")
525
459
460
+ from pandas import DataFrame
526
461
df = DataFrame(data, index=common_idx)
527
462
if values is None:
528
463
df['__dummy__'] = 0
@@ -620,7 +555,7 @@ def _get_names(arrs, names, prefix='row'):
620
555
if names is None:
621
556
names = []
622
557
for i, arr in enumerate(arrs):
623
- if isinstance(arr, Series ) and arr.name is not None:
558
+ if isinstance(arr, ABCSeries ) and arr.name is not None:
624
559
names.append(arr.name)
625
560
else:
626
561
names.append('%s_%d' % (prefix, i))
0 commit comments