Skip to content

Commit 9a01577

Browse files
charlesdong1991 authored and jreback committed
REF: Move generic methods to aggregation.py (#30856)
1 parent 24d7c06 commit 9a01577

File tree

4 files changed

+298
-272
lines changed

4 files changed

+298
-272
lines changed

pandas/core/aggregation.py

+198
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,198 @@
1+
"""
2+
aggregation.py contains utility functions to handle multiple named and lambda
3+
kwarg aggregations in groupby and DataFrame/Series aggregation
4+
"""
5+
6+
from collections import defaultdict
7+
from functools import partial
8+
from typing import Any, DefaultDict, List, Sequence, Tuple
9+
10+
from pandas.core.dtypes.common import is_dict_like, is_list_like
11+
12+
import pandas.core.common as com
13+
from pandas.core.indexes.api import Index
14+
15+
16+
def is_multi_agg_with_relabel(**kwargs) -> bool:
    """
    Tell whether the kwargs given to .agg describe a relabeled multi-agg.

    That is the case when at least one keyword is supplied and every
    keyword value is a tuple of exactly two elements.

    Parameters
    ----------
    **kwargs : dict
        Keyword arguments as received by ``.agg``.

    Returns
    -------
    bool
        True if all values are 2-tuples and there is at least one of them.

    Examples
    --------
    >>> is_multi_agg_with_relabel(a='max')
    False
    >>> is_multi_agg_with_relabel(a_max=('a', 'max'),
    ...                           a_min=('a', 'min'))
    True
    >>> is_multi_agg_with_relabel()
    False
    """
    # No keywords at all can never be a relabeling request.
    if not kwargs:
        return False

    for spec in kwargs.values():
        if not isinstance(spec, tuple) or len(spec) != 2:
            return False
    return True
41+
42+
43+
def normalize_keyword_aggregation(kwargs: dict) -> Tuple[dict, Tuple[str, ...], Any]:
    """
    Normalize user-provided "named aggregation" kwargs.

    Transforms from the new ``Mapping[str, NamedAgg]`` style kwargs
    to the old ``Dict[str, List[scalar]]`` style.

    Parameters
    ----------
    kwargs : dict
        Maps each output name to a ``(column, aggfunc)`` pair.

    Returns
    -------
    aggspec : defaultdict of list
        The transformed kwargs: each input column mapped to the list of
        aggfuncs requested for it, in the order they were requested.
    columns : tuple of str
        The user-provided keys, in the order given.
    col_idx_order : numpy.ndarray of int
        Position of each requested ``(column, aggfunc)`` pair within the
        iteration order of ``aggspec``, as returned by ``Index.get_indexer``.

    Examples
    --------
    >>> normalize_keyword_aggregation({'output': ('input', 'sum')})
    (defaultdict(<class 'list'>, {'input': ['sum']}), ('output',), array([0]))
    """
    # Normalize the aggregation functions as Mapping[column, List[func]],
    # process normally, then fixup the names.
    # TODO: aggspec type: typing.Dict[str, List[AggScalar]]
    # May be hitting https://github.com/python/mypy/issues/5958
    # saying it doesn't have an attribute __name__
    aggspec: DefaultDict = defaultdict(list)
    order = []
    # Reading keys and values separately (instead of unzipping the items)
    # keeps the same tuple of keys and also copes with an empty mapping.
    columns = tuple(kwargs)

    for column, aggfunc in kwargs.values():
        aggspec[column].append(aggfunc)
        order.append((column, com.get_callable_name(aggfunc) or aggfunc))

    # uniquify aggfunc name if duplicated in order list
    uniquified_order = _make_unique_kwarg_list(order)

    # GH 25719: grouping the aggfuncs by column in aggspec changes the order
    # of the resulting columns relative to the order the user asked for.
    # Rebuild the (column, name) pairs in aggspec order, uniquify them the
    # same way, and compare both lists by position.
    aggspec_order = [
        (column, com.get_callable_name(aggfunc) or aggfunc)
        for column, aggfuncs in aggspec.items()
        for aggfunc in aggfuncs
    ]
    uniquified_aggspec = _make_unique_kwarg_list(aggspec_order)

    # get the new indices of columns by comparison
    col_idx_order = Index(uniquified_aggspec).get_indexer(uniquified_order)
    return aggspec, columns, col_idx_order
96+
97+
98+
def _make_unique_kwarg_list(
    seq: Sequence[Tuple[Any, Any]]
) -> Sequence[Tuple[Any, Any]]:
    """
    Uniquify aggfunc name of the pairs in the order list.

    Pairs that occur more than once in ``seq`` get ``_<k>`` appended to
    their second element, where ``k`` is the number of identical pairs
    that precede them. Pairs that occur only once are returned unchanged.

    Parameters
    ----------
    seq : sequence of 2-tuples
        ``(column, aggfunc name)`` pairs; each pair must be hashable.

    Returns
    -------
    list of 2-tuples
        A new list with the same length and order as ``seq``.

    Examples
    --------
    >>> kwarg_list = [('a', '<lambda>'), ('a', '<lambda>'), ('b', '<lambda>')]
    >>> _make_unique_kwarg_list(kwarg_list)
    [('a', '<lambda>_0'), ('a', '<lambda>_1'), ('b', '<lambda>')]
    """
    # First pass: how often does each pair occur overall?
    totals: DefaultDict[Any, int] = defaultdict(int)
    for pair in seq:
        totals[pair] += 1

    # Second pass: number the duplicated pairs in order of appearance.
    # Keeping a running count avoids rescanning the sequence per element.
    seen: DefaultDict[Any, int] = defaultdict(int)
    uniquified = []
    for pair in seq:
        if totals[pair] > 1:
            uniquified.append((pair[0], f"{pair[1]}_{seen[pair]}"))
            seen[pair] += 1
        else:
            uniquified.append(pair)
    return uniquified
115+
116+
117+
# TODO: _managle_lambda_list below is annotated with Sequence[Any] because the
# precise type can't be used: mypy doesn't like us setting __name__ on a partial
#   error: "partial[Any]" has no attribute "__name__"
# The intended type is:
#   typing.Sequence[Callable[..., ScalarResult]]
#   -> typing.Sequence[Callable[..., ScalarResult]]
122+
123+
124+
def _managle_lambda_list(aggfuncs: Sequence[Any]) -> Sequence[Any]:
    """
    Give the lambdas in a list of aggfuncs distinct names, if needed.

    Parameters
    ----------
    aggfuncs : Sequence
        The aggregation functions to inspect.

    Returns
    -------
    mangled : list-like
        A new AggSpec sequence in which every lambda is wrapped in a
        ``functools.partial`` named ``<lambda_0>``, ``<lambda_1>``, ...;
        all other entries are carried over untouched.

    Notes
    -----
    With zero or one aggfunc nothing is renamed and the input object
    itself is returned.
    """
    # don't mangle for .agg([lambda x: .])
    if len(aggfuncs) < 2:
        return aggfuncs

    renamed_funcs = []
    lambda_count = 0
    for func in aggfuncs:
        if com.get_callable_name(func) != "<lambda>":
            renamed_funcs.append(func)
            continue

        # Wrap rather than rename in place so the caller's lambda keeps
        # its original __name__.
        wrapped = partial(func)
        wrapped.__name__ = f"<lambda_{lambda_count}>"
        lambda_count += 1
        renamed_funcs.append(wrapped)

    return renamed_funcs
155+
156+
157+
def maybe_mangle_lambdas(agg_spec: Any) -> Any:
    """
    Make new lambdas with unique names.

    Parameters
    ----------
    agg_spec : Any
        An argument to GroupBy.agg.
        Non-dict-like, non-list-like `agg_spec` are passed through as is.
        For dict-like `agg_spec` a new spec is returned
        with name-mangled lambdas.
        Other list-like `agg_spec` are handed to ``_managle_lambda_list``.

    Returns
    -------
    mangled : Any
        For dict-like input, a new container of the same type as the
        input. For other list-like input, the result of
        ``_managle_lambda_list``. Anything else is returned unchanged.

    Examples
    --------
    >>> maybe_mangle_lambdas('sum')
    'sum'
    >>> funcs = maybe_mangle_lambdas([lambda: 1, lambda: 2])  # doctest: +SKIP
    >>> [f.__name__ for f in funcs]  # doctest: +SKIP
    ['<lambda_0>', '<lambda_1>']
    """
    is_dict = is_dict_like(agg_spec)
    if not (is_dict or is_list_like(agg_spec)):
        return agg_spec

    if not is_dict:
        return _managle_lambda_list(agg_spec)

    # Build the empty container only for dict-likes: list-like types are not
    # rebuilt here and may not be constructible without arguments.
    mangled_aggspec = type(agg_spec)()  # dict or OrderedDict

    for key, aggfuncs in agg_spec.items():
        if is_list_like(aggfuncs) and not is_dict_like(aggfuncs):
            mangled_aggfuncs = _managle_lambda_list(aggfuncs)
        else:
            # scalars and nested dict-likes are stored untouched
            mangled_aggfuncs = aggfuncs

        mangled_aggspec[key] = mangled_aggfuncs

    return mangled_aggspec

0 commit comments

Comments
 (0)