21
21
22
22
import numpy as np
23
23
24
- from pandas ._config import option_context
24
+ from pandas ._config import (
25
+ get_option ,
26
+ option_context ,
27
+ )
25
28
26
29
from pandas ._libs import lib
27
30
from pandas ._typing import (
@@ -82,6 +85,7 @@ def frame_apply(
82
85
result_type : str | None = None ,
83
86
args = None ,
84
87
kwargs = None ,
88
+ renamer = None ,
85
89
) -> FrameApply :
86
90
"""construct and return a row or column based frame apply object"""
87
91
axis = obj ._get_axis_number (axis )
@@ -98,6 +102,7 @@ def frame_apply(
98
102
result_type = result_type ,
99
103
args = args ,
100
104
kwargs = kwargs ,
105
+ renamer = renamer ,
101
106
)
102
107
103
108
@@ -112,6 +117,7 @@ def __init__(
112
117
result_type : str | None ,
113
118
args ,
114
119
kwargs ,
120
+ renamer = None ,
115
121
):
116
122
self .obj = obj
117
123
self .raw = raw
@@ -141,6 +147,7 @@ def f(x):
141
147
142
148
self .orig_f : AggFuncType = func
143
149
self .f : AggFuncType = f
150
+ self .renamer = renamer
144
151
145
152
@abc .abstractmethod
146
153
def apply (self ) -> DataFrame | Series :
@@ -164,10 +171,16 @@ def agg(self) -> DataFrame | Series | None:
164
171
return self .apply_str ()
165
172
166
173
if is_dict_like (arg ):
167
- return self .agg_dict_like ()
174
+ if get_option ("new_udf_methods" ):
175
+ return self .new_dict_like ("agg" )
176
+ else :
177
+ return self .agg_dict_like ()
168
178
elif is_list_like (arg ):
169
179
# we require a list, but not a 'str'
170
- return self .agg_list_like ()
180
+ if get_option ("new_udf_methods" ):
181
+ return self .new_list_like ("agg" )
182
+ else :
183
+ return self .agg_list_like ()
171
184
172
185
if callable (arg ):
173
186
f = com .get_cython_func (arg )
@@ -408,6 +421,70 @@ def agg_list_like(self) -> DataFrame | Series:
408
421
)
409
422
return concatenated .reindex (full_ordered_index , copy = False )
410
423
424
+ def new_list_like (self , method : str ) -> DataFrame | Series :
425
+ """
426
+ Compute aggregation in the case of a list-like argument.
427
+
428
+ Returns
429
+ -------
430
+ Result of aggregation.
431
+ """
432
+ from pandas .core .reshape .concat import concat
433
+
434
+ obj = self .obj
435
+ arg = cast (List [AggFuncTypeBase ], self .f )
436
+
437
+ results = []
438
+ keys = []
439
+ result_dim = None
440
+
441
+ for a in arg :
442
+ name = None
443
+ try :
444
+ if isinstance (a , (tuple , list )):
445
+ # Handle (name, value) pairs
446
+ name , a = a
447
+ new_res = getattr (obj , method )(a )
448
+ if result_dim is None :
449
+ result_dim = getattr (new_res , "ndim" , 0 )
450
+ elif getattr (new_res , "ndim" , 0 ) != result_dim :
451
+ raise ValueError (
452
+ "cannot combine transform and aggregation operations"
453
+ )
454
+ except TypeError :
455
+ pass
456
+ else :
457
+ results .append (new_res )
458
+
459
+ # make sure we find a good name
460
+ if name is None :
461
+ name = com .get_callable_name (a ) or a
462
+ keys .append (name )
463
+
464
+ # if we are empty
465
+ if not len (results ):
466
+ raise ValueError ("no results" )
467
+
468
+ try :
469
+ concatenated = concat (results , keys = keys , axis = 1 , sort = False )
470
+ except TypeError :
471
+ # we are concatting non-NDFrame objects,
472
+ # e.g. a list of scalars
473
+ from pandas import Series
474
+
475
+ result = Series (results , index = keys , name = obj .name )
476
+ return result
477
+ else :
478
+ # Concat uses the first index to determine the final indexing order.
479
+ # The union of a shorter first index with the other indices causes
480
+ # the index sorting to be different from the order of the aggregating
481
+ # functions. Reindex if this is the case.
482
+ index_size = concatenated .index .size
483
+ full_ordered_index = next (
484
+ result .index for result in results if result .index .size == index_size
485
+ )
486
+ return concatenated .reindex (full_ordered_index , copy = False )
487
+
411
488
def agg_dict_like (self ) -> DataFrame | Series :
412
489
"""
413
490
Compute aggregation in the case of a dict-like argument.
@@ -486,6 +563,86 @@ def agg_dict_like(self) -> DataFrame | Series:
486
563
487
564
return result
488
565
566
+ def new_dict_like (self , method : str ) -> DataFrame | Series :
567
+ """
568
+ Compute aggregation in the case of a dict-like argument.
569
+
570
+ Returns
571
+ -------
572
+ Result of aggregation.
573
+ """
574
+ from pandas import Index
575
+ from pandas .core .reshape .concat import concat
576
+
577
+ obj = self .obj
578
+ arg = cast (AggFuncTypeDict , self .f )
579
+
580
+ if not isinstance (obj , SelectionMixin ):
581
+ # i.e. obj is Series or DataFrame
582
+ selected_obj = obj
583
+ selection = None
584
+ else :
585
+ selected_obj = obj ._selected_obj
586
+ selection = obj ._selection
587
+
588
+ arg = self .normalize_dictlike_arg ("agg" , selected_obj , arg )
589
+
590
+ if selected_obj .ndim == 1 :
591
+ # key only used for output
592
+ colg = obj ._gotitem (selection , ndim = 1 )
593
+ results = {key : getattr (colg , method )(how ) for key , how in arg .items ()}
594
+
595
+ else :
596
+ # key used for column selection and output
597
+ results = {
598
+ key : getattr (obj ._gotitem (key , ndim = 1 ), method )(how )
599
+ for key , how in arg .items ()
600
+ }
601
+ if self .renamer is not None :
602
+ for key , columns in self .renamer .items ():
603
+ results [key ].columns = columns
604
+
605
+ # Avoid making two isinstance calls in all and any below
606
+ if isinstance (results , dict ):
607
+ is_ndframe = [isinstance (r , ABCNDFrame ) for r in results .values ()]
608
+ else :
609
+ is_ndframe = [isinstance (r , ABCNDFrame ) for r in results ]
610
+
611
+ # combine results
612
+ if all (is_ndframe ):
613
+ keys_to_use = [k for k in arg .keys () if not results [k ].empty ]
614
+ keys_to_use = keys_to_use if keys_to_use != [] else arg .keys ()
615
+ if selected_obj .ndim == 2 :
616
+ # keys are columns, so we can preserve names
617
+ ktu = Index (keys_to_use )
618
+ ktu ._set_names (selected_obj .columns .names )
619
+ keys_to_use = ktu
620
+ keys = None if selected_obj .ndim == 1 else keys_to_use
621
+ result = concat ({k : results [k ] for k in keys_to_use }, keys = keys , axis = 1 )
622
+ if result .ndim == 1 :
623
+ result = result .to_frame ()
624
+ elif any (is_ndframe ):
625
+ # There is a mix of NDFrames and scalars
626
+ raise ValueError (
627
+ "cannot perform both aggregation "
628
+ "and transformation operations "
629
+ "simultaneously"
630
+ )
631
+ else :
632
+ from pandas import Series
633
+
634
+ # we have a dict of scalars
635
+ # GH 36212 use name only if obj is a series
636
+ if obj .ndim == 1 :
637
+ obj = cast ("Series" , obj )
638
+ name = obj .name
639
+ else :
640
+ name = None
641
+
642
+ result = Series (results , index = arg .keys (), name = name )
643
+
644
+ return result
645
+
489
646
def apply_str (self ) -> DataFrame | Series :
490
647
"""
491
648
Compute apply in case of a string.
@@ -522,6 +679,35 @@ def apply_multiple(self) -> DataFrame | Series:
522
679
"""
523
680
return self .obj .aggregate (self .f , self .axis , * self .args , ** self .kwargs )
524
681
682
+ def new_apply_multiple (self ) -> DataFrame | Series :
683
+ """
684
+ Compute apply in case of a list-like or dict-like.
685
+
686
+ Returns
687
+ -------
688
+ result: Series, DataFrame, or None
689
+ Result when self.f is a list-like or dict-like, None otherwise.
690
+ """
691
+ obj = self .obj
692
+ axis = self .axis
693
+
694
+ self .obj = obj if axis == 0 else obj .T
695
+ self .axis = 0
696
+
697
+ try :
698
+ if is_dict_like (self .f ):
699
+ result = self .new_dict_like ("apply" )
700
+ else :
701
+ result = self .new_list_like ("apply" )
702
+ finally :
703
+ self .obj = obj
704
+ self .axis = axis
705
+
706
+ if axis == 1 :
707
+ result = result .T if result is not None else result
708
+
709
+ return result
710
+
525
711
def normalize_dictlike_arg (
526
712
self , how : str , obj : DataFrame | Series , func : AggFuncTypeDict
527
713
) -> AggFuncTypeDict :
@@ -661,7 +847,10 @@ def apply(self) -> DataFrame | Series:
661
847
"""compute the results"""
662
848
# dispatch to agg
663
849
if is_list_like (self .f ):
664
- return self .apply_multiple ()
850
+ if get_option ("new_udf_methods" ):
851
+ return self .new_apply_multiple ()
852
+ else :
853
+ return self .apply_multiple ()
665
854
666
855
# all empty
667
856
if len (self .columns ) == 0 and len (self .index ) == 0 :
@@ -1039,7 +1228,10 @@ def apply(self) -> DataFrame | Series:
1039
1228
1040
1229
# dispatch to agg
1041
1230
if is_list_like (self .f ):
1042
- return self .apply_multiple ()
1231
+ if get_option ("new_udf_methods" ):
1232
+ return self .new_apply_multiple ()
1233
+ else :
1234
+ return self .apply_multiple ()
1043
1235
1044
1236
if isinstance (self .f , str ):
1045
1237
# if we are a string, try to dispatch
@@ -1172,7 +1364,13 @@ def transform(self):
1172
1364
1173
1365
def reconstruct_func (
1174
1366
func : AggFuncType | None , ** kwargs
1175
- ) -> tuple [bool , AggFuncType | None , list [str ] | None , list [int ] | None ]:
1367
+ ) -> tuple [
1368
+ bool ,
1369
+ AggFuncType | None ,
1370
+ list [str ] | None ,
1371
+ list [int ] | None ,
1372
+ dict [str , list [str ]] | None ,
1373
+ ]:
1176
1374
"""
1177
1375
This is the internal function to reconstruct func given if there is relabeling
1178
1376
or not and also normalize the keyword to get new order of columns.
@@ -1204,14 +1402,16 @@ def reconstruct_func(
1204
1402
Examples
1205
1403
--------
1206
1404
>>> reconstruct_func(None, **{"foo": ("col", "min")})
1207
- (True, defaultdict(<class 'list'>, {'col': ['min']}), ('foo',), array([0]))
1405
+ (True, defaultdict(<class 'list'>, {'col': ['min']}), ('foo',), array([0]),
1406
+ defaultdict(<class 'list'>, {'col': ['foo']}))
1208
1407
1209
1408
>>> reconstruct_func("min")
1210
- (False, 'min', None, None)
1409
+ (False, 'min', None, None, None )
1211
1410
"""
1212
1411
relabeling = func is None and is_multi_agg_with_relabel (** kwargs )
1213
1412
columns : list [str ] | None = None
1214
1413
order : list [int ] | None = None
1414
+ renamer : dict [str , list [str ]] | None = None
1215
1415
1216
1416
if not relabeling :
1217
1417
if isinstance (func , list ) and len (func ) > len (set (func )):
@@ -1227,9 +1427,9 @@ def reconstruct_func(
1227
1427
raise TypeError ("Must provide 'func' or tuples of '(column, aggfunc)." )
1228
1428
1229
1429
if relabeling :
1230
- func , columns , order = normalize_keyword_aggregation (kwargs )
1430
+ func , columns , order , renamer = normalize_keyword_aggregation (kwargs )
1231
1431
1232
- return relabeling , func , columns , order
1432
+ return relabeling , func , columns , order , renamer
1233
1433
1234
1434
1235
1435
def is_multi_agg_with_relabel (** kwargs ) -> bool :
@@ -1258,7 +1458,9 @@ def is_multi_agg_with_relabel(**kwargs) -> bool:
1258
1458
)
1259
1459
1260
1460
1261
- def normalize_keyword_aggregation (kwargs : dict ) -> tuple [dict , list [str ], list [int ]]:
1461
+ def normalize_keyword_aggregation (
1462
+ kwargs : dict ,
1463
+ ) -> tuple [dict , list [str ], list [int ], dict [str , list ]]:
1262
1464
"""
1263
1465
Normalize user-provided "named aggregation" kwargs.
1264
1466
Transforms from the new ``Mapping[str, NamedAgg]`` style kwargs
@@ -1280,7 +1482,8 @@ def normalize_keyword_aggregation(kwargs: dict) -> tuple[dict, list[str], list[i
1280
1482
Examples
1281
1483
--------
1282
1484
>>> normalize_keyword_aggregation({"output": ("input", "sum")})
1283
- (defaultdict(<class 'list'>, {'input': ['sum']}), ('output',), array([0]))
1485
+ (defaultdict(<class 'list'>, {'input': ['sum']}), ('output',), array([0]),
1486
+ defaultdict(<class 'list'>, {'input': ['output']}))
1284
1487
"""
1285
1488
from pandas .core .indexes .base import Index
1286
1489
@@ -1290,11 +1493,13 @@ def normalize_keyword_aggregation(kwargs: dict) -> tuple[dict, list[str], list[i
1290
1493
# May be hitting https://github.com/python/mypy/issues/5958
1291
1494
# saying it doesn't have an attribute __name__
1292
1495
aggspec : DefaultDict = defaultdict (list )
1496
+ renamer : DefaultDict = defaultdict (list )
1293
1497
order = []
1294
1498
columns , pairs = list (zip (* kwargs .items ()))
1295
1499
1296
- for column , aggfunc in pairs :
1500
+ for name , ( column , aggfunc ) in zip ( kwargs , pairs ) :
1297
1501
aggspec [column ].append (aggfunc )
1502
+ renamer [column ].append (name )
1298
1503
order .append ((column , com .get_callable_name (aggfunc ) or aggfunc ))
1299
1504
1300
1505
# uniquify aggfunc name if duplicated in order list
@@ -1314,7 +1519,7 @@ def normalize_keyword_aggregation(kwargs: dict) -> tuple[dict, list[str], list[i
1314
1519
col_idx_order = Index (uniquified_aggspec ).get_indexer (uniquified_order )
1315
1520
# error: Incompatible return value type (got "Tuple[defaultdict[Any, Any],
1316
1521
# Any, ndarray]", expected "Tuple[Dict[Any, Any], List[str], List[int]]")
1317
- return aggspec , columns , col_idx_order # type: ignore[return-value]
1522
+ return aggspec , columns , col_idx_order , renamer # type: ignore[return-value]
1318
1523
1319
1524
1320
1525
def _make_unique_kwarg_list (
0 commit comments