@@ -467,7 +467,7 @@ def _selected_obj(self):
467467 def _set_selection_from_grouper (self ):
468468 """ we may need to create a selection if we have non-level groupers """
469469 grp = self .grouper
470- if self .as_index and getattr (grp ,'groupings' ,None ) is not None :
470+ if self .as_index and getattr (grp ,'groupings' ,None ) is not None and self . obj . ndim > 1 :
471471 ax = self .obj ._info_axis
472472 groupers = [ g .name for g in grp .groupings if g .level is None and g .name is not None and g .name in ax ]
473473 if len (groupers ):
@@ -759,7 +759,7 @@ def nth(self, n, dropna=None):
759759
760760 Examples
761761 --------
762- >>> DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=['A', 'B'])
762+ >>> df = DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=['A', 'B'])
763763 >>> g = df.groupby('A')
764764 >>> g.nth(0)
765765 A B
@@ -804,7 +804,10 @@ def nth(self, n, dropna=None):
804804 if self .as_index :
805805 ax = self .obj ._info_axis
806806 names = self .grouper .names
807- if all ([ n in ax for n in names ]):
807+ if self .obj .ndim == 1 :
808+ # this is a pass-thru
809+ pass
810+ elif all ([ n in ax for n in names ]):
808811 result .index = Index (self .obj [names ][is_nth ].values .ravel ()).set_names (names )
809812 elif self ._group_selection is not None :
810813 result .index = self .obj ._get_axis (self .axis )[is_nth ]
@@ -821,17 +824,29 @@ def nth(self, n, dropna=None):
821824 "(was passed %s)." % (dropna ),)
822825
823826 # old behaviour, but with all and any support for DataFrames.
824-
827+ # modified in GH 7559 to have better perf
825828 max_len = n if n >= 0 else - 1 - n
829+ dropped = self .obj .dropna (how = dropna , axis = self .axis )
826830
827- def picker (x ):
828- x = x .dropna (how = dropna ) # Note: how is ignored if Series
829- if len (x ) <= max_len :
830- return np .nan
831- else :
832- return x .iloc [n ]
831+ # get a new grouper for our dropped obj
832+ grouper , exclusions , obj = _get_grouper (dropped , key = self .keys , axis = self .axis ,
833+ level = self .level , sort = self .sort )
834+
835+ sizes = obj .groupby (grouper ).size ()
836+ result = obj .groupby (grouper ).nth (n )
837+ mask = (sizes < max_len ).values
838+
839+ # set the results which don't meet the criteria
840+ if len (result ) and mask .any ():
841+ result .loc [mask ] = np .nan
833842
834- return self .agg (picker )
843+ # reset/reindex to the original groups
844+ if len (self .obj ) == len (dropped ):
845+ result .index = self .grouper .result_index
846+ else :
847+ result = result .reindex (self .grouper .result_index )
848+
849+ return result
835850
836851 def cumcount (self , ** kwargs ):
837852 """
@@ -942,21 +957,33 @@ def tail(self, n=5):
942957 def _cumcount_array (self , arr = None , ** kwargs ):
943958 """
944959 arr is where cumcount gets its values from
960+
961+ note: this currently implements sort=False semantics, even though
962+ groupby in general defaults to sort=True
945963 """
946964 ascending = kwargs .pop ('ascending' , True )
947965
948966 if arr is None :
949967 arr = np .arange (self .grouper ._max_groupsize , dtype = 'int64' )
950968
951969 len_index = len (self ._selected_obj .index )
952- cumcounts = np .empty (len_index , dtype = arr .dtype )
970+ cumcounts = np .zeros (len_index , dtype = arr .dtype )
971+ if not len_index :
972+ return cumcounts
973+
974+ indices , values = [], []
975+ for v in self .indices .values ():
976+ indices .append (v )
977+
978+ if ascending :
979+ values .append (arr [:len (v )])
980+ else :
981+ values .append (arr [len (v )- 1 ::- 1 ])
982+
983+ indices = np .concatenate (indices )
984+ values = np .concatenate (values )
985+ cumcounts [indices ] = values
953986
954- if ascending :
955- for v in self .indices .values ():
956- cumcounts [v ] = arr [:len (v )]
957- else :
958- for v in self .indices .values ():
959- cumcounts [v ] = arr [len (v )- 1 ::- 1 ]
960987 return cumcounts
961988
962989 def _index_with_as_index (self , b ):
@@ -1270,6 +1297,7 @@ def group_info(self):
12701297 comp_ids = com ._ensure_int64 (comp_ids )
12711298 return comp_ids , obs_group_ids , ngroups
12721299
1300+
12731301 def _get_compressed_labels (self ):
12741302 all_labels = [ping .labels for ping in self .groupings ]
12751303 if self ._overflow_possible :
@@ -1892,7 +1920,6 @@ def groups(self):
18921920 self ._groups = self .index .groupby (self .grouper )
18931921 return self ._groups
18941922
1895-
18961923def _get_grouper (obj , key = None , axis = 0 , level = None , sort = True ):
18971924 """
18981925 create and return a BaseGrouper, which is an internal
@@ -2141,7 +2168,10 @@ def _wrap_aggregated_output(self, output, names=None):
21412168 if names is not None :
21422169 return DataFrame (output , index = index , columns = names )
21432170 else :
2144- return Series (output , index = index , name = self .name )
2171+ name = self .name
2172+ if name is None :
2173+ name = self ._selected_obj .name
2174+ return Series (output , index = index , name = name )
21452175
21462176 def _wrap_applied_output (self , keys , values , not_indexed_same = False ):
21472177 if len (keys ) == 0 :
0 commit comments