18
18
import numpy as np
19
19
import pytest
20
20
21
- from pandas .compat import pa_version_under6p0
21
+ from pandas .compat import (
22
+ pa_version_under6p0 ,
23
+ pa_version_under7p0 ,
24
+ )
22
25
from pandas .errors import PerformanceWarning
23
26
24
27
import pandas as pd
@@ -167,6 +170,22 @@ def test_reduce_series_numeric(self, data, all_numeric_reductions, skipna):
167
170
168
171
169
172
class TestMethods (base .BaseMethodsTests ):
173
def test_argsort(self, data_for_sorting):
    """Run the inherited ``test_argsort`` inside ``tm.maybe_produces_warning``.

    The condition handed to ``tm.maybe_produces_warning`` for
    ``PerformanceWarning`` is true only when ``pa_version_under7p0`` is
    set and the dtype of ``data_for_sorting`` reports
    ``storage == "pyarrow"`` (dtypes without a ``storage`` attribute
    fall back to ``""``).
    """
    is_pyarrow_backed = getattr(data_for_sorting.dtype, "storage", "") == "pyarrow"
    warning_ctx = tm.maybe_produces_warning(
        PerformanceWarning, pa_version_under7p0 and is_pyarrow_backed
    )
    with warning_ctx:
        super().test_argsort(data_for_sorting)
180
+
181
def test_argsort_missing(self, data_missing_for_sorting):
    """Run the inherited ``test_argsort_missing`` inside ``tm.maybe_produces_warning``.

    A ``PerformanceWarning`` condition is passed that holds only when
    ``pa_version_under7p0`` is set and the fixture's dtype reports
    ``storage == "pyarrow"``.
    """
    storage = getattr(data_missing_for_sorting.dtype, "storage", "")
    warn_condition = pa_version_under7p0 and storage == "pyarrow"
    with tm.maybe_produces_warning(PerformanceWarning, warn_condition):
        super().test_argsort_missing(data_missing_for_sorting)
188
+
170
189
def test_argmin_argmax (
171
190
self , data_for_sorting , data_missing_for_sorting , na_value , request
172
191
):
@@ -210,6 +229,89 @@ def test_argreduce_series(
210
229
data_missing_for_sorting , op_name , skipna , expected
211
230
)
212
231
232
@pytest.mark.parametrize("dropna", [True, False])
def test_value_counts(self, all_data, dropna, request):
    """Compare ``value_counts`` on the data against the same call on ``other``.

    Only the first ten elements of ``all_data`` are used. With
    ``dropna=True`` the comparison array ``other`` has missing values
    filtered out up front; otherwise it is the sliced data itself.
    Each ``value_counts`` call runs inside ``tm.maybe_produces_warning``
    with a ``PerformanceWarning`` condition built by ``_warn_condition``.
    """
    all_data = all_data[:10]
    other = all_data[~all_data.isna()] if dropna else all_data

    def _warn_condition(values):
        # The condition is switched off for the ``dropna`` run of test ids
        # containing "data_missing".
        if dropna and "data_missing" in request.node.nodeid:
            return False
        return (
            pa_version_under7p0
            and getattr(values.dtype, "storage", "") == "pyarrow"
        )

    with tm.maybe_produces_warning(PerformanceWarning, _warn_condition(all_data)):
        result = pd.Series(all_data).value_counts(dropna=dropna).sort_index()
    with tm.maybe_produces_warning(PerformanceWarning, _warn_condition(other)):
        expected = pd.Series(other).value_counts(dropna=dropna).sort_index()

    self.assert_series_equal(result, expected)
255
+
256
@pytest.mark.filterwarnings("ignore:Falling back:pandas.errors.PerformanceWarning")
def test_value_counts_with_normalize(self, data):
    """Run the inherited test with "Falling back" ``PerformanceWarning``s ignored."""
    inherited_test = super().test_value_counts_with_normalize
    inherited_test(data)
259
+
260
def test_argsort_missing_array(self, data_missing_for_sorting):
    """Run the inherited ``test_argsort_missing_array`` inside ``tm.maybe_produces_warning``.

    Parameters
    ----------
    data_missing_for_sorting
        Fixture array forwarded unchanged to the inherited test.

    Notes
    -----
    The ``PerformanceWarning`` condition holds only when
    ``pa_version_under7p0`` is set and the fixture's dtype reports
    ``storage == "pyarrow"``.
    """
    with tm.maybe_produces_warning(
        PerformanceWarning,
        pa_version_under7p0
        and getattr(data_missing_for_sorting.dtype, "storage", "") == "pyarrow",
    ):
        # Delegate to the same-named base test; ``test_argsort_missing`` is
        # a different test with its own override in this class.
        super().test_argsort_missing_array(data_missing_for_sorting)
267
+
268
@pytest.mark.parametrize(
    "na_position, expected",
    [
        ("last", np.array([2, 0, 1], dtype=np.dtype("intp"))),
        ("first", np.array([1, 2, 0], dtype=np.dtype("intp"))),
    ],
)
def test_nargsort(self, data_missing_for_sorting, na_position, expected):
    """Run the inherited ``test_nargsort`` inside ``tm.maybe_produces_warning``.

    Reference: GH 25439. The ``PerformanceWarning`` condition holds only
    when ``pa_version_under7p0`` is set and the fixture's dtype reports
    ``storage == "pyarrow"``.
    """
    # GH 25439
    dtype_storage = getattr(data_missing_for_sorting.dtype, "storage", "")
    warn_condition = pa_version_under7p0 and dtype_storage == "pyarrow"
    with tm.maybe_produces_warning(PerformanceWarning, warn_condition):
        super().test_nargsort(data_missing_for_sorting, na_position, expected)
283
+
284
@pytest.mark.parametrize("ascending", [True, False])
def test_sort_values(self, data_for_sorting, ascending, sort_by_key):
    """Run the inherited ``test_sort_values`` inside ``tm.maybe_produces_warning``.

    The ``PerformanceWarning`` condition holds only when
    ``pa_version_under7p0`` is set and the dtype of ``data_for_sorting``
    reports ``storage == "pyarrow"``.
    """
    is_pyarrow_backed = getattr(data_for_sorting.dtype, "storage", "") == "pyarrow"
    warning_ctx = tm.maybe_produces_warning(
        PerformanceWarning, pa_version_under7p0 and is_pyarrow_backed
    )
    with warning_ctx:
        super().test_sort_values(data_for_sorting, ascending, sort_by_key)
292
+
293
@pytest.mark.parametrize("ascending", [True, False])
def test_sort_values_missing(self, data_missing_for_sorting, ascending, sort_by_key):
    """Run the inherited ``test_sort_values_missing`` inside ``tm.maybe_produces_warning``.

    The ``PerformanceWarning`` condition holds only when
    ``pa_version_under7p0`` is set and the fixture's dtype reports
    ``storage == "pyarrow"``.
    """
    storage = getattr(data_missing_for_sorting.dtype, "storage", "")
    warn_condition = pa_version_under7p0 and storage == "pyarrow"
    with tm.maybe_produces_warning(PerformanceWarning, warn_condition):
        inherited_test = super().test_sort_values_missing
        inherited_test(data_missing_for_sorting, ascending, sort_by_key)
305
+
306
@pytest.mark.parametrize("ascending", [True, False])
def test_sort_values_frame(self, data_for_sorting, ascending):
    """Run the inherited ``test_sort_values_frame`` inside ``tm.maybe_produces_warning``.

    The ``PerformanceWarning`` condition holds only when
    ``pa_version_under7p0`` is set and the dtype of ``data_for_sorting``
    reports ``storage == "pyarrow"``.
    """
    dtype_storage = getattr(data_for_sorting.dtype, "storage", "")
    warn_condition = pa_version_under7p0 and dtype_storage == "pyarrow"
    with tm.maybe_produces_warning(PerformanceWarning, warn_condition):
        super().test_sort_values_frame(data_for_sorting, ascending)
314
+
213
315
214
316
class TestCasting(base.BaseCastingTests):
    """Casting tests taken from ``base.BaseCastingTests`` with no overrides."""
@@ -236,8 +338,41 @@ class TestPrinting(base.BasePrintingTests):
236
338
237
339
238
340
class TestGroupBy(base.BaseGroupbyTests):
    """Group-by tests built on ``base.BaseGroupbyTests``.

    The overrides wrap group-by work in ``tm.maybe_produces_warning`` (or
    a warning filter) for ``PerformanceWarning``. The warning condition
    holds only when ``pa_version_under7p0`` is set and the grouping data's
    dtype reports ``storage == "pyarrow"``.
    """

    @pytest.mark.parametrize("as_index", [True, False])
    def test_groupby_extension_agg(self, as_index, data_for_grouping):
        """Group on the extension column and check the mean of column ``A``."""
        frame = pd.DataFrame(
            {"A": [1, 1, 2, 2, 3, 3, 1, 4], "B": data_for_grouping}
        )
        warn_condition = (
            pa_version_under7p0
            and getattr(data_for_grouping.dtype, "storage", "") == "pyarrow"
        )

        with tm.maybe_produces_warning(PerformanceWarning, warn_condition):
            result = frame.groupby("B", as_index=as_index).A.mean()
        with tm.maybe_produces_warning(PerformanceWarning, warn_condition):
            _, uniques = pd.factorize(data_for_grouping, sort=True)

        group_means = [3.0, 1.0, 4.0]
        if not as_index:
            # Without an index the groups come back as a regular column.
            expected = pd.DataFrame({"B": uniques, "A": group_means})
            self.assert_frame_equal(result, expected)
            return

        index = pd.Index._with_infer(uniques, name="B")
        expected = pd.Series(group_means, index=index, name="A")
        self.assert_series_equal(result, expected)

    def test_groupby_extension_transform(self, data_for_grouping):
        """Run the inherited transform test inside ``tm.maybe_produces_warning``."""
        is_pyarrow_backed = (
            getattr(data_for_grouping.dtype, "storage", "") == "pyarrow"
        )
        warning_ctx = tm.maybe_produces_warning(
            PerformanceWarning, pa_version_under7p0 and is_pyarrow_backed
        )
        with warning_ctx:
            super().test_groupby_extension_transform(data_for_grouping)

    @pytest.mark.filterwarnings("ignore:Falling back:pandas.errors.PerformanceWarning")
    def test_groupby_extension_apply(self, data_for_grouping, groupby_apply_op):
        """Run the inherited apply test with "Falling back" warnings ignored."""
        inherited_test = super().test_groupby_extension_apply
        inherited_test(data_for_grouping, groupby_apply_op)
241
376
242
377
243
378
class Test2DCompat (base .Dim2CompatTests ):
0 commit comments