Skip to content

Commit 786c28f

Browse files
authored
TST: Filter/test pyarrow PerformanceWarnings (#48093)
1 parent 8c2d327 commit 786c28f

File tree

3 files changed

+185
-11
lines changed

3 files changed

+185
-11
lines changed

pandas/tests/base/test_value_counts.py

+33-6
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,9 @@
44
import numpy as np
55
import pytest
66

7+
from pandas.compat import pa_version_under7p0
8+
from pandas.errors import PerformanceWarning
9+
710
import pandas as pd
811
from pandas import (
912
DatetimeIndex,
@@ -36,8 +39,16 @@ def test_value_counts(index_or_series_obj):
3639
# TODO(GH#32514): Order of entries with the same count is inconsistent
3740
# on CI (gh-32449)
3841
if obj.duplicated().any():
39-
result = result.sort_index()
40-
expected = expected.sort_index()
42+
with tm.maybe_produces_warning(
43+
PerformanceWarning,
44+
pa_version_under7p0 and getattr(obj.dtype, "storage", "") == "pyarrow",
45+
):
46+
result = result.sort_index()
47+
with tm.maybe_produces_warning(
48+
PerformanceWarning,
49+
pa_version_under7p0 and getattr(obj.dtype, "storage", "") == "pyarrow",
50+
):
51+
expected = expected.sort_index()
4152
tm.assert_series_equal(result, expected)
4253

4354

@@ -70,8 +81,16 @@ def test_value_counts_null(null_obj, index_or_series_obj):
7081
if obj.duplicated().any():
7182
# TODO(GH#32514):
7283
# Order of entries with the same count is inconsistent on CI (gh-32449)
73-
expected = expected.sort_index()
74-
result = result.sort_index()
84+
with tm.maybe_produces_warning(
85+
PerformanceWarning,
86+
pa_version_under7p0 and getattr(obj.dtype, "storage", "") == "pyarrow",
87+
):
88+
expected = expected.sort_index()
89+
with tm.maybe_produces_warning(
90+
PerformanceWarning,
91+
pa_version_under7p0 and getattr(obj.dtype, "storage", "") == "pyarrow",
92+
):
93+
result = result.sort_index()
7594

7695
if not isinstance(result.dtype, np.dtype):
7796
# i.e IntegerDtype
@@ -84,8 +103,16 @@ def test_value_counts_null(null_obj, index_or_series_obj):
84103
if obj.duplicated().any():
85104
# TODO(GH#32514):
86105
# Order of entries with the same count is inconsistent on CI (gh-32449)
87-
expected = expected.sort_index()
88-
result = result.sort_index()
106+
with tm.maybe_produces_warning(
107+
PerformanceWarning,
108+
pa_version_under7p0 and getattr(obj.dtype, "storage", "") == "pyarrow",
109+
):
110+
expected = expected.sort_index()
111+
with tm.maybe_produces_warning(
112+
PerformanceWarning,
113+
pa_version_under7p0 and getattr(obj.dtype, "storage", "") == "pyarrow",
114+
):
115+
result = result.sort_index()
89116
tm.assert_series_equal(result, expected)
90117

91118

pandas/tests/extension/test_string.py

+138-3
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,10 @@
1818
import numpy as np
1919
import pytest
2020

21-
from pandas.compat import pa_version_under6p0
21+
from pandas.compat import (
22+
pa_version_under6p0,
23+
pa_version_under7p0,
24+
)
2225
from pandas.errors import PerformanceWarning
2326

2427
import pandas as pd
@@ -167,6 +170,22 @@ def test_reduce_series_numeric(self, data, all_numeric_reductions, skipna):
167170

168171

169172
class TestMethods(base.BaseMethodsTests):
173+
def test_argsort(self, data_for_sorting):
    """Run the inherited ``test_argsort`` inside a conditional warning check.

    ``PerformanceWarning`` is handed to ``tm.maybe_produces_warning`` together
    with a condition that is truthy only when ``pa_version_under7p0`` is set
    and the dtype of ``data_for_sorting`` exposes a ``storage`` attribute
    equal to ``"pyarrow"``.
    """
    expects_warning = (
        pa_version_under7p0
        and getattr(data_for_sorting.dtype, "storage", "") == "pyarrow"
    )
    with tm.maybe_produces_warning(PerformanceWarning, expects_warning):
        super().test_argsort(data_for_sorting)
180+
181+
def test_argsort_missing(self, data_missing_for_sorting):
    """Delegate to the inherited ``test_argsort_missing`` with a warning guard.

    The guard is ``tm.maybe_produces_warning(PerformanceWarning, condition)``,
    where ``condition`` requires both ``pa_version_under7p0`` and a dtype
    whose ``storage`` attribute (default ``""``) equals ``"pyarrow"``.
    """
    dtype = data_missing_for_sorting.dtype
    warning_guard = tm.maybe_produces_warning(
        PerformanceWarning,
        pa_version_under7p0 and getattr(dtype, "storage", "") == "pyarrow",
    )
    with warning_guard:
        super().test_argsort_missing(data_missing_for_sorting)
188+
170189
def test_argmin_argmax(
171190
self, data_for_sorting, data_missing_for_sorting, na_value, request
172191
):
@@ -210,6 +229,89 @@ def test_argreduce_series(
210229
data_missing_for_sorting, op_name, skipna, expected
211230
)
212231

232+
@pytest.mark.parametrize("dropna", [True, False])
def test_value_counts(self, all_data, dropna, request):
    """Check ``value_counts`` on the first ten elements of ``all_data``.

    ``result`` is computed from the sliced data; ``expected`` is computed from
    ``other``, which is the same slice with NA entries removed when ``dropna``
    is true. Both computations end with ``sort_index()`` and run inside
    ``tm.maybe_produces_warning(PerformanceWarning, ...)``.
    """
    all_data = all_data[:10]
    other = all_data[~all_data.isna()] if dropna else all_data

    def _warning_condition(values):
        # Same three-part condition for both computations; only the array
        # whose dtype is inspected differs.
        return (
            pa_version_under7p0
            and getattr(values.dtype, "storage", "") == "pyarrow"
            and not (dropna and "data_missing" in request.node.nodeid)
        )

    with tm.maybe_produces_warning(PerformanceWarning, _warning_condition(all_data)):
        result = pd.Series(all_data).value_counts(dropna=dropna).sort_index()
    with tm.maybe_produces_warning(PerformanceWarning, _warning_condition(other)):
        expected = pd.Series(other).value_counts(dropna=dropna).sort_index()

    self.assert_series_equal(result, expected)
255+
256+
@pytest.mark.filterwarnings("ignore:Falling back:pandas.errors.PerformanceWarning")
def test_value_counts_with_normalize(self, data):
    """Run the inherited ``test_value_counts_with_normalize`` unchanged.

    The only addition is the ``filterwarnings`` mark, which ignores
    ``PerformanceWarning`` messages matching ``Falling back``.
    """
    inherited_test = super().test_value_counts_with_normalize
    inherited_test(data)
259+
260+
def test_argsort_missing_array(self, data_missing_for_sorting):
    """Run the inherited ``test_argsort_missing_array`` with a warning guard.

    The guard is ``tm.maybe_produces_warning(PerformanceWarning, condition)``,
    where ``condition`` requires both ``pa_version_under7p0`` and a dtype
    whose ``storage`` attribute (default ``""``) equals ``"pyarrow"``.
    """
    with tm.maybe_produces_warning(
        PerformanceWarning,
        pa_version_under7p0
        and getattr(data_missing_for_sorting.dtype, "storage", "") == "pyarrow",
    ):
        # Delegate to the base test of the same name. Calling
        # ``test_argsort_missing`` here would only repeat the sibling
        # override and leave the array-level check unexecuted.
        super().test_argsort_missing_array(data_missing_for_sorting)
267+
268+
@pytest.mark.parametrize(
    "na_position, expected",
    [
        ("last", np.array([2, 0, 1], dtype=np.dtype("intp"))),
        ("first", np.array([1, 2, 0], dtype=np.dtype("intp"))),
    ],
)
def test_nargsort(self, data_missing_for_sorting, na_position, expected):
    """Forward to the inherited ``test_nargsort`` for each ``na_position`` case.

    The call is wrapped in ``tm.maybe_produces_warning`` for
    ``PerformanceWarning``; the condition is ``pa_version_under7p0`` combined
    with a ``"pyarrow"`` value for the dtype's ``storage`` attribute.
    """
    # GH 25439
    storage_is_pyarrow_on_old_pa = (
        pa_version_under7p0
        and getattr(data_missing_for_sorting.dtype, "storage", "") == "pyarrow"
    )
    with tm.maybe_produces_warning(PerformanceWarning, storage_is_pyarrow_on_old_pa):
        super().test_nargsort(data_missing_for_sorting, na_position, expected)
283+
284+
@pytest.mark.parametrize("ascending", [True, False])
def test_sort_values(self, data_for_sorting, ascending, sort_by_key):
    """Run the inherited ``test_sort_values`` for both sort directions.

    The inherited test executes inside
    ``tm.maybe_produces_warning(PerformanceWarning, condition)``; ``condition``
    is ``pa_version_under7p0`` and-ed with a ``"pyarrow"`` ``storage``
    attribute on the dtype of ``data_for_sorting``.
    """
    condition = (
        pa_version_under7p0
        and getattr(data_for_sorting.dtype, "storage", "") == "pyarrow"
    )
    warning_guard = tm.maybe_produces_warning(PerformanceWarning, condition)
    with warning_guard:
        super().test_sort_values(data_for_sorting, ascending, sort_by_key)
292+
293+
@pytest.mark.parametrize("ascending", [True, False])
def test_sort_values_missing(self, data_missing_for_sorting, ascending, sort_by_key):
    """Run the inherited ``test_sort_values_missing`` for both sort directions.

    Wrapped in ``tm.maybe_produces_warning`` for ``PerformanceWarning``, with
    a condition built from ``pa_version_under7p0`` and the ``storage``
    attribute (default ``""``) of the data's dtype.
    """
    dtype = data_missing_for_sorting.dtype
    expects_warning = pa_version_under7p0 and getattr(dtype, "storage", "") == "pyarrow"
    with tm.maybe_produces_warning(PerformanceWarning, expects_warning):
        super().test_sort_values_missing(
            data_missing_for_sorting, ascending, sort_by_key
        )
305+
306+
@pytest.mark.parametrize("ascending", [True, False])
def test_sort_values_frame(self, data_for_sorting, ascending):
    """Run the inherited ``test_sort_values_frame`` for both sort directions.

    The delegation happens inside
    ``tm.maybe_produces_warning(PerformanceWarning, condition)``, where
    ``condition`` combines ``pa_version_under7p0`` with a ``"pyarrow"``
    ``storage`` attribute on the dtype of ``data_for_sorting``.
    """
    frame_sort_guard = tm.maybe_produces_warning(
        PerformanceWarning,
        pa_version_under7p0
        and getattr(data_for_sorting.dtype, "storage", "") == "pyarrow",
    )
    with frame_sort_guard:
        super().test_sort_values_frame(data_for_sorting, ascending)
314+
213315

214316
class TestCasting(base.BaseCastingTests):
215317
pass
@@ -236,8 +338,41 @@ class TestPrinting(base.BasePrintingTests):
236338

237339

238340
class TestGroupBy(base.BaseGroupbyTests):
239-
def test_groupby_extension_transform(self, data_for_grouping, request):
240-
super().test_groupby_extension_transform(data_for_grouping)
341+
@pytest.mark.parametrize("as_index", [True, False])
def test_groupby_extension_agg(self, as_index, data_for_grouping):
    """Group a frame by the extension column ``B`` and check the mean of ``A``.

    Both the groupby aggregation and the ``pd.factorize(..., sort=True)`` call
    used to build the expected keys run inside
    ``tm.maybe_produces_warning(PerformanceWarning, ...)``. With ``as_index``
    the result is compared as a Series indexed by the sorted uniques;
    otherwise it is compared as a two-column DataFrame.
    """
    frame = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1, 4], "B": data_for_grouping})
    expects_warning = (
        pa_version_under7p0
        and getattr(data_for_grouping.dtype, "storage", "") == "pyarrow"
    )
    with tm.maybe_produces_warning(PerformanceWarning, expects_warning):
        result = frame.groupby("B", as_index=as_index).A.mean()
    with tm.maybe_produces_warning(PerformanceWarning, expects_warning):
        _, uniques = pd.factorize(data_for_grouping, sort=True)

    if not as_index:
        expected = pd.DataFrame({"B": uniques, "A": [3.0, 1.0, 4.0]})
        self.assert_frame_equal(result, expected)
        return

    index = pd.Index._with_infer(uniques, name="B")
    expected = pd.Series([3.0, 1.0, 4.0], index=index, name="A")
    self.assert_series_equal(result, expected)
364+
365+
def test_groupby_extension_transform(self, data_for_grouping):
    """Run the inherited ``test_groupby_extension_transform`` with a warning guard.

    The guard is ``tm.maybe_produces_warning(PerformanceWarning, condition)``;
    ``condition`` requires ``pa_version_under7p0`` and a ``"pyarrow"``
    ``storage`` attribute on the dtype of ``data_for_grouping``.
    """
    expects_warning = (
        pa_version_under7p0
        and getattr(data_for_grouping.dtype, "storage", "") == "pyarrow"
    )
    with tm.maybe_produces_warning(PerformanceWarning, expects_warning):
        super().test_groupby_extension_transform(data_for_grouping)
372+
373+
@pytest.mark.filterwarnings("ignore:Falling back:pandas.errors.PerformanceWarning")
def test_groupby_extension_apply(self, data_for_grouping, groupby_apply_op):
    """Run the inherited ``test_groupby_extension_apply`` unchanged.

    The only addition is the ``filterwarnings`` mark, which ignores
    ``PerformanceWarning`` messages matching ``Falling back``.
    """
    inherited_test = super().test_groupby_extension_apply
    inherited_test(data_for_grouping, groupby_apply_op)
241376

242377

243378
class Test2DCompat(base.Dim2CompatTests):

pandas/tests/test_algos.py

+14-2
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,8 @@
99
algos as libalgos,
1010
hashtable as ht,
1111
)
12+
from pandas.compat import pa_version_under7p0
13+
from pandas.errors import PerformanceWarning
1214
import pandas.util._test_decorators as td
1315

1416
from pandas.core.dtypes.common import (
@@ -50,7 +52,13 @@ class TestFactorize:
5052
@pytest.mark.parametrize("sort", [True, False])
5153
def test_factorize(self, index_or_series_obj, sort):
5254
obj = index_or_series_obj
53-
result_codes, result_uniques = obj.factorize(sort=sort)
55+
with tm.maybe_produces_warning(
56+
PerformanceWarning,
57+
sort
58+
and pa_version_under7p0
59+
and getattr(obj.dtype, "storage", "") == "pyarrow",
60+
):
61+
result_codes, result_uniques = obj.factorize(sort=sort)
5462

5563
constructor = Index
5664
if isinstance(obj, MultiIndex):
@@ -64,7 +72,11 @@ def test_factorize(self, index_or_series_obj, sort):
6472
expected_uniques = expected_uniques.astype(object)
6573

6674
if sort:
67-
expected_uniques = expected_uniques.sort_values()
75+
with tm.maybe_produces_warning(
76+
PerformanceWarning,
77+
pa_version_under7p0 and getattr(obj.dtype, "storage", "") == "pyarrow",
78+
):
79+
expected_uniques = expected_uniques.sort_values()
6880

6981
# construct an integer ndarray so that
7082
# `expected_uniques.take(expected_codes)` is equal to `obj`

0 commit comments

Comments
 (0)