Skip to content

Commit d8905e4

Browse files
TST (string dtype): duplicate pandas/tests/indexes/object tests specifically for string dtypes (#60117)
1 parent 9e10119 commit d8905e4

File tree

5 files changed: +148 -91 lines changed

pandas/tests/indexes/object/test_astype.py

-18
Original file line numberDiff line numberDiff line change
@@ -3,25 +3,7 @@
33
from pandas import (
44
Index,
55
NaT,
6-
Series,
76
)
8-
import pandas._testing as tm
9-
10-
11-
def test_astype_str_from_bytes():
12-
# https://github.com/pandas-dev/pandas/issues/38607
13-
# GH#49658 pre-2.0 Index called .values.astype(str) here, which effectively
14-
# did a .decode() on the bytes object. In 2.0 we go through
15-
# ensure_string_array which does f"{val}"
16-
idx = Index(["あ", b"a"], dtype="object")
17-
result = idx.astype(str)
18-
expected = Index(["あ", "a"], dtype="str")
19-
tm.assert_index_equal(result, expected)
20-
21-
# while we're here, check that Series.astype behaves the same
22-
result = Series(idx).astype(str)
23-
expected = Series(expected, dtype="str")
24-
tm.assert_series_equal(result, expected)
257

268

279
def test_astype_invalid_nas_to_tdt64_raises():

pandas/tests/indexes/object/test_indexing.py

+9 -73
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,8 @@
33
import numpy as np
44
import pytest
55

6-
from pandas._libs.missing import (
7-
NA,
8-
is_matching_na,
9-
)
6+
from pandas._libs.missing import is_matching_na
107

11-
import pandas as pd
128
from pandas import Index
139
import pandas._testing as tm
1410

@@ -23,13 +19,13 @@ class TestGetIndexer:
2319
)
2420
def test_get_indexer_strings(self, method, expected):
2521
expected = np.array(expected, dtype=np.intp)
26-
index = Index(["b", "c"])
22+
index = Index(["b", "c"], dtype=object)
2723
actual = index.get_indexer(["a", "b", "c", "d"], method=method)
2824

2925
tm.assert_numpy_array_equal(actual, expected)
3026

31-
def test_get_indexer_strings_raises(self, using_infer_string):
32-
index = Index(["b", "c"])
27+
def test_get_indexer_strings_raises(self):
28+
index = Index(["b", "c"], dtype=object)
3329

3430
msg = "|".join(
3531
[
@@ -68,13 +64,9 @@ def test_get_indexer_with_NA_values(
6864

6965

7066
class TestGetIndexerNonUnique:
71-
def test_get_indexer_non_unique_nas(
72-
self, nulls_fixture, request, using_infer_string
73-
):
67+
def test_get_indexer_non_unique_nas(self, nulls_fixture):
7468
# even though this isn't non-unique, this should still work
75-
if using_infer_string and (nulls_fixture is None or nulls_fixture is NA):
76-
request.applymarker(pytest.mark.xfail(reason="NAs are cast to NaN"))
77-
index = Index(["a", "b", nulls_fixture])
69+
index = Index(["a", "b", nulls_fixture], dtype=object)
7870
indexer, missing = index.get_indexer_non_unique([nulls_fixture])
7971

8072
expected_indexer = np.array([2], dtype=np.intp)
@@ -83,7 +75,7 @@ def test_get_indexer_non_unique_nas(
8375
tm.assert_numpy_array_equal(missing, expected_missing)
8476

8577
# actually non-unique
86-
index = Index(["a", nulls_fixture, "b", nulls_fixture])
78+
index = Index(["a", nulls_fixture, "b", nulls_fixture], dtype=object)
8779
indexer, missing = index.get_indexer_non_unique([nulls_fixture])
8880

8981
expected_indexer = np.array([1, 3], dtype=np.intp)
@@ -92,10 +84,10 @@ def test_get_indexer_non_unique_nas(
9284

9385
# matching-but-not-identical nans
9486
if is_matching_na(nulls_fixture, float("NaN")):
95-
index = Index(["a", float("NaN"), "b", float("NaN")])
87+
index = Index(["a", float("NaN"), "b", float("NaN")], dtype=object)
9688
match_but_not_identical = True
9789
elif is_matching_na(nulls_fixture, Decimal("NaN")):
98-
index = Index(["a", Decimal("NaN"), "b", Decimal("NaN")])
90+
index = Index(["a", Decimal("NaN"), "b", Decimal("NaN")], dtype=object)
9991
match_but_not_identical = True
10092
else:
10193
match_but_not_identical = False
@@ -156,59 +148,3 @@ def test_get_indexer_non_unique_np_nats(self, np_nat_fixture, np_nat_fixture2):
156148
expected_indexer = np.array([1, 3], dtype=np.intp)
157149
tm.assert_numpy_array_equal(indexer, expected_indexer)
158150
tm.assert_numpy_array_equal(missing, expected_missing)
159-
160-
161-
class TestSliceLocs:
162-
@pytest.mark.parametrize(
163-
"in_slice,expected",
164-
[
165-
# error: Slice index must be an integer or None
166-
(pd.IndexSlice[::-1], "yxdcb"),
167-
(pd.IndexSlice["b":"y":-1], ""), # type: ignore[misc]
168-
(pd.IndexSlice["b"::-1], "b"), # type: ignore[misc]
169-
(pd.IndexSlice[:"b":-1], "yxdcb"), # type: ignore[misc]
170-
(pd.IndexSlice[:"y":-1], "y"), # type: ignore[misc]
171-
(pd.IndexSlice["y"::-1], "yxdcb"), # type: ignore[misc]
172-
(pd.IndexSlice["y"::-4], "yb"), # type: ignore[misc]
173-
# absent labels
174-
(pd.IndexSlice[:"a":-1], "yxdcb"), # type: ignore[misc]
175-
(pd.IndexSlice[:"a":-2], "ydb"), # type: ignore[misc]
176-
(pd.IndexSlice["z"::-1], "yxdcb"), # type: ignore[misc]
177-
(pd.IndexSlice["z"::-3], "yc"), # type: ignore[misc]
178-
(pd.IndexSlice["m"::-1], "dcb"), # type: ignore[misc]
179-
(pd.IndexSlice[:"m":-1], "yx"), # type: ignore[misc]
180-
(pd.IndexSlice["a":"a":-1], ""), # type: ignore[misc]
181-
(pd.IndexSlice["z":"z":-1], ""), # type: ignore[misc]
182-
(pd.IndexSlice["m":"m":-1], ""), # type: ignore[misc]
183-
],
184-
)
185-
def test_slice_locs_negative_step(self, in_slice, expected, any_string_dtype):
186-
index = Index(list("bcdxy"), dtype=any_string_dtype)
187-
188-
s_start, s_stop = index.slice_locs(in_slice.start, in_slice.stop, in_slice.step)
189-
result = index[s_start : s_stop : in_slice.step]
190-
expected = Index(list(expected), dtype=any_string_dtype)
191-
tm.assert_index_equal(result, expected)
192-
193-
def test_slice_locs_negative_step_oob(self, any_string_dtype):
194-
index = Index(list("bcdxy"), dtype=any_string_dtype)
195-
196-
result = index[-10:5:1]
197-
tm.assert_index_equal(result, index)
198-
199-
result = index[4:-10:-1]
200-
expected = Index(list("yxdcb"), dtype=any_string_dtype)
201-
tm.assert_index_equal(result, expected)
202-
203-
def test_slice_locs_dup(self):
204-
index = Index(["a", "a", "b", "c", "d", "d"])
205-
assert index.slice_locs("a", "d") == (0, 6)
206-
assert index.slice_locs(end="d") == (0, 6)
207-
assert index.slice_locs("a", "c") == (0, 4)
208-
assert index.slice_locs("b", "d") == (2, 6)
209-
210-
index2 = index[::-1]
211-
assert index2.slice_locs("d", "a") == (0, 6)
212-
assert index2.slice_locs(end="a") == (0, 6)
213-
assert index2.slice_locs("d", "b") == (0, 4)
214-
assert index2.slice_locs("c", "a") == (2, 6)

pandas/tests/indexes/string/__init__.py

Whitespace-only changes.

pandas/tests/indexes/string/test_astype.py

+21
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
from pandas import (
2+
Index,
3+
Series,
4+
)
5+
import pandas._testing as tm
6+
7+
8+
def test_astype_str_from_bytes():
9+
# https://github.com/pandas-dev/pandas/issues/38607
10+
# GH#49658 pre-2.0 Index called .values.astype(str) here, which effectively
11+
# did a .decode() on the bytes object. In 2.0 we go through
12+
# ensure_string_array which does f"{val}"
13+
idx = Index(["あ", b"a"], dtype="object")
14+
result = idx.astype(str)
15+
expected = Index(["あ", "a"], dtype="str")
16+
tm.assert_index_equal(result, expected)
17+
18+
# while we're here, check that Series.astype behaves the same
19+
result = Series(idx).astype(str)
20+
expected = Series(expected, dtype="str")
21+
tm.assert_series_equal(result, expected)

pandas/tests/indexes/string/test_indexing.py

+118
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
import numpy as np
2+
import pytest
3+
4+
import pandas as pd
5+
from pandas import Index
6+
import pandas._testing as tm
7+
8+
9+
class TestGetIndexer:
10+
@pytest.mark.parametrize(
11+
"method,expected",
12+
[
13+
("pad", [-1, 0, 1, 1]),
14+
("backfill", [0, 0, 1, -1]),
15+
],
16+
)
17+
def test_get_indexer_strings(self, any_string_dtype, method, expected):
18+
expected = np.array(expected, dtype=np.intp)
19+
index = Index(["b", "c"], dtype=any_string_dtype)
20+
actual = index.get_indexer(["a", "b", "c", "d"], method=method)
21+
22+
tm.assert_numpy_array_equal(actual, expected)
23+
24+
def test_get_indexer_strings_raises(self, any_string_dtype):
25+
index = Index(["b", "c"], dtype=any_string_dtype)
26+
27+
msg = "|".join(
28+
[
29+
"operation 'sub' not supported for dtype 'str",
30+
r"unsupported operand type\(s\) for -: 'str' and 'str'",
31+
]
32+
)
33+
with pytest.raises(TypeError, match=msg):
34+
index.get_indexer(["a", "b", "c", "d"], method="nearest")
35+
36+
with pytest.raises(TypeError, match=msg):
37+
index.get_indexer(["a", "b", "c", "d"], method="pad", tolerance=2)
38+
39+
with pytest.raises(TypeError, match=msg):
40+
index.get_indexer(
41+
["a", "b", "c", "d"], method="pad", tolerance=[2, 2, 2, 2]
42+
)
43+
44+
45+
class TestGetIndexerNonUnique:
46+
@pytest.mark.xfail(reason="TODO(infer_string)", strict=False)
47+
def test_get_indexer_non_unique_nas(self, any_string_dtype, nulls_fixture):
48+
index = Index(["a", "b", None], dtype=any_string_dtype)
49+
indexer, missing = index.get_indexer_non_unique([nulls_fixture])
50+
51+
expected_indexer = np.array([2], dtype=np.intp)
52+
expected_missing = np.array([], dtype=np.intp)
53+
tm.assert_numpy_array_equal(indexer, expected_indexer)
54+
tm.assert_numpy_array_equal(missing, expected_missing)
55+
56+
# actually non-unique
57+
index = Index(["a", None, "b", None], dtype=any_string_dtype)
58+
indexer, missing = index.get_indexer_non_unique([nulls_fixture])
59+
60+
expected_indexer = np.array([1, 3], dtype=np.intp)
61+
tm.assert_numpy_array_equal(indexer, expected_indexer)
62+
tm.assert_numpy_array_equal(missing, expected_missing)
63+
64+
65+
class TestSliceLocs:
66+
@pytest.mark.parametrize(
67+
"in_slice,expected",
68+
[
69+
# error: Slice index must be an integer or None
70+
(pd.IndexSlice[::-1], "yxdcb"),
71+
(pd.IndexSlice["b":"y":-1], ""), # type: ignore[misc]
72+
(pd.IndexSlice["b"::-1], "b"), # type: ignore[misc]
73+
(pd.IndexSlice[:"b":-1], "yxdcb"), # type: ignore[misc]
74+
(pd.IndexSlice[:"y":-1], "y"), # type: ignore[misc]
75+
(pd.IndexSlice["y"::-1], "yxdcb"), # type: ignore[misc]
76+
(pd.IndexSlice["y"::-4], "yb"), # type: ignore[misc]
77+
# absent labels
78+
(pd.IndexSlice[:"a":-1], "yxdcb"), # type: ignore[misc]
79+
(pd.IndexSlice[:"a":-2], "ydb"), # type: ignore[misc]
80+
(pd.IndexSlice["z"::-1], "yxdcb"), # type: ignore[misc]
81+
(pd.IndexSlice["z"::-3], "yc"), # type: ignore[misc]
82+
(pd.IndexSlice["m"::-1], "dcb"), # type: ignore[misc]
83+
(pd.IndexSlice[:"m":-1], "yx"), # type: ignore[misc]
84+
(pd.IndexSlice["a":"a":-1], ""), # type: ignore[misc]
85+
(pd.IndexSlice["z":"z":-1], ""), # type: ignore[misc]
86+
(pd.IndexSlice["m":"m":-1], ""), # type: ignore[misc]
87+
],
88+
)
89+
def test_slice_locs_negative_step(self, in_slice, expected, any_string_dtype):
90+
index = Index(list("bcdxy"), dtype=any_string_dtype)
91+
92+
s_start, s_stop = index.slice_locs(in_slice.start, in_slice.stop, in_slice.step)
93+
result = index[s_start : s_stop : in_slice.step]
94+
expected = Index(list(expected), dtype=any_string_dtype)
95+
tm.assert_index_equal(result, expected)
96+
97+
def test_slice_locs_negative_step_oob(self, any_string_dtype):
98+
index = Index(list("bcdxy"), dtype=any_string_dtype)
99+
100+
result = index[-10:5:1]
101+
tm.assert_index_equal(result, index)
102+
103+
result = index[4:-10:-1]
104+
expected = Index(list("yxdcb"), dtype=any_string_dtype)
105+
tm.assert_index_equal(result, expected)
106+
107+
def test_slice_locs_dup(self, any_string_dtype):
108+
index = Index(["a", "a", "b", "c", "d", "d"], dtype=any_string_dtype)
109+
assert index.slice_locs("a", "d") == (0, 6)
110+
assert index.slice_locs(end="d") == (0, 6)
111+
assert index.slice_locs("a", "c") == (0, 4)
112+
assert index.slice_locs("b", "d") == (2, 6)
113+
114+
index2 = index[::-1]
115+
assert index2.slice_locs("d", "a") == (0, 6)
116+
assert index2.slice_locs(end="a") == (0, 6)
117+
assert index2.slice_locs("d", "b") == (0, 4)
118+
assert index2.slice_locs("c", "a") == (2, 6)

0 commit comments

Comments
 (0)