pandas-dev · jorisvandenbossche · Oct 30, 2024 · Oct 28, 2024 · Oct 28, 2024 · jorisvandenbossche
diff --git a/pandas/tests/indexes/object/test_astype.py b/pandas/tests/indexes/object/test_astype.py
@@ -3,25 +3,7 @@
 from pandas import (
     Index,
     NaT,
-    Series,
 )
-import pandas._testing as tm
-
-
-def test_astype_str_from_bytes():
-    # https://github.com/pandas-dev/pandas/issues/38607
-    # GH#49658 pre-2.0 Index called .values.astype(str) here, which effectively
-    #  did a .decode() on the bytes object.  In 2.0 we go through
-    #  ensure_string_array which does f"{val}"
-    idx = Index(["あ", b"a"], dtype="object")
-    result = idx.astype(str)
-    expected = Index(["あ", "a"], dtype="str")
-    tm.assert_index_equal(result, expected)
-
-    # while we're here, check that Series.astype behaves the same
-    result = Series(idx).astype(str)
-    expected = Series(expected, dtype="str")
-    tm.assert_series_equal(result, expected)
 
 
 def test_astype_invalid_nas_to_tdt64_raises():

diff --git a/pandas/tests/indexes/object/test_indexing.py b/pandas/tests/indexes/object/test_indexing.py
@@ -3,12 +3,8 @@
 import numpy as np
 import pytest
 
-from pandas._libs.missing import (
-    NA,
-    is_matching_na,
-)
+from pandas._libs.missing import is_matching_na
 
-import pandas as pd
 from pandas import Index
 import pandas._testing as tm
 
@@ -23,13 +19,13 @@ class TestGetIndexer:
     )
     def test_get_indexer_strings(self, method, expected):
         expected = np.array(expected, dtype=np.intp)
-        index = Index(["b", "c"])
+        index = Index(["b", "c"], dtype=object)
         actual = index.get_indexer(["a", "b", "c", "d"], method=method)
 
         tm.assert_numpy_array_equal(actual, expected)
 
-    def test_get_indexer_strings_raises(self, using_infer_string):
-        index = Index(["b", "c"])
+    def test_get_indexer_strings_raises(self):
+        index = Index(["b", "c"], dtype=object)
 
         msg = "|".join(
             [
@@ -68,13 +64,9 @@ def test_get_indexer_with_NA_values(
 
 
 class TestGetIndexerNonUnique:
-    def test_get_indexer_non_unique_nas(
-        self, nulls_fixture, request, using_infer_string
-    ):
+    def test_get_indexer_non_unique_nas(self, nulls_fixture):
         # even though this isn't non-unique, this should still work
-        if using_infer_string and (nulls_fixture is None or nulls_fixture is NA):
-            request.applymarker(pytest.mark.xfail(reason="NAs are cast to NaN"))
-        index = Index(["a", "b", nulls_fixture])
+        index = Index(["a", "b", nulls_fixture], dtype=object)
         indexer, missing = index.get_indexer_non_unique([nulls_fixture])
 
         expected_indexer = np.array([2], dtype=np.intp)
@@ -83,7 +75,7 @@ def test_get_indexer_non_unique_nas(
         tm.assert_numpy_array_equal(missing, expected_missing)
 
         # actually non-unique
-        index = Index(["a", nulls_fixture, "b", nulls_fixture])
+        index = Index(["a", nulls_fixture, "b", nulls_fixture], dtype=object)
         indexer, missing = index.get_indexer_non_unique([nulls_fixture])
 
         expected_indexer = np.array([1, 3], dtype=np.intp)
@@ -92,10 +84,10 @@ def test_get_indexer_non_unique_nas(
 
         # matching-but-not-identical nans
         if is_matching_na(nulls_fixture, float("NaN")):
-            index = Index(["a", float("NaN"), "b", float("NaN")])
+            index = Index(["a", float("NaN"), "b", float("NaN")], dtype=object)
             match_but_not_identical = True
         elif is_matching_na(nulls_fixture, Decimal("NaN")):
-            index = Index(["a", Decimal("NaN"), "b", Decimal("NaN")])
+            index = Index(["a", Decimal("NaN"), "b", Decimal("NaN")], dtype=object)
             match_but_not_identical = True
         else:
             match_but_not_identical = False
@@ -156,59 +148,3 @@ def test_get_indexer_non_unique_np_nats(self, np_nat_fixture, np_nat_fixture2):
             expected_indexer = np.array([1, 3], dtype=np.intp)
             tm.assert_numpy_array_equal(indexer, expected_indexer)
             tm.assert_numpy_array_equal(missing, expected_missing)
-
-
-class TestSliceLocs:
-    @pytest.mark.parametrize(
-        "in_slice,expected",
-        [
-            # error: Slice index must be an integer or None
-            (pd.IndexSlice[::-1], "yxdcb"),
-            (pd.IndexSlice["b":"y":-1], ""),  # type: ignore[misc]
-            (pd.IndexSlice["b"::-1], "b"),  # type: ignore[misc]
-            (pd.IndexSlice[:"b":-1], "yxdcb"),  # type: ignore[misc]
-            (pd.IndexSlice[:"y":-1], "y"),  # type: ignore[misc]
-            (pd.IndexSlice["y"::-1], "yxdcb"),  # type: ignore[misc]
-            (pd.IndexSlice["y"::-4], "yb"),  # type: ignore[misc]
-            # absent labels
-            (pd.IndexSlice[:"a":-1], "yxdcb"),  # type: ignore[misc]
-            (pd.IndexSlice[:"a":-2], "ydb"),  # type: ignore[misc]
-            (pd.IndexSlice["z"::-1], "yxdcb"),  # type: ignore[misc]
-            (pd.IndexSlice["z"::-3], "yc"),  # type: ignore[misc]
-            (pd.IndexSlice["m"::-1], "dcb"),  # type: ignore[misc]
-            (pd.IndexSlice[:"m":-1], "yx"),  # type: ignore[misc]
-            (pd.IndexSlice["a":"a":-1], ""),  # type: ignore[misc]
-            (pd.IndexSlice["z":"z":-1], ""),  # type: ignore[misc]
-            (pd.IndexSlice["m":"m":-1], ""),  # type: ignore[misc]
-        ],
-    )
-    def test_slice_locs_negative_step(self, in_slice, expected, any_string_dtype):
-        index = Index(list("bcdxy"), dtype=any_string_dtype)
-
-        s_start, s_stop = index.slice_locs(in_slice.start, in_slice.stop, in_slice.step)
-        result = index[s_start : s_stop : in_slice.step]
-        expected = Index(list(expected), dtype=any_string_dtype)
-        tm.assert_index_equal(result, expected)
-
-    def test_slice_locs_negative_step_oob(self, any_string_dtype):
-        index = Index(list("bcdxy"), dtype=any_string_dtype)
-
-        result = index[-10:5:1]
-        tm.assert_index_equal(result, index)
-
-        result = index[4:-10:-1]
-        expected = Index(list("yxdcb"), dtype=any_string_dtype)
-        tm.assert_index_equal(result, expected)
-
-    def test_slice_locs_dup(self):
-        index = Index(["a", "a", "b", "c", "d", "d"])
-        assert index.slice_locs("a", "d") == (0, 6)
-        assert index.slice_locs(end="d") == (0, 6)
-        assert index.slice_locs("a", "c") == (0, 4)
-        assert index.slice_locs("b", "d") == (2, 6)
-
-        index2 = index[::-1]
-        assert index2.slice_locs("d", "a") == (0, 6)
-        assert index2.slice_locs(end="a") == (0, 6)
-        assert index2.slice_locs("d", "b") == (0, 4)
-        assert index2.slice_locs("c", "a") == (2, 6)
diff --git a/pandas/tests/indexes/string/__init__.py b/pandas/tests/indexes/string/__init__.py
diff --git a/pandas/tests/indexes/string/test_astype.py b/pandas/tests/indexes/string/test_astype.py
@@ -0,0 +1,21 @@
+from pandas import (
+    Index,
+    Series,
+)
+import pandas._testing as tm
+
+
+def test_astype_str_from_bytes():
+    # https://github.com/pandas-dev/pandas/issues/38607
+    # GH#49658 pre-2.0 Index called .values.astype(str) here, which effectively
+    #  did a .decode() on the bytes object.  In 2.0 we go through
+    #  ensure_string_array which does f"{val}"
+    idx = Index(["あ", b"a"], dtype="object")
+    result = idx.astype(str)
+    expected = Index(["あ", "a"], dtype="str")
+    tm.assert_index_equal(result, expected)
+
+    # while we're here, check that Series.astype behaves the same
+    result = Series(idx).astype(str)
+    expected = Series(expected, dtype="str")
+    tm.assert_series_equal(result, expected)
diff --git a/pandas/tests/indexes/string/test_indexing.py b/pandas/tests/indexes/string/test_indexing.py
@@ -0,0 +1,118 @@
+import numpy as np
+import pytest
+
+import pandas as pd
+from pandas import Index
+import pandas._testing as tm
+
+
+class TestGetIndexer:
+    @pytest.mark.parametrize(
+        "method,expected",
+        [
+            ("pad", [-1, 0, 1, 1]),
+            ("backfill", [0, 0, 1, -1]),
+        ],
+    )
+    def test_get_indexer_strings(self, any_string_dtype, method, expected):
+        expected = np.array(expected, dtype=np.intp)
+        index = Index(["b", "c"], dtype=any_string_dtype)
+        actual = index.get_indexer(["a", "b", "c", "d"], method=method)
+
+        tm.assert_numpy_array_equal(actual, expected)
+
+    def test_get_indexer_strings_raises(self, any_string_dtype):
+        index = Index(["b", "c"], dtype=any_string_dtype)
+
+        msg = "|".join(
+            [
+                "operation 'sub' not supported for dtype 'str",
+                r"unsupported operand type\(s\) for -: 'str' and 'str'",
+            ]
+        )
+        with pytest.raises(TypeError, match=msg):
+            index.get_indexer(["a", "b", "c", "d"], method="nearest")
+
+        with pytest.raises(TypeError, match=msg):
+            index.get_indexer(["a", "b", "c", "d"], method="pad", tolerance=2)
+
+        with pytest.raises(TypeError, match=msg):
+            index.get_indexer(
+                ["a", "b", "c", "d"], method="pad", tolerance=[2, 2, 2, 2]
+            )
+
+
+class TestGetIndexerNonUnique:
+    @pytest.mark.xfail(reason="TODO(infer_string)", strict=False)
+    def test_get_indexer_non_unique_nas(self, any_string_dtype, nulls_fixture):
+        index = Index(["a", "b", None], dtype=any_string_dtype)
+        indexer, missing = index.get_indexer_non_unique([nulls_fixture])
+
+        expected_indexer = np.array([2], dtype=np.intp)
+        expected_missing = np.array([], dtype=np.intp)
+        tm.assert_numpy_array_equal(indexer, expected_indexer)
+        tm.assert_numpy_array_equal(missing, expected_missing)
+
+        # actually non-unique
+        index = Index(["a", None, "b", None], dtype=any_string_dtype)
+        indexer, missing = index.get_indexer_non_unique([nulls_fixture])
+
+        expected_indexer = np.array([1, 3], dtype=np.intp)
+        tm.assert_numpy_array_equal(indexer, expected_indexer)
+        tm.assert_numpy_array_equal(missing, expected_missing)
+
+
+class TestSliceLocs:
+    @pytest.mark.parametrize(
+        "in_slice,expected",
+        [
+            # error: Slice index must be an integer or None
+            (pd.IndexSlice[::-1], "yxdcb"),
+            (pd.IndexSlice["b":"y":-1], ""),  # type: ignore[misc]
+            (pd.IndexSlice["b"::-1], "b"),  # type: ignore[misc]
+            (pd.IndexSlice[:"b":-1], "yxdcb"),  # type: ignore[misc]
+            (pd.IndexSlice[:"y":-1], "y"),  # type: ignore[misc]
+            (pd.IndexSlice["y"::-1], "yxdcb"),  # type: ignore[misc]
+            (pd.IndexSlice["y"::-4], "yb"),  # type: ignore[misc]
+            # absent labels
+            (pd.IndexSlice[:"a":-1], "yxdcb"),  # type: ignore[misc]
+            (pd.IndexSlice[:"a":-2], "ydb"),  # type: ignore[misc]
+            (pd.IndexSlice["z"::-1], "yxdcb"),  # type: ignore[misc]
+            (pd.IndexSlice["z"::-3], "yc"),  # type: ignore[misc]
+            (pd.IndexSlice["m"::-1], "dcb"),  # type: ignore[misc]
+            (pd.IndexSlice[:"m":-1], "yx"),  # type: ignore[misc]
+            (pd.IndexSlice["a":"a":-1], ""),  # type: ignore[misc]
+            (pd.IndexSlice["z":"z":-1], ""),  # type: ignore[misc]
+            (pd.IndexSlice["m":"m":-1], ""),  # type: ignore[misc]
+        ],
+    )
+    def test_slice_locs_negative_step(self, in_slice, expected, any_string_dtype):
+        index = Index(list("bcdxy"), dtype=any_string_dtype)
+
+        s_start, s_stop = index.slice_locs(in_slice.start, in_slice.stop, in_slice.step)
+        result = index[s_start : s_stop : in_slice.step]
+        expected = Index(list(expected), dtype=any_string_dtype)
+        tm.assert_index_equal(result, expected)
+
+    def test_slice_locs_negative_step_oob(self, any_string_dtype):
+        index = Index(list("bcdxy"), dtype=any_string_dtype)
+
+        result = index[-10:5:1]
+        tm.assert_index_equal(result, index)
+
+        result = index[4:-10:-1]
+        expected = Index(list("yxdcb"), dtype=any_string_dtype)
+        tm.assert_index_equal(result, expected)
+
+    def test_slice_locs_dup(self, any_string_dtype):
+        index = Index(["a", "a", "b", "c", "d", "d"], dtype=any_string_dtype)
+        assert index.slice_locs("a", "d") == (0, 6)
+        assert index.slice_locs(end="d") == (0, 6)
+        assert index.slice_locs("a", "c") == (0, 4)
+        assert index.slice_locs("b", "d") == (2, 6)
+
+        index2 = index[::-1]
+        assert index2.slice_locs("d", "a") == (0, 6)
+        assert index2.slice_locs(end="a") == (0, 6)
+        assert index2.slice_locs("d", "b") == (0, 4)
+        assert index2.slice_locs("c", "a") == (2, 6)