Skip to content

TST (string dtype): duplicate pandas/tests/indexes/object tests specifically for string dtypes #60117

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 0 additions & 18 deletions pandas/tests/indexes/object/test_astype.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,25 +3,7 @@
from pandas import (
Index,
NaT,
Series,
)
import pandas._testing as tm


def test_astype_str_from_bytes():
    """Check ``astype(str)`` on an object Index/Series holding str and bytes.

    The test expects the values ``["あ", "a"]`` with dtype ``"str"`` from
    both ``Index.astype`` and ``Series.astype``.
    """
    # https://github.com/pandas-dev/pandas/issues/38607
    # GH#49658 pre-2.0 Index called .values.astype(str) here, which effectively
    #  did a .decode() on the bytes object.  In 2.0 we go through
    #  ensure_string_array which does f"{val}"
    mixed = Index(["あ", b"a"], dtype="object")
    want_index = Index(["あ", "a"], dtype="str")
    tm.assert_index_equal(mixed.astype(str), want_index)

    # while we're here, check that Series.astype behaves the same
    want_series = Series(want_index, dtype="str")
    tm.assert_series_equal(Series(mixed).astype(str), want_series)


def test_astype_invalid_nas_to_tdt64_raises():
Expand Down
82 changes: 9 additions & 73 deletions pandas/tests/indexes/object/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,8 @@
import numpy as np
import pytest

from pandas._libs.missing import (
NA,
is_matching_na,
)
from pandas._libs.missing import is_matching_na

import pandas as pd
from pandas import Index
import pandas._testing as tm

Expand All @@ -23,13 +19,13 @@ class TestGetIndexer:
)
def test_get_indexer_strings(self, method, expected):
expected = np.array(expected, dtype=np.intp)
index = Index(["b", "c"])
index = Index(["b", "c"], dtype=object)
actual = index.get_indexer(["a", "b", "c", "d"], method=method)

tm.assert_numpy_array_equal(actual, expected)

def test_get_indexer_strings_raises(self, using_infer_string):
index = Index(["b", "c"])
def test_get_indexer_strings_raises(self):
index = Index(["b", "c"], dtype=object)

msg = "|".join(
[
Expand Down Expand Up @@ -68,13 +64,9 @@ def test_get_indexer_with_NA_values(


class TestGetIndexerNonUnique:
def test_get_indexer_non_unique_nas(
self, nulls_fixture, request, using_infer_string
):
def test_get_indexer_non_unique_nas(self, nulls_fixture):
# even though this isn't non-unique, this should still work
if using_infer_string and (nulls_fixture is None or nulls_fixture is NA):
request.applymarker(pytest.mark.xfail(reason="NAs are cast to NaN"))
index = Index(["a", "b", nulls_fixture])
index = Index(["a", "b", nulls_fixture], dtype=object)
indexer, missing = index.get_indexer_non_unique([nulls_fixture])

expected_indexer = np.array([2], dtype=np.intp)
Expand All @@ -83,7 +75,7 @@ def test_get_indexer_non_unique_nas(
tm.assert_numpy_array_equal(missing, expected_missing)

# actually non-unique
index = Index(["a", nulls_fixture, "b", nulls_fixture])
index = Index(["a", nulls_fixture, "b", nulls_fixture], dtype=object)
indexer, missing = index.get_indexer_non_unique([nulls_fixture])

expected_indexer = np.array([1, 3], dtype=np.intp)
Expand All @@ -92,10 +84,10 @@ def test_get_indexer_non_unique_nas(

# matching-but-not-identical nans
if is_matching_na(nulls_fixture, float("NaN")):
index = Index(["a", float("NaN"), "b", float("NaN")])
index = Index(["a", float("NaN"), "b", float("NaN")], dtype=object)
match_but_not_identical = True
elif is_matching_na(nulls_fixture, Decimal("NaN")):
index = Index(["a", Decimal("NaN"), "b", Decimal("NaN")])
index = Index(["a", Decimal("NaN"), "b", Decimal("NaN")], dtype=object)
match_but_not_identical = True
else:
match_but_not_identical = False
Expand Down Expand Up @@ -156,59 +148,3 @@ def test_get_indexer_non_unique_np_nats(self, np_nat_fixture, np_nat_fixture2):
expected_indexer = np.array([1, 3], dtype=np.intp)
tm.assert_numpy_array_equal(indexer, expected_indexer)
tm.assert_numpy_array_equal(missing, expected_missing)


class TestSliceLocs:
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The remaining tests here were already parametrized with `any_string_dtype`, so I am keeping them only in `pandas/tests/indexes/string`.

@pytest.mark.parametrize(
"in_slice,expected",
[
# error: Slice index must be an integer or None
(pd.IndexSlice[::-1], "yxdcb"),
(pd.IndexSlice["b":"y":-1], ""), # type: ignore[misc]
(pd.IndexSlice["b"::-1], "b"), # type: ignore[misc]
(pd.IndexSlice[:"b":-1], "yxdcb"), # type: ignore[misc]
(pd.IndexSlice[:"y":-1], "y"), # type: ignore[misc]
(pd.IndexSlice["y"::-1], "yxdcb"), # type: ignore[misc]
(pd.IndexSlice["y"::-4], "yb"), # type: ignore[misc]
# absent labels
(pd.IndexSlice[:"a":-1], "yxdcb"), # type: ignore[misc]
(pd.IndexSlice[:"a":-2], "ydb"), # type: ignore[misc]
(pd.IndexSlice["z"::-1], "yxdcb"), # type: ignore[misc]
(pd.IndexSlice["z"::-3], "yc"), # type: ignore[misc]
(pd.IndexSlice["m"::-1], "dcb"), # type: ignore[misc]
(pd.IndexSlice[:"m":-1], "yx"), # type: ignore[misc]
(pd.IndexSlice["a":"a":-1], ""), # type: ignore[misc]
(pd.IndexSlice["z":"z":-1], ""), # type: ignore[misc]
(pd.IndexSlice["m":"m":-1], ""), # type: ignore[misc]
],
)
def test_slice_locs_negative_step(self, in_slice, expected, any_string_dtype):
index = Index(list("bcdxy"), dtype=any_string_dtype)

s_start, s_stop = index.slice_locs(in_slice.start, in_slice.stop, in_slice.step)
result = index[s_start : s_stop : in_slice.step]
expected = Index(list(expected), dtype=any_string_dtype)
tm.assert_index_equal(result, expected)

def test_slice_locs_negative_step_oob(self, any_string_dtype):
index = Index(list("bcdxy"), dtype=any_string_dtype)

result = index[-10:5:1]
tm.assert_index_equal(result, index)

result = index[4:-10:-1]
expected = Index(list("yxdcb"), dtype=any_string_dtype)
tm.assert_index_equal(result, expected)

def test_slice_locs_dup(self):
index = Index(["a", "a", "b", "c", "d", "d"])
assert index.slice_locs("a", "d") == (0, 6)
assert index.slice_locs(end="d") == (0, 6)
assert index.slice_locs("a", "c") == (0, 4)
assert index.slice_locs("b", "d") == (2, 6)

index2 = index[::-1]
assert index2.slice_locs("d", "a") == (0, 6)
assert index2.slice_locs(end="a") == (0, 6)
assert index2.slice_locs("d", "b") == (0, 4)
assert index2.slice_locs("c", "a") == (2, 6)
Empty file.
21 changes: 21 additions & 0 deletions pandas/tests/indexes/string/test_astype.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
from pandas import (
Index,
Series,
)
import pandas._testing as tm


def test_astype_str_from_bytes():
    """Check ``astype(str)`` on an object Index/Series holding str and bytes.

    The test expects the values ``["あ", "a"]`` with dtype ``"str"`` from
    both ``Index.astype`` and ``Series.astype``.
    """
    # https://github.com/pandas-dev/pandas/issues/38607
    # GH#49658 pre-2.0 Index called .values.astype(str) here, which effectively
    #  did a .decode() on the bytes object.  In 2.0 we go through
    #  ensure_string_array which does f"{val}"
    mixed = Index(["あ", b"a"], dtype="object")
    want_index = Index(["あ", "a"], dtype="str")
    tm.assert_index_equal(mixed.astype(str), want_index)

    # while we're here, check that Series.astype behaves the same
    want_series = Series(want_index, dtype="str")
    tm.assert_series_equal(Series(mixed).astype(str), want_series)
118 changes: 118 additions & 0 deletions pandas/tests/indexes/string/test_indexing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
import numpy as np
import pytest

import pandas as pd
from pandas import Index
import pandas._testing as tm


class TestGetIndexer:
    """Tests for ``Index.get_indexer`` on indexes built with a string dtype."""

    @pytest.mark.parametrize(
        "method,expected",
        [
            ("pad", [-1, 0, 1, 1]),
            ("backfill", [0, 0, 1, -1]),
        ],
    )
    def test_get_indexer_strings(self, any_string_dtype, method, expected):
        """Look up four labels in a two-label index with pad/backfill."""
        idx = Index(["b", "c"], dtype=any_string_dtype)
        want = np.array(expected, dtype=np.intp)

        got = idx.get_indexer(["a", "b", "c", "d"], method=method)

        tm.assert_numpy_array_equal(got, want)

    def test_get_indexer_strings_raises(self, any_string_dtype):
        """``method="nearest"`` or a ``tolerance`` must raise ``TypeError``."""
        idx = Index(["b", "c"], dtype=any_string_dtype)

        # Either of the two alternatives is an accepted error message.
        msg = (
            "operation 'sub' not supported for dtype 'str"
            "|"
            r"unsupported operand type\(s\) for -: 'str' and 'str'"
        )

        failing_kwargs = (
            {"method": "nearest"},
            {"method": "pad", "tolerance": 2},
            {"method": "pad", "tolerance": [2, 2, 2, 2]},
        )
        for kwargs in failing_kwargs:
            with pytest.raises(TypeError, match=msg):
                idx.get_indexer(["a", "b", "c", "d"], **kwargs)


class TestGetIndexerNonUnique:
    """Tests for ``Index.get_indexer_non_unique`` on string-dtype indexes."""

    @pytest.mark.xfail(reason="TODO(infer_string)", strict=False)
    def test_get_indexer_non_unique_nas(self, any_string_dtype, nulls_fixture):
        """Look up a null value in indexes containing one or two ``None``."""
        nothing_missing = np.array([], dtype=np.intp)
        scenarios = (
            (["a", "b", None], [2]),
            # actually non-unique
            (["a", None, "b", None], [1, 3]),
        )

        for values, positions in scenarios:
            idx = Index(values, dtype=any_string_dtype)
            indexer, missing = idx.get_indexer_non_unique([nulls_fixture])

            want_indexer = np.array(positions, dtype=np.intp)
            tm.assert_numpy_array_equal(indexer, want_indexer)
            tm.assert_numpy_array_equal(missing, nothing_missing)


class TestSliceLocs:
    """Tests for ``Index.slice_locs`` and slicing on string-dtype indexes."""

    @pytest.mark.parametrize(
        "in_slice,expected",
        [
            # error: Slice index must be an integer or None
            (pd.IndexSlice[::-1], "yxdcb"),
            (pd.IndexSlice["b":"y":-1], ""),  # type: ignore[misc]
            (pd.IndexSlice["b"::-1], "b"),  # type: ignore[misc]
            (pd.IndexSlice[:"b":-1], "yxdcb"),  # type: ignore[misc]
            (pd.IndexSlice[:"y":-1], "y"),  # type: ignore[misc]
            (pd.IndexSlice["y"::-1], "yxdcb"),  # type: ignore[misc]
            (pd.IndexSlice["y"::-4], "yb"),  # type: ignore[misc]
            # absent labels
            (pd.IndexSlice[:"a":-1], "yxdcb"),  # type: ignore[misc]
            (pd.IndexSlice[:"a":-2], "ydb"),  # type: ignore[misc]
            (pd.IndexSlice["z"::-1], "yxdcb"),  # type: ignore[misc]
            (pd.IndexSlice["z"::-3], "yc"),  # type: ignore[misc]
            (pd.IndexSlice["m"::-1], "dcb"),  # type: ignore[misc]
            (pd.IndexSlice[:"m":-1], "yx"),  # type: ignore[misc]
            (pd.IndexSlice["a":"a":-1], ""),  # type: ignore[misc]
            (pd.IndexSlice["z":"z":-1], ""),  # type: ignore[misc]
            (pd.IndexSlice["m":"m":-1], ""),  # type: ignore[misc]
        ],
    )
    def test_slice_locs_negative_step(self, in_slice, expected, any_string_dtype):
        """Slice with the positions from ``slice_locs`` using a negative step."""
        idx = Index(list("bcdxy"), dtype=any_string_dtype)
        step = in_slice.step

        lo, hi = idx.slice_locs(in_slice.start, in_slice.stop, step)
        want = Index(list(expected), dtype=any_string_dtype)

        tm.assert_index_equal(idx[lo:hi:step], want)

    def test_slice_locs_negative_step_oob(self, any_string_dtype):
        """Positional slices whose bounds lie outside the index."""
        idx = Index(list("bcdxy"), dtype=any_string_dtype)

        # forward slice with out-of-range bounds selects the whole index
        tm.assert_index_equal(idx[-10:5:1], idx)

        # backward slice with an out-of-range stop selects everything reversed
        everything_reversed = Index(list("yxdcb"), dtype=any_string_dtype)
        tm.assert_index_equal(idx[4:-10:-1], everything_reversed)

    def test_slice_locs_dup(self, any_string_dtype):
        """``slice_locs`` on an index with duplicated first and last labels."""
        ascending = Index(["a", "a", "b", "c", "d", "d"], dtype=any_string_dtype)
        descending = ascending[::-1]

        # (index, positional args, keyword args, expected (start, stop))
        checks = (
            (ascending, ("a", "d"), {}, (0, 6)),
            (ascending, (), {"end": "d"}, (0, 6)),
            (ascending, ("a", "c"), {}, (0, 4)),
            (ascending, ("b", "d"), {}, (2, 6)),
            (descending, ("d", "a"), {}, (0, 6)),
            (descending, (), {"end": "a"}, (0, 6)),
            (descending, ("d", "b"), {}, (0, 4)),
            (descending, ("c", "a"), {}, (2, 6)),
        )
        for idx, args, kwargs, want in checks:
            assert idx.slice_locs(*args, **kwargs) == want