pandas-dev
diff --git a/‎pandas/_libs/lib.pyx
Lines changed: 8 additions & 1 deletion b/‎pandas/_libs/lib.pyx
Lines changed: 8 additions & 1 deletion
diff --git a/‎pandas/_testing/__init__.py
Lines changed: 1 addition & 1 deletion b/‎pandas/_testing/__init__.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎pandas/core/arrays/categorical.py
Lines changed: 3 additions & 1 deletion b/‎pandas/core/arrays/categorical.py
Lines changed: 3 additions & 1 deletion
diff --git a/‎pandas/core/arrays/datetimelike.py
Lines changed: 8 additions & 2 deletions b/‎pandas/core/arrays/datetimelike.py
Lines changed: 8 additions & 2 deletions
diff --git a/‎pandas/core/dtypes/common.py
Lines changed: 17 additions & 1 deletion b/‎pandas/core/dtypes/common.py
Lines changed: 17 additions & 1 deletion
diff --git a/‎pandas/core/indexes/base.py
Lines changed: 5 additions & 1 deletion b/‎pandas/core/indexes/base.py
Lines changed: 5 additions & 1 deletion
diff --git a/‎pandas/core/indexes/interval.py
Lines changed: 2 additions & 1 deletion b/‎pandas/core/indexes/interval.py
Lines changed: 2 additions & 1 deletion
diff --git a/‎pandas/tests/arrays/floating/test_astype.py
Lines changed: 2 additions & 4 deletions b/‎pandas/tests/arrays/floating/test_astype.py
Lines changed: 2 additions & 4 deletions
diff --git a/‎pandas/tests/arrays/integer/test_dtypes.py
Lines changed: 2 additions & 4 deletions b/‎pandas/tests/arrays/integer/test_dtypes.py
Lines changed: 2 additions & 4 deletions
diff --git a/‎pandas/tests/arrays/sparse/test_astype.py
Lines changed: 2 additions & 2 deletions b/‎pandas/tests/arrays/sparse/test_astype.py
Lines changed: 2 additions & 2 deletions
@@ -754,7 +754,14 @@ cpdef ndarray[object] ensure_string_array(
 
     if hasattr(arr, "to_numpy"):
 
-        if hasattr(arr, "dtype") and arr.dtype.kind in "mM":
+        if (
+            hasattr(arr, "dtype")
+            and arr.dtype.kind in "mM"
+            # TODO: we should add a custom ArrowExtensionArray.astype implementation
+            # that handles astype(str) specifically, avoiding ending up here and
+            # then we can remove the below check for `_pa_array` (for ArrowEA)
+            and not hasattr(arr, "_pa_array")
+        ):
             # dtype check to exclude DataFrame
             # GH#41409 TODO: not a great place for this
             out = arr.astype(str).astype(object)
 
@@ -108,7 +108,7 @@
 
 COMPLEX_DTYPES: list[Dtype] = [complex, "complex64", "complex128"]
 if using_string_dtype():
-    STRING_DTYPES: list[Dtype] = [str, "U"]
+    STRING_DTYPES: list[Dtype] = ["U"]
 else:
     STRING_DTYPES: list[Dtype] = [str, "str", "U"]  # type: ignore[no-redef]
 COMPLEX_FLOAT_DTYPES: list[Dtype] = [*COMPLEX_DTYPES, *FLOAT_NUMPY_DTYPES]
 
@@ -2685,7 +2685,9 @@ def _str_get_dummies(self, sep: str = "|", dtype: NpDtype | None = None):
         # sep may not be in categories. Just bail on this.
         from pandas.core.arrays import NumpyExtensionArray
 
-        return NumpyExtensionArray(self.astype(str))._str_get_dummies(sep, dtype)
+        return NumpyExtensionArray(self.to_numpy(str, na_value="NaN"))._str_get_dummies(
+            sep, dtype
+        )
 
     # ------------------------------------------------------------------------
     # GroupBy Methods
 
@@ -471,10 +471,16 @@ def astype(self, dtype, copy: bool = True):
 
             return self._box_values(self.asi8.ravel()).reshape(self.shape)
 
+        elif is_string_dtype(dtype):
+            if isinstance(dtype, ExtensionDtype):
+                arr_object = self._format_native_types(na_rep=dtype.na_value)  # type: ignore[arg-type]
+                cls = dtype.construct_array_type()
+                return cls._from_sequence(arr_object, dtype=dtype, copy=False)
+            else:
+                return self._format_native_types()
+
         elif isinstance(dtype, ExtensionDtype):
             return super().astype(dtype, copy=copy)
-        elif is_string_dtype(dtype):
-            return self._format_native_types()
         elif dtype.kind in "iu":
             # we deliberately ignore int32 vs. int64 here.
             # See https://github.com/pandas-dev/pandas/issues/24381 for more.
 
@@ -12,6 +12,8 @@
 
 import numpy as np
 
+from pandas._config import using_string_dtype
+
 from pandas._libs import (
     Interval,
     Period,
@@ -1470,7 +1472,15 @@ def is_extension_array_dtype(arr_or_dtype) -> bool:
     elif isinstance(dtype, np.dtype):
         return False
     else:
-        return registry.find(dtype) is not None
+        try:
+            with warnings.catch_warnings():
+                # pandas_dtype(..) can raise UserWarning for class input
+                warnings.simplefilter("ignore", UserWarning)
+                dtype = pandas_dtype(dtype)
+        except (TypeError, ValueError):
+            # np.dtype(..) can raise ValueError
+            return False
+        return isinstance(dtype, ExtensionDtype)
 
 
 def is_ea_or_datetimelike_dtype(dtype: DtypeObj | None) -> bool:
@@ -1773,6 +1783,12 @@ def pandas_dtype(dtype) -> DtypeObj:
     elif isinstance(dtype, (np.dtype, ExtensionDtype)):
         return dtype
 
+    # builtin aliases
+    if dtype is str and using_string_dtype():
+        from pandas.core.arrays.string_ import StringDtype
+
+        return StringDtype(na_value=np.nan)
+
     # registered extension types
     result = registry.find(dtype)
     if result is not None:
 
@@ -6262,7 +6262,11 @@ def _should_compare(self, other: Index) -> bool:
             return False
 
         dtype = _unpack_nested_dtype(other)
-        return self._is_comparable_dtype(dtype) or is_object_dtype(dtype)
+        return (
+            self._is_comparable_dtype(dtype)
+            or is_object_dtype(dtype)
+            or is_string_dtype(dtype)
+        )
 
     def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:
         """
 
@@ -51,6 +51,7 @@
     is_number,
     is_object_dtype,
     is_scalar,
+    is_string_dtype,
     pandas_dtype,
 )
 from pandas.core.dtypes.dtypes import (
@@ -712,7 +713,7 @@ def _get_indexer(
             # left/right get_indexer, compare elementwise, equality -> match
             indexer = self._get_indexer_unique_sides(target)
 
-        elif not is_object_dtype(target.dtype):
+        elif not (is_object_dtype(target.dtype) or is_string_dtype(target.dtype)):
             # homogeneous scalar index: use IntervalTree
             # we should always have self._should_partial_index(target) here
             target = self._maybe_convert_i8(target)
 
@@ -68,11 +68,9 @@ def test_astype_str(using_infer_string):
 
     if using_infer_string:
         expected = pd.array(["0.1", "0.2", None], dtype=pd.StringDtype(na_value=np.nan))
-        tm.assert_extension_array_equal(a.astype("str"), expected)
 
-        # TODO(infer_string) this should also be a string array like above
-        expected = np.array(["0.1", "0.2", "<NA>"], dtype="U32")
-        tm.assert_numpy_array_equal(a.astype(str), expected)
+        tm.assert_extension_array_equal(a.astype(str), expected)
+        tm.assert_extension_array_equal(a.astype("str"), expected)
     else:
         expected = np.array(["0.1", "0.2", "<NA>"], dtype="U32")
 
 
@@ -281,11 +281,9 @@ def test_astype_str(using_infer_string):
 
     if using_infer_string:
         expected = pd.array(["1", "2", None], dtype=pd.StringDtype(na_value=np.nan))
-        tm.assert_extension_array_equal(a.astype("str"), expected)
 
-        # TODO(infer_string) this should also be a string array like above
-        expected = np.array(["1", "2", "<NA>"], dtype=f"{tm.ENDIAN}U21")
-        tm.assert_numpy_array_equal(a.astype(str), expected)
+        tm.assert_extension_array_equal(a.astype(str), expected)
+        tm.assert_extension_array_equal(a.astype("str"), expected)
     else:
         expected = np.array(["1", "2", "<NA>"], dtype=f"{tm.ENDIAN}U21")
 
 
@@ -81,8 +81,8 @@ def test_astype_all(self, any_real_numpy_dtype):
             ),
             (
                 SparseArray([0, 1, 10]),
-                str,
-                SparseArray(["0", "1", "10"], dtype=SparseDtype(str, "0")),
+                np.str_,
+                SparseArray(["0", "1", "10"], dtype=SparseDtype(np.str_, "0")),
             ),
             (SparseArray(["10", "20"]), float, SparseArray([10.0, 20.0])),
             (
Original file line number	Diff line number	Diff line change
`@@ -81,8 +81,8 @@ def test_astype_all(self, any_real_numpy_dtype):`
`81`	`81`	`),`
`82`	`82`	`(`
`83`	`83`	`SparseArray([0, 1, 10]),`
`84`		`- str,`
`85`		`- SparseArray(["0", "1", "10"], dtype=SparseDtype(str, "0")),`
	`84`	`+ np.str_,`
	`85`	`+ SparseArray(["0", "1", "10"], dtype=SparseDtype(np.str_, "0")),`
`86`	`86`	`),`
`87`	`87`	`(SparseArray(["10", "20"]), float, SparseArray([10.0, 20.0])),`
`88`	`88`	`(`