From d4ea4a28ce7254fb42128139ea1404d62ca21362 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Fri, 30 Dec 2022 16:37:38 +0100
Subject: [PATCH 1/5] ENH: Add use_nullable_dtypes to to_numeric

---
 doc/source/whatsnew/v2.0.0.rst        |  1 +
 pandas/core/tools/numeric.py          | 38 ++++++++++++++++++++----
 pandas/tests/tools/test_to_numeric.py | 42 +++++++++++++++++++++++++++
 3 files changed, 75 insertions(+), 6 deletions(-)

diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst
index b1387e9717079..b818357727d9e 100644
--- a/doc/source/whatsnew/v2.0.0.rst
+++ b/doc/source/whatsnew/v2.0.0.rst
@@ -42,6 +42,7 @@ The ``use_nullable_dtypes`` keyword argument has been expanded to the following
 * :func:`read_sql`
 * :func:`read_sql_query`
 * :func:`read_sql_table`
+* :func:`to_numeric`
 
 Additionally a new global configuration, ``mode.dtype_backend`` can now be used in conjunction with the parameter ``use_nullable_dtypes=True`` in the following functions
 to select the nullable dtypes implementation.
diff --git a/pandas/core/tools/numeric.py b/pandas/core/tools/numeric.py
index da47aa549dfa3..80da4c3a92c50 100644
--- a/pandas/core/tools/numeric.py
+++ b/pandas/core/tools/numeric.py
@@ -13,6 +13,7 @@
 from pandas.core.dtypes.cast import maybe_downcast_numeric
 from pandas.core.dtypes.common import (
     ensure_object,
+    is_bool_dtype,
     is_datetime_or_timedelta_dtype,
     is_decimal,
     is_integer_dtype,
@@ -34,6 +35,7 @@ def to_numeric(
     arg,
     errors: DateTimeErrorChoices = "raise",
     downcast: Literal["integer", "signed", "unsigned", "float"] | None = None,
+    use_nullable_dtypes: bool = False,
 ):
     """
     Convert argument to a numeric type.
@@ -47,7 +49,7 @@ def to_numeric(
     numbers smaller than `-9223372036854775808` (np.iinfo(np.int64).min)
     or larger than `18446744073709551615` (np.iinfo(np.uint64).max) are
     passed in, it is very likely they will be converted to float so that
-    they can stored in an `ndarray`. These warnings apply similarly to
+    they can be stored in an `ndarray`. These warnings apply similarly to
     `Series` since it internally leverages `ndarray`.
 
     Parameters
@@ -78,6 +80,10 @@ def to_numeric(
         the dtype it is to be cast to, so if none of the dtypes
         checked satisfy that specification, no downcasting will be
         performed on the data.
+    use_nullable_dtypes : bool, default False
+        Whether or not to use nullable dtypes as default when converting data. If
+        set to True, nullable dtypes are used for all dtypes that have a nullable
+        implementation, even if no nulls are present.
 
     Returns
     -------
@@ -183,6 +189,7 @@ def to_numeric(
         values = values._data[~mask]
 
     values_dtype = getattr(values, "dtype", None)
+    new_mask: np.ndarray | None = None
     if is_numeric_dtype(values_dtype):
         pass
     elif is_datetime_or_timedelta_dtype(values_dtype):
@@ -191,13 +198,23 @@ def to_numeric(
         values = ensure_object(values)
         coerce_numeric = errors not in ("ignore", "raise")
         try:
-            values, _ = lib.maybe_convert_numeric(
-                values, set(), coerce_numeric=coerce_numeric
+            values, new_mask = lib.maybe_convert_numeric(
+                values,
+                set(),
+                coerce_numeric=coerce_numeric,
+                convert_to_masked_nullable=use_nullable_dtypes,
             )
         except (ValueError, TypeError):
             if errors == "raise":
                 raise
 
+    if new_mask is not None:
+        # Drop the masked (null) entries: only the non-null values are expected
+        # further down, and removing them is what enables downcasting
+        values = values[~new_mask]
+    elif use_nullable_dtypes and new_mask is None:
+        new_mask = np.zeros(values.shape, dtype=np.bool_)
+
     # attempt downcast only if the data has been successfully converted
     # to a numerical dtype and if a downcast method has been specified
     if downcast is not None and is_numeric_dtype(values.dtype):
@@ -228,17 +245,26 @@ def to_numeric(
                     if values.dtype == dtype:
                         break
 
-    # GH33013: for IntegerArray & FloatingArray need to reconstruct masked array
-    if mask is not None:
+    # GH33013: for IntegerArray, BooleanArray & FloatingArray need to reconstruct
+    # masked array
+    if mask is not None or new_mask is not None:
+        if mask is None:
+            mask = new_mask
         data = np.zeros(mask.shape, dtype=values.dtype)
         data[~mask] = values
 
         from pandas.core.arrays import (
+            BooleanArray,
             FloatingArray,
             IntegerArray,
         )
 
-        klass = IntegerArray if is_integer_dtype(data.dtype) else FloatingArray
+        if is_integer_dtype(data.dtype):
+            klass = IntegerArray
+        elif is_bool_dtype(data.dtype):
+            klass = BooleanArray
+        else:
+            klass = FloatingArray
         values = klass(data, mask.copy())
 
     if is_series:
diff --git a/pandas/tests/tools/test_to_numeric.py b/pandas/tests/tools/test_to_numeric.py
index 1347f6eb50b09..93f6011608fba 100644
--- a/pandas/tests/tools/test_to_numeric.py
+++ b/pandas/tests/tools/test_to_numeric.py
@@ -807,3 +807,45 @@ def test_to_numeric_large_float_not_downcast_to_float_32(val):
     expected = Series([val])
     result = to_numeric(expected, downcast="float")
     tm.assert_series_equal(result, expected)
+
+
+@pytest.mark.parametrize(
+    "val, dtype", [(1, "Int64"), (1.5, "Float64"), (True, "boolean")]
+)
+def test_to_numeric_use_nullable_dtypes(val, dtype):
+    # GH#
+    ser = Series([val], dtype=object)
+    result = to_numeric(ser, use_nullable_dtypes=True)
+    expected = Series([val], dtype=dtype)
+    tm.assert_series_equal(result, expected)
+
+
+@pytest.mark.parametrize(
+    "val, dtype", [(1, "Int64"), (1.5, "Float64"), (True, "boolean")]
+)
+def test_to_numeric_use_nullable_dtypes_na(val, dtype):
+    # GH#
+    ser = Series([val, None], dtype=object)
+    result = to_numeric(ser, use_nullable_dtypes=True)
+    expected = Series([val, pd.NA], dtype=dtype)
+    tm.assert_series_equal(result, expected)
+
+
+@pytest.mark.parametrize(
+    "val, dtype, downcast",
+    [(1, "Int8", "integer"), (1.5, "Float32", "float"), (1, "Int8", "signed")],
+)
+def test_to_numeric_use_nullable_dtypes_downcasting(val, dtype, downcast):
+    # GH#
+    ser = Series([val, None], dtype=object)
+    result = to_numeric(ser, use_nullable_dtypes=True, downcast=downcast)
+    expected = Series([val, pd.NA], dtype=dtype)
+    tm.assert_series_equal(result, expected)
+
+
+def test_to_numeric_use_nullable_dtypes_downcasting_uint():
+    # GH#
+    ser = Series([1, pd.NA], dtype="UInt64")
+    result = to_numeric(ser, use_nullable_dtypes=True, downcast="unsigned")
+    expected = Series([1, pd.NA], dtype="UInt8")
+    tm.assert_series_equal(result, expected)

From cebccbe8588054a87fea56421387af7666a0396f Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Fri, 30 Dec 2022 16:39:00 +0100
Subject: [PATCH 2/5] Add gh ref

---
 pandas/tests/tools/test_to_numeric.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/pandas/tests/tools/test_to_numeric.py b/pandas/tests/tools/test_to_numeric.py
index 93f6011608fba..5f1f734cc82d4 100644
--- a/pandas/tests/tools/test_to_numeric.py
+++ b/pandas/tests/tools/test_to_numeric.py
@@ -813,7 +813,7 @@ def test_to_numeric_large_float_not_downcast_to_float_32(val):
     "val, dtype", [(1, "Int64"), (1.5, "Float64"), (True, "boolean")]
 )
 def test_to_numeric_use_nullable_dtypes(val, dtype):
-    # GH#
+    # GH#50505
     ser = Series([val], dtype=object)
     result = to_numeric(ser, use_nullable_dtypes=True)
     expected = Series([val], dtype=dtype)
@@ -824,7 +824,7 @@ def test_to_numeric_use_nullable_dtypes(val, dtype):
     "val, dtype", [(1, "Int64"), (1.5, "Float64"), (True, "boolean")]
 )
 def test_to_numeric_use_nullable_dtypes_na(val, dtype):
-    # GH#
+    # GH#50505
     ser = Series([val, None], dtype=object)
     result = to_numeric(ser, use_nullable_dtypes=True)
     expected = Series([val, pd.NA], dtype=dtype)
@@ -836,7 +836,7 @@ def test_to_numeric_use_nullable_dtypes_na(val, dtype):
     [(1, "Int8", "integer"), (1.5, "Float32", "float"), (1, "Int8", "signed")],
 )
 def test_to_numeric_use_nullable_dtypes_downcasting(val, dtype, downcast):
-    # GH#
+    # GH#50505
     ser = Series([val, None], dtype=object)
     result = to_numeric(ser, use_nullable_dtypes=True, downcast=downcast)
     expected = Series([val, pd.NA], dtype=dtype)
@@ -844,7 +844,7 @@ def test_to_numeric_use_nullable_dtypes_downcasting(val, dtype, downcast):
 
 
 def test_to_numeric_use_nullable_dtypes_downcasting_uint():
-    # GH#
+    # GH#50505
     ser = Series([1, pd.NA], dtype="UInt64")
     result = to_numeric(ser, use_nullable_dtypes=True, downcast="unsigned")
     expected = Series([1, pd.NA], dtype="UInt8")

From 91df80219ffdff03b8ef1a532b9b659f9435092e Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Fri, 30 Dec 2022 18:03:12 +0100
Subject: [PATCH 3/5] Fix typing

---
 pandas/core/tools/numeric.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/pandas/core/tools/numeric.py b/pandas/core/tools/numeric.py
index 80da4c3a92c50..de1a49870ea24 100644
--- a/pandas/core/tools/numeric.py
+++ b/pandas/core/tools/numeric.py
@@ -198,7 +198,7 @@ def to_numeric(
         values = ensure_object(values)
         coerce_numeric = errors not in ("ignore", "raise")
         try:
-            values, new_mask = lib.maybe_convert_numeric(
+            values, new_mask = lib.maybe_convert_numeric(  # type: ignore[call-overload]
                 values,
                 set(),
                 coerce_numeric=coerce_numeric,
@@ -250,6 +250,7 @@ def to_numeric(
     if mask is not None or new_mask is not None:
         if mask is None:
             mask = new_mask
+        assert isinstance(mask, np.ndarray)
         data = np.zeros(mask.shape, dtype=values.dtype)
         data[~mask] = values
 
@@ -259,6 +260,7 @@ def to_numeric(
             IntegerArray,
         )
 
+        klass: type[IntegerArray] | type[BooleanArray] | type[FloatingArray]
         if is_integer_dtype(data.dtype):
             klass = IntegerArray
         elif is_bool_dtype(data.dtype):

From c4c69f0d8a2f83fede850d4df61d06384c3b5a94 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Wed, 4 Jan 2023 21:51:45 +0100
Subject: [PATCH 4/5] Fix bugs

---
 pandas/_libs/lib.pyx                  |  2 ++
 pandas/core/tools/numeric.py          |  3 ++-
 pandas/tests/tools/test_to_numeric.py | 18 ++++++++++++++++++
 3 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index bc7b876cb5de8..7fd055e0f739b 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -2243,6 +2243,7 @@ def maybe_convert_numeric(
             if convert_empty or seen.coerce_numeric:
                 seen.saw_null()
                 floats[i] = complexes[i] = NaN
+                mask[i] = 1
             else:
                 raise ValueError("Empty string encountered")
         elif util.is_complex_object(val):
@@ -2299,6 +2300,7 @@ def maybe_convert_numeric(
 
                 seen.saw_null()
                 floats[i] = NaN
+                mask[i] = 1
 
     if seen.check_uint64_conflict():
         return (values, None)
diff --git a/pandas/core/tools/numeric.py b/pandas/core/tools/numeric.py
index de1a49870ea24..9d8f14e101945 100644
--- a/pandas/core/tools/numeric.py
+++ b/pandas/core/tools/numeric.py
@@ -19,6 +19,7 @@
     is_integer_dtype,
     is_number,
     is_numeric_dtype,
+    is_object_dtype,
     is_scalar,
     needs_i8_conversion,
 )
@@ -247,7 +248,7 @@ def to_numeric(
 
     # GH33013: for IntegerArray, BooleanArray & FloatingArray need to reconstruct
     # masked array
-    if mask is not None or new_mask is not None:
+    if (mask is not None or new_mask is not None) and not is_object_dtype(values.dtype):
         if mask is None:
             mask = new_mask
         assert isinstance(mask, np.ndarray)
diff --git a/pandas/tests/tools/test_to_numeric.py b/pandas/tests/tools/test_to_numeric.py
index 5f1f734cc82d4..ed29b2cf6919c 100644
--- a/pandas/tests/tools/test_to_numeric.py
+++ b/pandas/tests/tools/test_to_numeric.py
@@ -849,3 +849,21 @@ def test_to_numeric_use_nullable_dtypes_downcasting_uint():
     result = to_numeric(ser, use_nullable_dtypes=True, downcast="unsigned")
     expected = Series([1, pd.NA], dtype="UInt8")
     tm.assert_series_equal(result, expected)
+
+
+@pytest.mark.parametrize(
+    "use_nullable_dtypes, dtype", [(True, "Float64"), (False, "float64")]
+)
+def test_to_numeric_use_nullable_dtypes_error(use_nullable_dtypes, dtype):
+    # GH#50505
+    ser = Series(["a", "b", ""])
+    expected = ser.copy()
+    with pytest.raises(ValueError, match="Unable to parse string"):
+        to_numeric(ser, use_nullable_dtypes=use_nullable_dtypes)
+
+    result = to_numeric(ser, use_nullable_dtypes=use_nullable_dtypes, errors="ignore")
+    tm.assert_series_equal(result, expected)
+
+    result = to_numeric(ser, use_nullable_dtypes=use_nullable_dtypes, errors="coerce")
+    expected = Series([np.nan, np.nan, np.nan], dtype=dtype)
+    tm.assert_series_equal(result, expected)

From 92f20e15996644ce173ea05cb15f3986f23254d2 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <patrick_hoefler@gmx.net>
Date: Fri, 6 Jan 2023 19:57:40 +0100
Subject: [PATCH 5/5] Use array equal fast

---
 pandas/core/tools/numeric.py          | 8 +++++---
 pandas/tests/tools/test_to_numeric.py | 9 +++++++++
 2 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/pandas/core/tools/numeric.py b/pandas/core/tools/numeric.py
index 9d8f14e101945..a8ae8c47b0d19 100644
--- a/pandas/core/tools/numeric.py
+++ b/pandas/core/tools/numeric.py
@@ -29,7 +29,7 @@
 )
 
 import pandas as pd
-from pandas.core.arrays.numeric import NumericArray
+from pandas.core.arrays import BaseMaskedArray
 
 
 def to_numeric(
@@ -185,7 +185,7 @@ def to_numeric(
     # GH33013: for IntegerArray & FloatingArray extract non-null values for casting
     # save mask to reconstruct the full array after casting
     mask: npt.NDArray[np.bool_] | None = None
-    if isinstance(values, NumericArray):
+    if isinstance(values, BaseMaskedArray):
         mask = values._mask
         values = values._data[~mask]
 
@@ -251,6 +251,8 @@ def to_numeric(
     if (mask is not None or new_mask is not None) and not is_object_dtype(values.dtype):
         if mask is None:
             mask = new_mask
+        else:
+            mask = mask.copy()
         assert isinstance(mask, np.ndarray)
         data = np.zeros(mask.shape, dtype=values.dtype)
         data[~mask] = values
@@ -268,7 +270,7 @@ def to_numeric(
             klass = BooleanArray
         else:
             klass = FloatingArray
-        values = klass(data, mask.copy())
+        values = klass(data, mask)
 
     if is_series:
         return arg._constructor(values, index=arg.index, name=arg.name)
diff --git a/pandas/tests/tools/test_to_numeric.py b/pandas/tests/tools/test_to_numeric.py
index ed29b2cf6919c..1c0a8301d65cc 100644
--- a/pandas/tests/tools/test_to_numeric.py
+++ b/pandas/tests/tools/test_to_numeric.py
@@ -851,6 +851,15 @@ def test_to_numeric_use_nullable_dtypes_downcasting_uint():
     tm.assert_series_equal(result, expected)
 
 
+@pytest.mark.parametrize("dtype", ["Int64", "UInt64", "Float64", "boolean"])
+def test_to_numeric_use_nullable_dtypes_already_nullable(dtype):
+    # GH#50505
+    ser = Series([1, pd.NA], dtype=dtype)
+    result = to_numeric(ser, use_nullable_dtypes=True)
+    expected = Series([1, pd.NA], dtype=dtype)
+    tm.assert_series_equal(result, expected)
+
+
 @pytest.mark.parametrize(
     "use_nullable_dtypes, dtype", [(True, "Float64"), (False, "float64")]
 )