ENH: Add use_nullable_dtypes for read_fwf (#50289)

phofl · web-flow · commit b233faa4d1a5 · 2022-12-27T12:34:57.000-08:00
* ENH: Add use_nullable_dtypes for read_fwf

* Add gh ref
diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst
@@ -36,6 +36,7 @@ Configuration option, ``mode.dtype_backend``, to return pyarrow-backed dtypes
 The ``use_nullable_dtypes`` keyword argument has been expanded to the following functions to enable automatic conversion to nullable dtypes (:issue:`36712`)
 
 * :func:`read_csv`
+* :func:`read_fwf`
 * :func:`read_excel`
 * :func:`read_sql`
 * :func:`read_sql_query`
diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py
@@ -1227,6 +1227,7 @@ def read_fwf(
     colspecs: Sequence[tuple[int, int]] | str | None = "infer",
     widths: Sequence[int] | None = None,
     infer_nrows: int = 100,
+    use_nullable_dtypes: bool = False,
     **kwds,
 ) -> DataFrame | TextFileReader:
     r"""
@@ -1258,6 +1259,13 @@ def read_fwf(
     infer_nrows : int, default 100
         The number of rows to consider when letting the parser determine the
         `colspecs`.
+    use_nullable_dtypes : bool, default False
+        Whether or not to use nullable dtypes as default when reading data. If
+        set to True, nullable dtypes are used for all dtypes that have a nullable
+        implementation, even if no nulls are present.
+
+        .. versionadded:: 2.0
+
     **kwds : optional
         Optional keyword arguments can be passed to ``TextFileReader``.
 
@@ -1314,6 +1322,7 @@ def read_fwf(
     kwds["colspecs"] = colspecs
     kwds["infer_nrows"] = infer_nrows
     kwds["engine"] = "python-fwf"
+    kwds["use_nullable_dtypes"] = use_nullable_dtypes
     return _read(filepath_or_buffer, kwds)
 
 
diff --git a/pandas/tests/io/parser/test_read_fwf.py b/pandas/tests/io/parser/test_read_fwf.py
@@ -16,11 +16,16 @@
 
 from pandas.errors import EmptyDataError
 
+import pandas as pd
 from pandas import (
     DataFrame,
     DatetimeIndex,
 )
 import pandas._testing as tm
+from pandas.core.arrays import (
+    ArrowStringArray,
+    StringArray,
+)
 from pandas.tests.io.test_compression import _compression_to_extension
 
 from pandas.io.parsers import (
@@ -941,3 +946,37 @@ def test_widths_and_usecols():
         }
     )
     tm.assert_frame_equal(result, expected)
+
+
+def test_use_nullable_dtypes(string_storage):
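+    # GH#50289: with use_nullable_dtypes=True every column is expected to come
+    # back as a nullable dtype, including columns that are partly or fully empty.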
+    data = """a  b    c      d  e     f  g    h  i
+1  2.5  True  a
+3  4.5  False b  True  6  7.5  a"""
+    with pd.option_context("mode.string_storage", string_storage):
+        result = read_fwf(StringIO(data), use_nullable_dtypes=True)
+
+    if string_storage == "python":
+        arr = StringArray(np.array(["a", "b"], dtype=np.object_))
+        arr_na = StringArray(np.array([pd.NA, "a"], dtype=np.object_))
+    else:
+        import pyarrow as pa
+
+        arr = ArrowStringArray(pa.array(["a", "b"]))
+        arr_na = ArrowStringArray(pa.array([None, "a"]))
+
+    expected = DataFrame(
+        {
+            "a": pd.Series([1, 3], dtype="Int64"),
+            "b": pd.Series([2.5, 4.5], dtype="Float64"),
+            "c": pd.Series([True, False], dtype="boolean"),
+            "d": arr,
+            "e": pd.Series([pd.NA, True], dtype="boolean"),
+            "f": pd.Series([pd.NA, 6], dtype="Int64"),
+            "g": pd.Series([pd.NA, 7.5], dtype="Float64"),
+            "h": arr_na,
+            "i": pd.Series([pd.NA, pd.NA], dtype="Int64"),
+        }
+    )
+    tm.assert_frame_equal(result, expected)