Skip to content

Commit b233faa

Browse files
authored
ENH: Add use_nullable_dtypes for read_fwf (#50289)
* ENH: Add use_nullable_dtypes for read_fwf
* Add gh ref
1 parent 18dd6fa commit b233faa

File tree

3 files changed

+49
-0
lines changed

3 files changed

+49
-0
lines changed

doc/source/whatsnew/v2.0.0.rst

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -36,6 +36,7 @@ Configuration option, ``mode.dtype_backend``, to return pyarrow-backed dtypes
3636
The ``use_nullable_dtypes`` keyword argument has been expanded to the following functions to enable automatic conversion to nullable dtypes (:issue:`36712`)
3737

3838
* :func:`read_csv`
39+
* :func:`read_fwf`
3940
* :func:`read_excel`
4041
* :func:`read_sql`
4142
* :func:`read_sql_query`

pandas/io/parsers/readers.py

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1227,6 +1227,7 @@ def read_fwf(
12271227
colspecs: Sequence[tuple[int, int]] | str | None = "infer",
12281228
widths: Sequence[int] | None = None,
12291229
infer_nrows: int = 100,
1230+
use_nullable_dtypes: bool = False,
12301231
**kwds,
12311232
) -> DataFrame | TextFileReader:
12321233
r"""
@@ -1258,6 +1259,13 @@ def read_fwf(
12581259
infer_nrows : int, default 100
12591260
The number of rows to consider when letting the parser determine the
12601261
`colspecs`.
1262+
use_nullable_dtypes : bool, default False
1263+
Whether or not to use nullable dtypes as default when reading data. If
1264+
set to True, nullable dtypes are used for all dtypes that have a nullable
1265+
implementation, even if no nulls are present.
1266+
1267+
.. versionadded:: 2.0
1268+
12611269
**kwds : optional
12621270
Optional keyword arguments can be passed to ``TextFileReader``.
12631271
@@ -1314,6 +1322,7 @@ def read_fwf(
13141322
kwds["colspecs"] = colspecs
13151323
kwds["infer_nrows"] = infer_nrows
13161324
kwds["engine"] = "python-fwf"
1325+
kwds["use_nullable_dtypes"] = use_nullable_dtypes
13171326
return _read(filepath_or_buffer, kwds)
13181327

13191328

pandas/tests/io/parser/test_read_fwf.py

Lines changed: 39 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -16,11 +16,16 @@
1616

1717
from pandas.errors import EmptyDataError
1818

19+
import pandas as pd
1920
from pandas import (
2021
DataFrame,
2122
DatetimeIndex,
2223
)
2324
import pandas._testing as tm
25+
from pandas.core.arrays import (
26+
ArrowStringArray,
27+
StringArray,
28+
)
2429
from pandas.tests.io.test_compression import _compression_to_extension
2530

2631
from pandas.io.parsers import (
@@ -941,3 +946,37 @@ def test_widths_and_usecols():
941946
}
942947
)
943948
tm.assert_frame_equal(result, expected)
949+
950+
951+
def test_use_nullable_dtypes(string_storage):
952+
# GH#50289
953+
954+
data = """a  b    c      d  e     f  g    h  i
955+
1  2.5  True  a
956+
3  4.5  False b  True  6  7.5  a"""
957+
with pd.option_context("mode.string_storage", string_storage):
958+
result = read_fwf(StringIO(data), use_nullable_dtypes=True)
959+
960+
if string_storage == "python":
961+
arr = StringArray(np.array(["a", "b"], dtype=np.object_))
962+
arr_na = StringArray(np.array([pd.NA, "a"], dtype=np.object_))
963+
else:
964+
import pyarrow as pa
965+
966+
arr = ArrowStringArray(pa.array(["a", "b"]))
967+
arr_na = ArrowStringArray(pa.array([None, "a"]))
968+
969+
expected = DataFrame(
970+
{
971+
"a": pd.Series([1, 3], dtype="Int64"),
972+
"b": pd.Series([2.5, 4.5], dtype="Float64"),
973+
"c": pd.Series([True, False], dtype="boolean"),
974+
"d": arr,
975+
"e": pd.Series([pd.NA, True], dtype="boolean"),
976+
"f": pd.Series([pd.NA, 6], dtype="Int64"),
977+
"g": pd.Series([pd.NA, 7.5], dtype="Float64"),
978+
"h": arr_na,
979+
"i": pd.Series([pd.NA, pd.NA], dtype="Int64"),
980+
}
981+
)
982+
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)