From f0c52a3a09ce258479e646c7a273fa62529e1636 Mon Sep 17 00:00:00 2001 From: lpc Date: Thu, 12 Nov 2020 23:49:13 +0100 Subject: [PATCH 1/4] BUG: skip_blank_lines ignored by read_fwf GH37758 overriding PythonParser._remove_empty_lines in FixedWidthFieldParser as empty line description differs --- pandas/io/parsers.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 5725e2304e1d2..5067046ccbd5d 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -3769,6 +3769,19 @@ def _make_reader(self, f): self.infer_nrows, ) + def _remove_empty_lines(self, lines) -> List: + """ + Returns the list of lines without the empty ones. With fixed-width + fields, empty lines become arrays of empty strings. + + See PythonParser._remove_empty_lines. + """ + + def _keep(line): + return any(not isinstance(e, str) or e.strip() for e in line) + + return list(filter(_keep, lines)) + def _refine_defaults_read( dialect: Union[str, csv.Dialect], From b27484e82e1a23aabf12cce00d84ef7a5f18d362 Mon Sep 17 00:00:00 2001 From: lpc Date: Fri, 13 Nov 2020 00:13:29 +0100 Subject: [PATCH 2/4] BUG: skip_blank_lines ignored by read_fwf added a test --- pandas/tests/io/parser/test_read_fwf.py | 45 +++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/pandas/tests/io/parser/test_read_fwf.py b/pandas/tests/io/parser/test_read_fwf.py index 4796cf0b79fae..5e9609956183b 100644 --- a/pandas/tests/io/parser/test_read_fwf.py +++ b/pandas/tests/io/parser/test_read_fwf.py @@ -340,6 +340,51 @@ def test_fwf_comment(comment): tm.assert_almost_equal(result, expected) +def test_fwf_skip_blank_lines(): + data = """ + +A B C D + +201158 360.242940 149.910199 11950.7 +201159 444.953632 166.985655 11788.4 + + +201162 502.953953 173.237159 12468.3 + +""" + result = read_fwf(StringIO(data), skip_blank_lines=True) + expected = DataFrame( + [ + [201158, 360.242940, 149.910199, 11950.7], + [201159, 444.953632, 166.985655, 11788.4], + 
[201162, 502.953953, 173.237159, 12468.3], + ], + columns=["A", "B", "C", "D"], + ) + tm.assert_frame_equal(result, expected) + + data = """\ +A B C D +201158 360.242940 149.910199 11950.7 +201159 444.953632 166.985655 11788.4 + + +201162 502.953953 173.237159 12468.3 +""" + result = read_fwf(StringIO(data), skip_blank_lines=False) + expected = DataFrame( + [ + [201158, 360.242940, 149.910199, 11950.7], + [201159, 444.953632, 166.985655, 11788.4], + [np.nan, np.nan, np.nan, np.nan], + [np.nan, np.nan, np.nan, np.nan], + [201162, 502.953953, 173.237159, 12468.3], + ], + columns=["A", "B", "C", "D"], + ) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("thousands", [",", "#", "~"]) def test_fwf_thousands(thousands): data = """\ From 73984ac9e2fe0b83c8fe5f2d9a7f232768065c6d Mon Sep 17 00:00:00 2001 From: lpc Date: Fri, 13 Nov 2020 00:32:53 +0100 Subject: [PATCH 3/4] BUG: skip_blank_lines ignored by read_fwf whatsnew notice --- doc/source/whatsnew/v1.2.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index f751a91cecf19..659b072ec8ddb 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -509,6 +509,7 @@ I/O - :func:`read_csv` was closing user-provided binary file handles when ``engine="c"`` and an ``encoding`` was requested (:issue:`36980`) - Bug in :meth:`DataFrame.to_hdf` was not dropping missing rows with ``dropna=True`` (:issue:`35719`) - Bug in :func:`read_html` was raising a ``TypeError`` when supplying a ``pathlib.Path`` argument to the ``io`` parameter (:issue:`37705`) +- Bug in :func:`read_fwf` was not skipping blank lines (even with ``skip_blank_lines=True``) (:issue:`37758`) Plotting ^^^^^^^^ From dc895d56d72ceaba7b2a469cf37717b03d06a164 Mon Sep 17 00:00:00 2001 From: lpc Date: Fri, 13 Nov 2020 01:31:32 +0100 Subject: [PATCH 4/4] replaced filter usage --- pandas/io/parsers.py | 10 +++++----- 1 file changed, 5 insertions(+), 5
deletions(-) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 5067046ccbd5d..a707c74831126 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -3776,11 +3776,11 @@ def _remove_empty_lines(self, lines) -> List: See PythonParser._remove_empty_lines. """ - - def _keep(line): - return any(not isinstance(e, str) or e.strip() for e in line) - - return list(filter(_keep, lines)) + return [ + line + for line in lines + if any(not isinstance(e, str) or e.strip() for e in line) + ] def _refine_defaults_read(