From 0557b52e6c360569efd7c79ad5a64ddac163c65e Mon Sep 17 00:00:00 2001 From: phofl Date: Sun, 13 Dec 2020 00:55:43 +0100 Subject: [PATCH 1/6] Try fixing inconsistency --- pandas/io/parsers.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 5b623c360c3ef..ef2f96a9a7855 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -2512,6 +2512,16 @@ def _exclude_implicit_index(self, alldata): else: data = {k: v for k, v in zip(names, alldata)} + # if self._implicit_index: + # excl_indices = self.index_col + # map(alldata.pop(), excl_indices) + # if len(names) != len(alldata): + # raise ValueError( + # "Number of passed names did not match " + # "number of header fields in the file" + # ) + # data = {k: v for k, v in zip(names, alldata)} + return data # legacy @@ -2694,9 +2704,7 @@ def _infer_columns(self): self._clear_buffer() if names is not None: - if (self.usecols is not None and len(names) != len(self.usecols)) or ( - self.usecols is None and len(names) != len(columns[0]) - ): + if self.usecols is not None and len(names) != len(self.usecols): raise ValueError( "Number of passed names did not match " "number of header fields in the file" From f98f79ab04151f1afb7518da9d6357e1e710cd38 Mon Sep 17 00:00:00 2001 From: phofl Date: Sun, 13 Dec 2020 02:14:31 +0100 Subject: [PATCH 2/6] Fix inconsistency --- pandas/io/parsers.py | 30 +++++++------------------- pandas/tests/io/parser/test_usecols.py | 5 ----- 2 files changed, 8 insertions(+), 27 deletions(-) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index ef2f96a9a7855..faf586a84daf6 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -2487,9 +2487,9 @@ def read(self, rows=None): content = content[1:] alldata = self._rows_to_cols(content) - data = self._exclude_implicit_index(alldata) + data, columns = self._exclude_implicit_index(alldata) - columns = self._maybe_dedup_names(self.columns) + columns = 
self._maybe_dedup_names(columns) columns, data = self._do_date_conversions(columns, data) data = self._convert_data(data) @@ -2500,29 +2500,15 @@ def read(self, rows=None): def _exclude_implicit_index(self, alldata): names = self._maybe_dedup_names(self.orig_names) + offset = 0 if self._implicit_index: excl_indices = self.index_col + offset = len(excl_indices) - data = {} - offset = 0 - for i, col in enumerate(names): - while i + offset in excl_indices: - offset += 1 - data[col] = alldata[i + offset] - else: - data = {k: v for k, v in zip(names, alldata)} - - # if self._implicit_index: - # excl_indices = self.index_col - # map(alldata.pop(), excl_indices) - # if len(names) != len(alldata): - # raise ValueError( - # "Number of passed names did not match " - # "number of header fields in the file" - # ) - # data = {k: v for k, v in zip(names, alldata)} + if self._col_indices is not None and len(names) != len(self._col_indices): + names = [names[i] for i in sorted(self._col_indices)] - return data + return {name: alldata[i + offset] for i, name in enumerate(names)}, names # legacy def get_chunk(self, size=None): @@ -2704,7 +2690,7 @@ def _infer_columns(self): self._clear_buffer() if names is not None: - if self.usecols is not None and len(names) != len(self.usecols): + if len(names) > len(columns[0]): raise ValueError( "Number of passed names did not match " "number of header fields in the file" diff --git a/pandas/tests/io/parser/test_usecols.py b/pandas/tests/io/parser/test_usecols.py index fbf3b0ea7c792..bcb6395e15618 100644 --- a/pandas/tests/io/parser/test_usecols.py +++ b/pandas/tests/io/parser/test_usecols.py @@ -560,11 +560,6 @@ def test_raises_on_usecols_names_mismatch(all_parsers, usecols, kwargs, expected @pytest.mark.parametrize("usecols", [["A", "C"], [0, 2]]) def test_usecols_subset_names_mismatch_orig_columns(all_parsers, usecols, request): - if all_parsers.engine != "c": - reason = "see gh-16469: works on the C engine but not the Python engine" - # 
Number of passed names did not match number of header fields in the file - request.node.add_marker(pytest.mark.xfail(reason=reason, raises=ValueError)) - data = "a,b,c,d\n1,2,3,4\n5,6,7,8" names = ["A", "B", "C", "D"] parser = all_parsers From ddc1e1ba3feab9a10dba402853e45db240bbd4b6 Mon Sep 17 00:00:00 2001 From: phofl Date: Sun, 13 Dec 2020 03:25:20 +0100 Subject: [PATCH 3/6] Remove function input --- pandas/tests/io/parser/test_usecols.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/parser/test_usecols.py b/pandas/tests/io/parser/test_usecols.py index bcb6395e15618..98e5801b3458e 100644 --- a/pandas/tests/io/parser/test_usecols.py +++ b/pandas/tests/io/parser/test_usecols.py @@ -559,7 +559,7 @@ def test_raises_on_usecols_names_mismatch(all_parsers, usecols, kwargs, expected @pytest.mark.parametrize("usecols", [["A", "C"], [0, 2]]) -def test_usecols_subset_names_mismatch_orig_columns(all_parsers, usecols, request): +def test_usecols_subset_names_mismatch_orig_columns(all_parsers, usecols): data = "a,b,c,d\n1,2,3,4\n5,6,7,8" names = ["A", "B", "C", "D"] parser = all_parsers From ecfff70ffb7547a433281035db444ca24ab60182 Mon Sep 17 00:00:00 2001 From: phofl Date: Sun, 13 Dec 2020 12:09:43 +0100 Subject: [PATCH 4/6] Add whatsnew --- doc/source/whatsnew/v1.3.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index d86c1b7911528..26e548f519ecd 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -217,6 +217,7 @@ MultiIndex I/O ^^^ +- Bug in :func:`read_csv` not accepting ``usecols`` with different length than ``names`` for ``engine="python"`` (:issue:`16469`) - Bug in :func:`read_csv` raising ``TypeError`` when ``names`` and ``parse_dates`` is specified for ``engine="c"`` (:issue:`33699`) - From 86f0f97f0af86f42e0000a1cb864440f10dbfcdd Mon Sep 17 00:00:00 2001 From: phofl Date: Sun, 13 Dec 2020 12:14:19 +0100 Subject: [PATCH 5/6] 
Remove unnecessary function call --- pandas/io/parsers.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 138ee429ee8c6..d9e46c846d05d 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -2489,7 +2489,6 @@ def read(self, rows=None): alldata = self._rows_to_cols(content) data, columns = self._exclude_implicit_index(alldata) - columns = self._maybe_dedup_names(columns) columns, data = self._do_date_conversions(columns, data) data = self._convert_data(data) From a8bb14a0a244351523b83629a3008c64e68c7eeb Mon Sep 17 00:00:00 2001 From: phofl Date: Sun, 13 Dec 2020 12:16:49 +0100 Subject: [PATCH 6/6] Simplify code --- pandas/io/parsers.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index d9e46c846d05d..8177741b5252d 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -2501,8 +2501,7 @@ def _exclude_implicit_index(self, alldata): offset = 0 if self._implicit_index: - excl_indices = self.index_col - offset = len(excl_indices) + offset = len(self.index_col) if self._col_indices is not None and len(names) != len(self._col_indices): names = [names[i] for i in sorted(self._col_indices)]