From 3c5601a4809adf1b41ebbaab1dfc5b3b736231cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Mr=C3=B3z?= Date: Sat, 22 Oct 2022 10:36:52 +0200 Subject: [PATCH 1/3] Improve error message about duplicate columns in df.explode --- pandas/core/frame.py | 6 +++++- pandas/tests/frame/methods/test_explode.py | 7 ++++++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index d741ca21d8ca7..536cc798ed4fd 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -8939,7 +8939,11 @@ def explode( 3 4 1 e """ if not self.columns.is_unique: - raise ValueError("columns must be unique") + duplicate_cols = self.columns[self.columns.duplicated()].tolist() + raise ValueError( + "data frame columns must be unique. " + + f"Duplicate columns: {duplicate_cols}" + ) columns: list[Hashable] if is_scalar(column) or isinstance(column, tuple): diff --git a/pandas/tests/frame/methods/test_explode.py b/pandas/tests/frame/methods/test_explode.py index 8716a181120f6..7b565e2b7f996 100644 --- a/pandas/tests/frame/methods/test_explode.py +++ b/pandas/tests/frame/methods/test_explode.py @@ -1,3 +1,5 @@ +import re + import numpy as np import pytest @@ -18,7 +20,10 @@ def test_error(): df.explode(list("AA")) df.columns = list("AA") - with pytest.raises(ValueError, match="columns must be unique"): + with pytest.raises( + ValueError, + match=re.escape("data frame columns must be unique. Duplicate columns: ['A']"), + ): df.explode("A") From a7af8730809002466ec8270324ec786b0da9937b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Mr=C3=B3z?= <505546+a-mroz@users.noreply.github.com> Date: Tue, 25 Oct 2022 22:06:52 +0200 Subject: [PATCH 2/3] Improve error message for non-unique columns Co-authored-by: William Ayd --- pandas/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 536cc798ed4fd..52e85b86342b6 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -8941,7 +8941,7 @@ def explode( if not self.columns.is_unique: duplicate_cols = self.columns[self.columns.duplicated()].tolist() raise ValueError( - "data frame columns must be unique. " + "DataFrame columns must be unique. " + f"Duplicate columns: {duplicate_cols}" ) From 45e6e64bbebaaf63aeec853ffe4d539d1dafe054 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Mr=C3=B3z?= <505546+a-mroz@users.noreply.github.com> Date: Tue, 25 Oct 2022 22:07:19 +0200 Subject: [PATCH 3/3] Improve error message for non-unique columns Co-authored-by: William Ayd --- pandas/tests/frame/methods/test_explode.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/frame/methods/test_explode.py b/pandas/tests/frame/methods/test_explode.py index 7b565e2b7f996..6d9874dc58c17 100644 --- a/pandas/tests/frame/methods/test_explode.py +++ b/pandas/tests/frame/methods/test_explode.py @@ -22,7 +22,7 @@ def test_error(): df.columns = list("AA") with pytest.raises( ValueError, - match=re.escape("data frame columns must be unique. Duplicate columns: ['A']"), + match=re.escape("DataFrame columns must be unique. Duplicate columns: ['A']"), ): df.explode("A")