diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 75ba169600962..f89bcc2c27ed0 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -98,6 +98,7 @@ Other enhancements - Added :meth:`Index.infer_objects` analogous to :meth:`Series.infer_objects` (:issue:`50034`) - Added ``copy`` parameter to :meth:`Series.infer_objects` and :meth:`DataFrame.infer_objects`, passing ``False`` will avoid making copies for series or columns that are already non-object or where no better dtype can be inferred (:issue:`50096`) - :meth:`DataFrame.plot.hist` now recognizes ``xlabel`` and ``ylabel`` arguments (:issue:`49793`) +- Improved error message when trying to align :class:`DataFrame` objects (for example, in :meth:`DataFrame.compare`) to clarify that "identically-labeled" refers to both index and columns (:issue:`50083`) - .. --------------------------------------------------------------------------- diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index 0be6a13621ab2..3908422bba523 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -313,7 +313,8 @@ def to_series(right): left, right = left.align(right, join="outer", level=level, copy=False) else: raise ValueError( - "Can only compare identically-labeled DataFrame objects" + "Can only compare identically-labeled (both index and columns) " + "DataFrame objects" ) elif isinstance(right, ABCSeries): # axis=1 is default for DataFrame-with-Series op diff --git a/pandas/tests/frame/methods/test_compare.py b/pandas/tests/frame/methods/test_compare.py index 455acde1af684..fe74ec8077bc9 100644 --- a/pandas/tests/frame/methods/test_compare.py +++ b/pandas/tests/frame/methods/test_compare.py @@ -170,14 +170,20 @@ def test_compare_multi_index(align_axis): def test_compare_unaligned_objects(): # test DataFrames with different indices - msg = "Can only compare identically-labeled DataFrame objects" + msg = ( + r"Can only compare 
identically-labeled \(both index and columns\) DataFrame " + "objects" + ) with pytest.raises(ValueError, match=msg): df1 = pd.DataFrame([1, 2, 3], index=["a", "b", "c"]) df2 = pd.DataFrame([1, 2, 3], index=["a", "b", "d"]) df1.compare(df2) # test DataFrames with different shapes - msg = "Can only compare identically-labeled DataFrame objects" + msg = ( + r"Can only compare identically-labeled \(both index and columns\) DataFrame " + "objects" + ) with pytest.raises(ValueError, match=msg): df1 = pd.DataFrame(np.ones((3, 3))) df2 = pd.DataFrame(np.zeros((2, 1))) diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index d55843603fb63..241e2df377af6 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -1536,7 +1536,10 @@ def test_comparisons(self, simple_frame, float_frame, func): result3 = func(float_frame, 0) tm.assert_numpy_array_equal(result3.values, func(float_frame.values, 0)) - msg = "Can only compare identically-labeled DataFrame" + msg = ( + r"Can only compare identically-labeled \(both index and columns\) " + "DataFrame objects" + ) with pytest.raises(ValueError, match=msg): func(simple_frame, simple_frame[:2]) diff --git a/pandas/tests/frame/test_nonunique_indexes.py b/pandas/tests/frame/test_nonunique_indexes.py index 4c21446cab375..819a8304769ab 100644 --- a/pandas/tests/frame/test_nonunique_indexes.py +++ b/pandas/tests/frame/test_nonunique_indexes.py @@ -201,7 +201,10 @@ def test_dup_columns_comparisons(self): df2 = DataFrame([[0, 1], [2, 4], [2, np.nan], [4, 5]], columns=["A", "A"]) # not-comparing like-labelled - msg = "Can only compare identically-labeled DataFrame objects" + msg = ( + r"Can only compare identically-labeled \(both index and columns\) " + "DataFrame objects" + ) with pytest.raises(ValueError, match=msg): df1 == df2 diff --git a/pandas/tests/series/test_arithmetic.py b/pandas/tests/series/test_arithmetic.py index 9d8cf9787ad3f..27410a626811c 100644 --- 
a/pandas/tests/series/test_arithmetic.py +++ b/pandas/tests/series/test_arithmetic.py @@ -643,10 +643,19 @@ def test_ne(self): ) def test_comp_ops_df_compat(self, left, right, frame_or_series): # GH 1134 - msg = f"Can only compare identically-labeled {frame_or_series.__name__} objects" + # GH 50083 to clarify that index and columns must be identically labeled if frame_or_series is not Series: + msg = ( + rf"Can only compare identically-labeled \(both index and columns\) " + f"{frame_or_series.__name__} objects" + ) left = left.to_frame() right = right.to_frame() + else: + msg = ( + f"Can only compare identically-labeled {frame_or_series.__name__} " + f"objects" + ) with pytest.raises(ValueError, match=msg): left == right