diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py
index daa5187cdb636..267b4cd9ef884 100644
--- a/pandas/_testing/asserters.py
+++ b/pandas/_testing/asserters.py
@@ -1316,6 +1316,17 @@ def assert_frame_equal(
             lcol = left._ixs(i, axis=1)
             rcol = right._ixs(i, axis=1)
 
+            # GH#61473: when dtypes are not compared, give both columns the same
+            # null representation so that pd.NA in one and np.nan in the other
+            # do not register as a value mismatch.
+            if not check_dtype:
+                # Series.where returns a new Series, so the caller's frames are
+                # left untouched without an explicit copy.
+                # NOTE(review): masked dtypes such as Int64 presumably keep pd.NA
+                # after this call - confirm the object-vs-Int32 case from the issue.
+                lcol = lcol.where(lcol.notna(), np.nan)
+                rcol = rcol.where(rcol.notna(), np.nan)
+
             # GH #38183
             # use check_index=False, because we do not want to run
             # assert_index_equal for each column,
diff --git a/pandas/tests/util/test_assert_frame_equal.py b/pandas/tests/util/test_assert_frame_equal.py
index ea954756d63c8..1d8a0119e55e0 100644
--- a/pandas/tests/util/test_assert_frame_equal.py
+++ b/pandas/tests/util/test_assert_frame_equal.py
@@ -395,3 +395,23 @@ def test_assert_frame_equal_set_mismatch():
     msg = r'DataFrame.iloc\[:, 0\] \(column name="set_column"\) values are different'
     with pytest.raises(AssertionError, match=msg):
         tm.assert_frame_equal(df1, df2)
+
+
+def test_assert_frame_equal_na_dtype_mismatch():
+    # GH#61473 - pd.NA and NaN compare equal when check_dtype=False
+    df1 = DataFrame({"a": [pd.NA, 1, 2]}, dtype="Int64")
+    df2 = DataFrame({"a": [float("nan"), 1, 2]}, dtype="float64")
+
+    # Nulls match and dtypes are ignored, so this must not raise.
+    tm.assert_frame_equal(df1, df2, check_dtype=False)
+
+    # With check_dtype=True the dtype mismatch is still reported.
+    msg = (
+        "Attributes of DataFrame\\.iloc\\[:, 0\\] "
+        '\\(column name="a"\\) are different\n\n'
+        'Attribute "dtype" are different\n'
+        "\\[left\\]:  Int64\n"
+        "\\[right\\]: float64"
+    )
+    with pytest.raises(AssertionError, match=msg):
+        tm.assert_frame_equal(df1, df2, check_dtype=True)