diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py
index c6f30ef65e9d5..c75373b82305c 100644
--- a/pandas/core/internals/concat.py
+++ b/pandas/core/internals/concat.py
@@ -350,7 +350,7 @@ def _get_empty_dtype_and_na(join_units):
         dtype = upcast_classes["datetimetz"]
         return dtype[0], tslibs.NaT
     elif "datetime" in upcast_classes:
-        return np.dtype("M8[ns]"), tslibs.iNaT
+        return np.dtype("M8[ns]"), np.datetime64("NaT", "ns")
     elif "timedelta" in upcast_classes:
         return np.dtype("m8[ns]"), np.timedelta64("NaT", "ns")
     else:  # pragma
diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py
index 30c440035d48e..f9acf5b60a3cd 100644
--- a/pandas/tests/reshape/merge/test_merge.py
+++ b/pandas/tests/reshape/merge/test_merge.py
@@ -2152,3 +2152,20 @@ def test_merge_multiindex_columns():
     expected["id"] = ""
 
     tm.assert_frame_equal(result, expected)
+
+
+def test_merge_datetime_upcast_dtype():
+    # https://github.com/pandas-dev/pandas/issues/31208
+    df1 = pd.DataFrame({"x": ["a", "b", "c"], "y": ["1", "2", "4"]})
+    df2 = pd.DataFrame(
+        {"y": ["1", "2", "3"], "z": pd.to_datetime(["2000", "2001", "2002"])}
+    )
+    result = pd.merge(df1, df2, how="left", on="y")
+    expected = pd.DataFrame(
+        {
+            "x": ["a", "b", "c"],
+            "y": ["1", "2", "4"],
+            "z": pd.to_datetime(["2000", "2001", "NaT"]),
+        }
+    )
+    tm.assert_frame_equal(result, expected)