From 16a78449cd9301ea40dcbebbd3c7a0f552aad57b Mon Sep 17 00:00:00 2001 From: PCerles Date: Mon, 5 Oct 2020 11:53:27 -0700 Subject: [PATCH 01/17] possible fix for missing datetime data types --- pandas/core/reshape/merge.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 5012be593820e..69f26a68bb1c4 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -831,11 +831,14 @@ def _maybe_add_join_keys(self, result, left_indexer, right_indexer): # if we have an all missing left_indexer # make sure to just use the right values - mask = left_indexer == -1 - if mask.all(): + mask_left = left_indexer == -1 + mask_right = right_indexer == -1 + if mask_left.all(): key_col = rvals + elif mask_right.all(): + key_col = lvals else: - key_col = Index(lvals).where(~mask, rvals) + key_col = Index(lvals).where(~mask_left, rvals) if result._is_label_reference(name): result[name] = key_col From 029447d7b77c6f0c45132449c95a8ca5bd9eb3eb Mon Sep 17 00:00:00 2001 From: PCerles Date: Mon, 5 Oct 2020 12:18:39 -0700 Subject: [PATCH 02/17] update comment --- pandas/core/reshape/merge.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 69f26a68bb1c4..516ae90360be7 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -830,7 +830,7 @@ def _maybe_add_join_keys(self, result, left_indexer, right_indexer): rvals = algos.take_1d(take_right, right_indexer, fill_value=rfill) # if we have an all missing left_indexer - # make sure to just use the right values + # make sure to just use the right values or vice-versa mask_left = left_indexer == -1 mask_right = right_indexer == -1 if mask_left.all(): From 30d0c902883fab85ad3957b08c747596e2e7c905 Mon Sep 17 00:00:00 2001 From: PCerles Date: Mon, 5 Oct 2020 14:55:57 -0700 Subject: [PATCH 03/17] added test --- pandas/tests/reshape/merge/test_multi.py | 32 ++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/pandas/tests/reshape/merge/test_multi.py b/pandas/tests/reshape/merge/test_multi.py index b1922241c7843..127452afc58c2 100644 --- a/pandas/tests/reshape/merge/test_multi.py +++ b/pandas/tests/reshape/merge/test_multi.py @@ -457,6 +457,38 @@ def test_merge_na_keys(self): tm.assert_frame_equal(result, expected) + def test_merge_na_datetime_keys_empty_df(self): + data = [ + [pd.Timestamp("1950-01-01"), "A", 1.5], + [pd.Timestamp("1950-01-01"), "B", 1.5], + [pd.Timestamp("1950-01-01"), "B", 1.5], + [pd.Timestamp("1950-01-01"), "B", np.nan], + [pd.Timestamp("1950-01-01"), "B", 4.0], + [pd.Timestamp("1950-01-01"), "C", 4.0], + [pd.Timestamp("1950-01-01"), "C", np.nan], + [pd.Timestamp("1950-01-01"), "C", 3.0], + [pd.Timestamp("1950-01-01"), "C", 4.0], + ] + + frame = DataFrame(data, columns=["date", "panel", "data"]).set_index( + ["date", "panel"] + ) + + other_data = [] + other = DataFrame(other_data, columns=["date", "panel", "state"]).set_index( + ["date", "panel"] + ) + + expected = DataFrame([], columns=["date", "panel", "data", "state"]) + expected[["date", "panel", "data"]] = frame.reset_index()[ + ["date", "panel", "data"] + ] + expected = expected.set_index(["date", "panel"]) + + result = frame.merge(other, how="left", on=["date", "panel"]) + + tm.assert_frame_equal(result, expected) + @pytest.mark.parametrize("klass", [None, np.asarray, Series, Index]) def test_merge_datetime_index(self, klass): # see gh-19038 From 08d0ef1df54d9c5948c5615c6561735aa836be8c Mon Sep 17 00:00:00 2001 From: PCerles Date: Mon, 5 Oct 2020 14:56:31 -0700 Subject: [PATCH 04/17] updated test --- pandas/tests/reshape/merge/test_multi.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/pandas/tests/reshape/merge/test_multi.py b/pandas/tests/reshape/merge/test_multi.py index 127452afc58c2..63b3ba40a5752 100644 --- a/pandas/tests/reshape/merge/test_multi.py +++ b/pandas/tests/reshape/merge/test_multi.py @@ -460,14 +460,14 @@ def test_merge_na_keys(self): def test_merge_na_datetime_keys_empty_df(self): data = [ [pd.Timestamp("1950-01-01"), "A", 1.5], - [pd.Timestamp("1950-01-01"), "B", 1.5], - [pd.Timestamp("1950-01-01"), "B", 1.5], - [pd.Timestamp("1950-01-01"), "B", np.nan], - [pd.Timestamp("1950-01-01"), "B", 4.0], - [pd.Timestamp("1950-01-01"), "C", 4.0], - [pd.Timestamp("1950-01-01"), "C", np.nan], - [pd.Timestamp("1950-01-01"), "C", 3.0], - [pd.Timestamp("1950-01-01"), "C", 4.0], + [pd.Timestamp("1950-01-02"), "B", 1.5], + [pd.Timestamp("1950-01-03"), "B", 1.5], + [pd.Timestamp("1950-01-04"), "B", np.nan], + [pd.Timestamp("1950-01-05"), "B", 4.0], + [pd.Timestamp("1950-01-06"), "C", 4.0], + [pd.Timestamp("1950-01-07"), "C", np.nan], + [pd.Timestamp("1950-01-08"), "C", 3.0], + [pd.Timestamp("1950-01-09"), "C", 4.0], ] frame = DataFrame(data, columns=["date", "panel", "data"]).set_index( From 3da79a4f66ca7074e5d61efb8bc6dd9808db794f Mon Sep 17 00:00:00 2001 From: PCerles Date: Mon, 5 Oct 2020 15:04:01 -0700 Subject: [PATCH 05/17] added release note --- doc/source/whatsnew/v1.2.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 6f137302d4994..4499d640c5e2b 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -467,7 +467,7 @@ MultiIndex - Bug in :meth:`DataFrame.xs` when used with :class:`IndexSlice` raises ``TypeError`` with message ``"Expected label or tuple of labels"`` (:issue:`35301`) - Bug in :meth:`DataFrame.reset_index` with ``NaT`` values in index raises ``ValueError`` with message ``"cannot convert float NaN to integer"`` (:issue:`36541`) -- +- Fixed regression in :func:`merge` on merging datetime index with empty DF (:issue:`36895`) I/O ^^^ From b3a52611e44b610f59c8eb4809e3f002d9e5f877 Mon Sep 17 00:00:00 2001 From: PCerles Date: Tue, 6 Oct 2020 12:51:24 -0700 Subject: [PATCH 06/17] restructured test to not rely on reset_index --- pandas/tests/reshape/merge/test_multi.py | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/pandas/tests/reshape/merge/test_multi.py b/pandas/tests/reshape/merge/test_multi.py index 63b3ba40a5752..36f029675b91a 100644 --- a/pandas/tests/reshape/merge/test_multi.py +++ b/pandas/tests/reshape/merge/test_multi.py @@ -457,7 +457,7 @@ def test_merge_na_keys(self): tm.assert_frame_equal(result, expected) - def test_merge_na_datetime_keys_empty_df(self): + def test_merge_datetime_index_empty_df(self): data = [ [pd.Timestamp("1950-01-01"), "A", 1.5], [pd.Timestamp("1950-01-02"), "B", 1.5], @@ -473,16 +473,22 @@ def test_merge_na_datetime_keys_empty_df(self): frame = DataFrame(data, columns=["date", "panel", "data"]).set_index( ["date", "panel"] ) - - other_data = [] - other = DataFrame(other_data, columns=["date", "panel", "state"]).set_index( + other = DataFrame(columns=["date", "panel", "state"]).set_index( ["date", "panel"] ) - - expected = DataFrame([], columns=["date", "panel", "data", "state"]) - expected[["date", "panel", "data"]] = frame.reset_index()[ - ["date", "panel", "data"] + expected_data = [ + [pd.Timestamp("1950-01-01"), "A", 1.5, pd.NA], + [pd.Timestamp("1950-01-02"), "B", 1.5, pd.NA], + [pd.Timestamp("1950-01-03"), "B", 1.5, pd.NA], + [pd.Timestamp("1950-01-04"), "B", np.nan, pd.NA], + [pd.Timestamp("1950-01-05"), "B", 4.0, pd.NA], + [pd.Timestamp("1950-01-06"), "C", 4.0, pd.NA], + [pd.Timestamp("1950-01-07"), "C", np.nan, pd.NA], + [pd.Timestamp("1950-01-08"), "C", 3.0, pd.NA], + [pd.Timestamp("1950-01-09"), "C", 4.0, pd.NA], ] + + expected = DataFrame(expected_data, columns=["date", "panel", "data", "state"]) expected = expected.set_index(["date", "panel"]) result = frame.merge(other, how="left", on=["date", "panel"]) From d85af3049ee092198a254fd2d487b18afdf33469 Mon Sep 17 00:00:00 2001 From: PCerles Date: Tue, 6 Oct 2020 12:52:12 -0700 Subject: [PATCH 07/17] and make data smaller --- pandas/tests/reshape/merge/test_multi.py | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/pandas/tests/reshape/merge/test_multi.py b/pandas/tests/reshape/merge/test_multi.py index 36f029675b91a..412c19c8f1b70 100644 --- a/pandas/tests/reshape/merge/test_multi.py +++ b/pandas/tests/reshape/merge/test_multi.py @@ -461,13 +461,6 @@ def test_merge_datetime_index_empty_df(self): data = [ [pd.Timestamp("1950-01-01"), "A", 1.5], [pd.Timestamp("1950-01-02"), "B", 1.5], - [pd.Timestamp("1950-01-03"), "B", 1.5], - [pd.Timestamp("1950-01-04"), "B", np.nan], - [pd.Timestamp("1950-01-05"), "B", 4.0], - [pd.Timestamp("1950-01-06"), "C", 4.0], - [pd.Timestamp("1950-01-07"), "C", np.nan], - [pd.Timestamp("1950-01-08"), "C", 3.0], - [pd.Timestamp("1950-01-09"), "C", 4.0], ] frame = DataFrame(data, columns=["date", "panel", "data"]).set_index( @@ -479,13 +472,6 @@ def test_merge_datetime_index_empty_df(self): expected_data = [ [pd.Timestamp("1950-01-01"), "A", 1.5, pd.NA], [pd.Timestamp("1950-01-02"), "B", 1.5, pd.NA], - [pd.Timestamp("1950-01-03"), "B", 1.5, pd.NA], - [pd.Timestamp("1950-01-04"), "B", np.nan, pd.NA], - [pd.Timestamp("1950-01-05"), "B", 4.0, pd.NA], - [pd.Timestamp("1950-01-06"), "C", 4.0, pd.NA], - [pd.Timestamp("1950-01-07"), "C", np.nan, pd.NA], - [pd.Timestamp("1950-01-08"), "C", 3.0, pd.NA], - [pd.Timestamp("1950-01-09"), "C", 4.0, pd.NA], ] expected = DataFrame(expected_data, columns=["date", "panel", "data", "state"]) From 70651d1b9913657b4d5c9a2764b249f106ce10c5 Mon Sep 17 00:00:00 2001 From: PCerles Date: Tue, 6 Oct 2020 13:50:32 -0700 Subject: [PATCH 08/17] reformat from pd.NA --- pandas/tests/reshape/merge/test_multi.py | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/pandas/tests/reshape/merge/test_multi.py b/pandas/tests/reshape/merge/test_multi.py index 412c19c8f1b70..95da9d7424821 100644 --- a/pandas/tests/reshape/merge/test_multi.py +++ b/pandas/tests/reshape/merge/test_multi.py @@ -458,23 +458,26 @@ def test_merge_na_keys(self): tm.assert_frame_equal(result, expected) def test_merge_datetime_index_empty_df(self): - data = [ - [pd.Timestamp("1950-01-01"), "A", 1.5], - [pd.Timestamp("1950-01-02"), "B", 1.5], - ] - frame = DataFrame(data, columns=["date", "panel", "data"]).set_index( + date = np.array( + [pd.Timestamp("1950-01-01"), pd.Timestamp("1950-01-02")], + dtype=np.datetime64, + ) + panel = np.array(["A", "B"], dtype=object) + data = np.array([1.5, 1.5], dtype=np.float64) + + frame = DataFrame({"date": date, "panel": panel, "data": data}).set_index( ["date", "panel"] ) other = DataFrame(columns=["date", "panel", "state"]).set_index( ["date", "panel"] ) - expected_data = [ - [pd.Timestamp("1950-01-01"), "A", 1.5, pd.NA], - [pd.Timestamp("1950-01-02"), "B", 1.5, pd.NA], - ] - expected = DataFrame(expected_data, columns=["date", "panel", "data", "state"]) + state = np.array([np.nan, np.nan], dtype=object) + + expected = DataFrame( + {"date": date, "panel": panel, "data": data, "state": state} + ) expected = expected.set_index(["date", "panel"]) result = frame.merge(other, how="left", on=["date", "panel"]) From a70c0ff3671e69469ea7778e3885b48ac024d9ba Mon Sep 17 00:00:00 2001 From: PCerles Date: Tue, 27 Oct 2020 10:50:52 -0700 Subject: [PATCH 09/17] test fixes --- pandas/tests/reshape/merge/test_multi.py | 57 ++++++++++++------------ 1 file changed, 28 insertions(+), 29 deletions(-) diff --git a/pandas/tests/reshape/merge/test_multi.py b/pandas/tests/reshape/merge/test_multi.py index 95da9d7424821..659750aa9d14e 100644 --- a/pandas/tests/reshape/merge/test_multi.py +++ b/pandas/tests/reshape/merge/test_multi.py @@ -1,11 +1,11 @@ import numpy as np -import pytest - +from numpy.random import randn import pandas as pd from pandas import DataFrame, Index, MultiIndex, Series import pandas._testing as tm from pandas.core.reshape.concat import concat from pandas.core.reshape.merge import merge +import pytest @pytest.fixture @@ -457,33 +457,6 @@ def test_merge_na_keys(self): tm.assert_frame_equal(result, expected) - def test_merge_datetime_index_empty_df(self): - - date = np.array( - [pd.Timestamp("1950-01-01"), pd.Timestamp("1950-01-02")], - dtype=np.datetime64, - ) - panel = np.array(["A", "B"], dtype=object) - data = np.array([1.5, 1.5], dtype=np.float64) - - frame = DataFrame({"date": date, "panel": panel, "data": data}).set_index( - ["date", "panel"] - ) - other = DataFrame(columns=["date", "panel", "state"]).set_index( - ["date", "panel"] - ) - - state = np.array([np.nan, np.nan], dtype=object) - - expected = DataFrame( - {"date": date, "panel": panel, "data": data, "state": state} - ) - expected = expected.set_index(["date", "panel"]) - - result = frame.merge(other, how="left", on=["date", "panel"]) - - tm.assert_frame_equal(result, expected) - @pytest.mark.parametrize("klass", [None, np.asarray, Series, Index]) def test_merge_datetime_index(self, klass): # see gh-19038 @@ -863,3 +836,29 @@ def test_join_multi_wrong_order(self): ) tm.assert_frame_equal(result, expected) + + +def test_merge_datetime_index_empty_df(): + + frame = DataFrame( + { + "date": [pd.Timestamp("1950-01-01"), pd.Timestamp("1950-01-02")], + "panel": ["A", "B"], + "data": [1.5, 1.5], + } + ).set_index(["date", "panel"]) + other = DataFrame(columns=["date", "panel", "state"]).set_index(["date", "panel"]) + + expected = DataFrame( + { + "date": [pd.Timestamp("1950-01-01"), pd.Timestamp("1950-01-02")], + "panel": ["A", "B"], + "data": [1.5, 1.5], + "state": [None, None], + } + ) + expected = expected.set_index(["date", "panel"]) + + result = frame.merge(other, how="left", on=["date", "panel"]) + + tm.assert_frame_equal(result, expected) From ce3a1fa6819fcce24c8e1cf4d91339e320bc8eb8 Mon Sep 17 00:00:00 2001 From: PCerles Date: Tue, 27 Oct 2020 10:56:13 -0700 Subject: [PATCH 10/17] structure --- doc/source/whatsnew/v1.2.0.rst | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 4499d640c5e2b..b4d8fc0d1b41e 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -343,7 +343,6 @@ Deprecations .. --------------------------------------------------------------------------- - .. _whatsnew_120.performance: Performance improvements @@ -366,6 +365,15 @@ Performance improvements .. --------------------------------------------------------------------------- +.. _whatsnew_120.regressions: + +Fixed regressions +~~~~~~~~~~~~~~~~~ +- Fixed regression in :func:`merge` on merging datetime index with empty DF (:issue:`36895`) + +.. --------------------------------------------------------------------------- + + .. _whatsnew_120.bug_fixes: Bug fixes @@ -467,7 +475,6 @@ MultiIndex - Bug in :meth:`DataFrame.xs` when used with :class:`IndexSlice` raises ``TypeError`` with message ``"Expected label or tuple of labels"`` (:issue:`35301`) - Bug in :meth:`DataFrame.reset_index` with ``NaT`` values in index raises ``ValueError`` with message ``"cannot convert float NaN to integer"`` (:issue:`36541`) -- Fixed regression in :func:`merge` on merging datetime index with empty DF (:issue:`36895`) I/O ^^^ From 83be75e95dc0905628f6fd99aacc515b1e77661c Mon Sep 17 00:00:00 2001 From: PCerles Date: Tue, 27 Oct 2020 10:59:29 -0700 Subject: [PATCH 11/17] comma missing --- doc/source/whatsnew/v1.2.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index b4d8fc0d1b41e..b30e4ec87eab6 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -475,6 +475,7 @@ MultiIndex - Bug in :meth:`DataFrame.xs` when used with :class:`IndexSlice` raises ``TypeError`` with message ``"Expected label or tuple of labels"`` (:issue:`35301`) - Bug in :meth:`DataFrame.reset_index` with ``NaT`` values in index raises ``ValueError`` with message ``"cannot convert float NaN to integer"`` (:issue:`36541`) +- I/O ^^^ From fe1335e5aefc08fec65cbe062b6b756fb22f4da7 Mon Sep 17 00:00:00 2001 From: PCerles Date: Tue, 27 Oct 2020 14:59:36 -0700 Subject: [PATCH 12/17] fix import and explicitly construct multi indexes --- pandas/tests/reshape/merge/test_multi.py | 36 +++++++++++++++--------- 1 file changed, 22 insertions(+), 14 deletions(-) diff --git a/pandas/tests/reshape/merge/test_multi.py b/pandas/tests/reshape/merge/test_multi.py index 659750aa9d14e..4f7d498480865 100644 --- a/pandas/tests/reshape/merge/test_multi.py +++ b/pandas/tests/reshape/merge/test_multi.py @@ -1,11 +1,12 @@ import numpy as np from numpy.random import randn +import pytest + import pandas as pd from pandas import DataFrame, Index, MultiIndex, Series import pandas._testing as tm from pandas.core.reshape.concat import concat from pandas.core.reshape.merge import merge -import pytest @pytest.fixture @@ -840,25 +841,32 @@ def test_join_multi_wrong_order(self): def test_merge_datetime_index_empty_df(): + midx1 = pd.MultiIndex.from_tuples( + [[pd.Timestamp("1950-01-01"), "A"], [pd.Timestamp("1950-01-02"), "B"]], + names=["date", "panel"], + ) frame = DataFrame( - { - "date": [pd.Timestamp("1950-01-01"), pd.Timestamp("1950-01-02")], - "panel": ["A", "B"], + data={ "data": [1.5, 1.5], - } - ).set_index(["date", "panel"]) - other = DataFrame(columns=["date", "panel", "state"]).set_index(["date", "panel"]) + }, + index=midx1, + ) + + midx2 = pd.MultiIndex.from_tuples([], names=["date", "panel"]) + + other = DataFrame(index=midx2, columns=["state"]) + + midx3 = pd.MultiIndex.from_tuples( + [[pd.Timestamp("1950-01-01"), "A"], [pd.Timestamp("1950-01-02"), "B"]], + names=["date", "panel"], + ) expected = DataFrame( - { - "date": [pd.Timestamp("1950-01-01"), pd.Timestamp("1950-01-02")], - "panel": ["A", "B"], + data={ "data": [1.5, 1.5], "state": [None, None], - } + }, + index=midx3, ) - expected = expected.set_index(["date", "panel"]) - result = frame.merge(other, how="left", on=["date", "panel"]) - tm.assert_frame_equal(result, expected) From e5045f315c6a145d1fa134eb749fd6919703fe75 Mon Sep 17 00:00:00 2001 From: PCerles Date: Mon, 2 Nov 2020 13:33:37 -0800 Subject: [PATCH 13/17] changed 1.2 release notes and added right/left tests --- doc/source/whatsnew/v1.2.0.rst | 11 +-- pandas/tests/reshape/merge/test_multi.py | 86 ++++++++++++++---------- 2 files changed, 52 insertions(+), 45 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index b30e4ec87eab6..e89a68aa79fd0 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -365,15 +365,6 @@ Performance improvements .. --------------------------------------------------------------------------- -.. _whatsnew_120.regressions: - -Fixed regressions -~~~~~~~~~~~~~~~~~ -- Fixed regression in :func:`merge` on merging datetime index with empty DF (:issue:`36895`) - -.. --------------------------------------------------------------------------- - - .. _whatsnew_120.bug_fixes: Bug fixes @@ -475,7 +466,7 @@ MultiIndex - Bug in :meth:`DataFrame.xs` when used with :class:`IndexSlice` raises ``TypeError`` with message ``"Expected label or tuple of labels"`` (:issue:`35301`) - Bug in :meth:`DataFrame.reset_index` with ``NaT`` values in index raises ``ValueError`` with message ``"cannot convert float NaN to integer"`` (:issue:`36541`) -- +- Fixed regression in :func:`merge` on merging DatetimeIndex with empty DataFrame (:issue:`36895`) I/O ^^^ diff --git a/pandas/tests/reshape/merge/test_multi.py b/pandas/tests/reshape/merge/test_multi.py index 4f7d498480865..464430de1c699 100644 --- a/pandas/tests/reshape/merge/test_multi.py +++ b/pandas/tests/reshape/merge/test_multi.py @@ -1,12 +1,11 @@ import numpy as np from numpy.random import randn -import pytest - import pandas as pd from pandas import DataFrame, Index, MultiIndex, Series import pandas._testing as tm from pandas.core.reshape.concat import concat from pandas.core.reshape.merge import merge +import pytest @pytest.fixture @@ -482,6 +481,56 @@ def test_merge_datetime_index(self, klass): result = df.merge(df, on=[df.index.year], how="inner") tm.assert_frame_equal(result, expected) + def test_merge_datetime_multi_index_empty_df(self): + + midx1 = pd.MultiIndex.from_tuples( + [[pd.Timestamp("1950-01-01"), "A"], [pd.Timestamp("1950-01-02"), "B"]], + names=["date", "panel"], + ) + left = DataFrame( + data={ + "data": [1.5, 1.5], + }, + index=midx1, + ) + + midx2 = pd.MultiIndex.from_tuples([], names=["date", "panel"]) + + right = DataFrame(index=midx2, columns=["state"]) + + midx3 = pd.MultiIndex.from_tuples( + [[pd.Timestamp("1950-01-01"), "A"], [pd.Timestamp("1950-01-02"), "B"]], + names=["date", "panel"], + ) + + expected_left_merge = DataFrame( + data={ + "data": [1.5, 1.5], + "state": [None, None], + }, + index=midx3, + ) + + expected_right_merge = DataFrame( + data={ + "state": [None, None], + "data": [1.5, 1.5], + }, + index=midx3, + ) + + result_left_merge = left.merge(right, how="left", on=["date", "panel"]) + tm.assert_frame_equal(result_left_merge, expected_left_merge) + + result_right_merge = right.merge(left, how="right", on=["date", "panel"]) + tm.assert_frame_equal(result_right_merge, expected_right_merge) + + result_left_join = left.join(right, how="left") + tm.assert_frame_equal(result_left_join, expected_left_merge) + + result_right_join = right.join(left, how="right") + tm.assert_frame_equal(result_right_join, expected_right_merge) + def test_join_multi_levels(self): # GH 3662 @@ -837,36 +886,3 @@ def test_join_multi_wrong_order(self): ) tm.assert_frame_equal(result, expected) - - -def test_merge_datetime_index_empty_df(): - - midx1 = pd.MultiIndex.from_tuples( - [[pd.Timestamp("1950-01-01"), "A"], [pd.Timestamp("1950-01-02"), "B"]], - names=["date", "panel"], - ) - frame = DataFrame( - data={ - "data": [1.5, 1.5], - }, - index=midx1, - ) - - midx2 = pd.MultiIndex.from_tuples([], names=["date", "panel"]) - - other = DataFrame(index=midx2, columns=["state"]) - - midx3 = pd.MultiIndex.from_tuples( - [[pd.Timestamp("1950-01-01"), "A"], [pd.Timestamp("1950-01-02"), "B"]], - names=["date", "panel"], - ) - - expected = DataFrame( - data={ - "data": [1.5, 1.5], - "state": [None, None], - }, - index=midx3, - ) - result = frame.merge(other, how="left", on=["date", "panel"]) - tm.assert_frame_equal(result, expected) From 9fb068767556467ee99d6fe7f74ad54f03212a30 Mon Sep 17 00:00:00 2001 From: PCerles Date: Mon, 2 Nov 2020 13:35:14 -0800 Subject: [PATCH 14/17] accidentally removed newline --- doc/source/whatsnew/v1.2.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index e89a68aa79fd0..a0a8edd409845 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -343,6 +343,7 @@ Deprecations .. --------------------------------------------------------------------------- + .. _whatsnew_120.performance: Performance improvements From 0f856dd50dd30fbd6b43f3dbc30b7321d36cfacd Mon Sep 17 00:00:00 2001 From: PCerles Date: Mon, 2 Nov 2020 13:36:07 -0800 Subject: [PATCH 15/17] fix auto formatter --- pandas/tests/reshape/merge/test_multi.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/reshape/merge/test_multi.py b/pandas/tests/reshape/merge/test_multi.py index 464430de1c699..16af3c95a3e06 100644 --- a/pandas/tests/reshape/merge/test_multi.py +++ b/pandas/tests/reshape/merge/test_multi.py @@ -1,11 +1,11 @@ import numpy as np -from numpy.random import randn +import pytest + import pandas as pd from pandas import DataFrame, Index, MultiIndex, Series import pandas._testing as tm from pandas.core.reshape.concat import concat from pandas.core.reshape.merge import merge -import pytest @pytest.fixture From b1c7519b461d6baccfe3128d8a16b8554ee1ef7d Mon Sep 17 00:00:00 2001 From: PCerles Date: Mon, 2 Nov 2020 15:04:08 -0800 Subject: [PATCH 16/17] changed doc place and parametrized test --- doc/source/whatsnew/v1.2.0.rst | 2 +- pandas/tests/reshape/merge/test_multi.py | 64 ++++++++++++------------ 2 files changed, 32 insertions(+), 34 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index a0a8edd409845..167af1e5e282e 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -467,7 +467,6 @@ MultiIndex - Bug in :meth:`DataFrame.xs` when used with :class:`IndexSlice` raises ``TypeError`` with message ``"Expected label or tuple of labels"`` (:issue:`35301`) - Bug in :meth:`DataFrame.reset_index` with ``NaT`` values in index raises ``ValueError`` with message ``"cannot convert float NaN to integer"`` (:issue:`36541`) -- Fixed regression in :func:`merge` on merging DatetimeIndex with empty DataFrame (:issue:`36895`) I/O ^^^ @@ -534,6 +533,7 @@ Reshaping - Bug in :meth:`DataFrame.pivot` did not preserve :class:`MultiIndex` level names for columns when rows and columns both multiindexed (:issue:`36360`) - Bug in :func:`join` returned a non deterministic level-order for the resulting :class:`MultiIndex` (:issue:`36910`) - Bug in :meth:`DataFrame.combine_first()` caused wrong alignment with dtype ``string`` and one level of ``MultiIndex`` containing only ``NA`` (:issue:`37591`) +- Fixed regression in :func:`merge` on merging DatetimeIndex with empty DataFrame (:issue:`36895`) Sparse ^^^^^^ diff --git a/pandas/tests/reshape/merge/test_multi.py b/pandas/tests/reshape/merge/test_multi.py index 16af3c95a3e06..399a61a86e325 100644 --- a/pandas/tests/reshape/merge/test_multi.py +++ b/pandas/tests/reshape/merge/test_multi.py @@ -2,7 +2,7 @@ import pytest import pandas as pd -from pandas import DataFrame, Index, MultiIndex, Series +from pandas import DataFrame, Index, MultiIndex, Series, Timestamp import pandas._testing as tm from pandas.core.reshape.concat import concat from pandas.core.reshape.merge import merge @@ -481,10 +481,12 @@ def test_merge_datetime_index(self, klass): result = df.merge(df, on=[df.index.year], how="inner") tm.assert_frame_equal(result, expected) - def test_merge_datetime_multi_index_empty_df(self): + @pytest.mark.parametrize("merge_type", ["left", "right"]) + def test_merge_datetime_multi_index_empty_df(self, merge_type): + # see gh-36895 - midx1 = pd.MultiIndex.from_tuples( - [[pd.Timestamp("1950-01-01"), "A"], [pd.Timestamp("1950-01-02"), "B"]], + midx1 = MultiIndex.from_tuples( + [[Timestamp("1950-01-01"), "A"], [Timestamp("1950-01-02"), "B"]], names=["date", "panel"], ) left = DataFrame( @@ -494,42 +496,38 @@ def test_merge_datetime_multi_index_empty_df(self): index=midx1, ) - midx2 = pd.MultiIndex.from_tuples([], names=["date", "panel"]) + midx2 = MultiIndex.from_tuples([], names=["date", "panel"]) right = DataFrame(index=midx2, columns=["state"]) - midx3 = pd.MultiIndex.from_tuples( - [[pd.Timestamp("1950-01-01"), "A"], [pd.Timestamp("1950-01-02"), "B"]], + midx3 = MultiIndex.from_tuples( + [[Timestamp("1950-01-01"), "A"], [Timestamp("1950-01-02"), "B"]], names=["date", "panel"], ) - expected_left_merge = DataFrame( - data={ - "data": [1.5, 1.5], - "state": [None, None], - }, - index=midx3, - ) - - expected_right_merge = DataFrame( - data={ - "state": [None, None], - "data": [1.5, 1.5], - }, - index=midx3, - ) - - result_left_merge = left.merge(right, how="left", on=["date", "panel"]) - tm.assert_frame_equal(result_left_merge, expected_left_merge) - - result_right_merge = right.merge(left, how="right", on=["date", "panel"]) - tm.assert_frame_equal(result_right_merge, expected_right_merge) - - result_left_join = left.join(right, how="left") - tm.assert_frame_equal(result_left_join, expected_left_merge) + if merge_type == "left": + expected = DataFrame( + data={ + "data": [1.5, 1.5], + "state": [None, None], + }, + index=midx3, + ) + results = left.merge(right, how="left", on=["date", "panel"]) + results_join = left.join(right, how="left") + else: + expected = DataFrame( + data={ + "state": [None, None], + "data": [1.5, 1.5], + }, + index=midx3, + ) + results = right.merge(left, how="right", on=["date", "panel"]) + results_join = right.join(left, how="right") - result_right_join = right.join(left, how="right") - tm.assert_frame_equal(result_right_join, expected_right_merge) + tm.assert_frame_equal(results, expected) + tm.assert_frame_equal(results_join, expected) def test_join_multi_levels(self): From 84822c6764a4361999468df4c648e7dd2358f7db Mon Sep 17 00:00:00 2001 From: PCerles Date: Mon, 2 Nov 2020 20:59:11 -0800 Subject: [PATCH 17/17] restructured test per pr comments --- pandas/tests/reshape/merge/test_multi.py | 27 ++++++++++++------------ 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/pandas/tests/reshape/merge/test_multi.py b/pandas/tests/reshape/merge/test_multi.py index 399a61a86e325..260a0e9d486b2 100644 --- a/pandas/tests/reshape/merge/test_multi.py +++ b/pandas/tests/reshape/merge/test_multi.py @@ -485,22 +485,21 @@ def test_merge_datetime_index(self, klass): def test_merge_datetime_multi_index_empty_df(self, merge_type): # see gh-36895 - midx1 = MultiIndex.from_tuples( - [[Timestamp("1950-01-01"), "A"], [Timestamp("1950-01-02"), "B"]], - names=["date", "panel"], - ) left = DataFrame( data={ "data": [1.5, 1.5], }, - index=midx1, + index=MultiIndex.from_tuples( + [[Timestamp("1950-01-01"), "A"], [Timestamp("1950-01-02"), "B"]], + names=["date", "panel"], + ), ) - midx2 = MultiIndex.from_tuples([], names=["date", "panel"]) - - right = DataFrame(index=midx2, columns=["state"]) + right = DataFrame( + index=MultiIndex.from_tuples([], names=["date", "panel"]), columns=["state"] + ) - midx3 = MultiIndex.from_tuples( + expected_index = MultiIndex.from_tuples( [[Timestamp("1950-01-01"), "A"], [Timestamp("1950-01-02"), "B"]], names=["date", "panel"], ) @@ -511,9 +510,9 @@ def test_merge_datetime_multi_index_empty_df(self, merge_type): "data": [1.5, 1.5], "state": [None, None], }, - index=midx3, + index=expected_index, ) - results = left.merge(right, how="left", on=["date", "panel"]) + results_merge = left.merge(right, how="left", on=["date", "panel"]) results_join = left.join(right, how="left") else: expected = DataFrame( @@ -521,12 +520,12 @@ def test_merge_datetime_multi_index_empty_df(self, merge_type): "state": [None, None], "data": [1.5, 1.5], }, - index=midx3, + index=expected_index, ) - results = right.merge(left, how="right", on=["date", "panel"]) + results_merge = right.merge(left, how="right", on=["date", "panel"]) results_join = right.join(left, how="right") - tm.assert_frame_equal(results, expected) + tm.assert_frame_equal(results_merge, expected) tm.assert_frame_equal(results_join, expected) def test_join_multi_levels(self):