Skip to content

BUG: merge with left and/or right empty returning mis-ordered columns #55028

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Sep 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v2.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,7 @@ Groupby/resample/rolling

Reshaping
^^^^^^^^^
-
- Bug in :func:`merge` returning columns in incorrect order when left and/or right is empty (:issue:`51929`)
-

Sparse
Expand Down
7 changes: 1 addition & 6 deletions pandas/core/reshape/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -1272,12 +1272,7 @@ def _get_merge_keys(
# work-around for merge_asof(right_index=True)
right_keys.append(right.index._values)
if lk is not None and lk == rk: # FIXME: what about other NAs?
# avoid key upcast in corner case (length-0)
lk = cast(Hashable, lk)
if len(left) > 0:
right_drop.append(rk)
else:
left_drop.append(lk)
right_drop.append(rk)
else:
rk = cast(ArrayLike, rk)
right_keys.append(rk)
Expand Down
47 changes: 33 additions & 14 deletions pandas/tests/reshape/merge/test_merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -582,11 +582,11 @@ def test_merge_empty_frame(self, series_of_dtype, series_of_dtype2):
df_empty = df[:0]
expected = DataFrame(
{
"value_x": Series(dtype=df.dtypes["value"]),
"key": Series(dtype=df.dtypes["key"]),
"value_x": Series(dtype=df.dtypes["value"]),
"value_y": Series(dtype=df.dtypes["value"]),
},
columns=["value_x", "key", "value_y"],
columns=["key", "value_x", "value_y"],
)
actual = df_empty.merge(df, on="key")
tm.assert_frame_equal(actual, expected)
Expand Down Expand Up @@ -889,13 +889,13 @@ def test_merge_on_datetime64tz_empty(self):
result = left.merge(right, on="date")
expected = DataFrame(
{
"date": Series(dtype=dtz),
"value_x": Series(dtype=float),
"date2_x": Series(dtype=dtz),
"date": Series(dtype=dtz),
"value_y": Series(dtype=float),
"date2_y": Series(dtype=dtz),
},
columns=["value_x", "date2_x", "date", "value_y", "date2_y"],
columns=["date", "value_x", "date2_x", "value_y", "date2_y"],
)
tm.assert_frame_equal(result, expected)

Expand Down Expand Up @@ -1827,11 +1827,9 @@ def test_merge_empty(self, left_empty, how, exp):
if exp == "left":
expected = DataFrame({"A": [2, 1], "B": [3, 4], "C": [np.nan, np.nan]})
elif exp == "right":
expected = DataFrame({"B": [np.nan], "A": [1], "C": [5]})
expected = DataFrame({"A": [1], "B": [np.nan], "C": [5]})
elif exp == "empty":
expected = DataFrame(columns=["A", "B", "C"], dtype="int64")
if left_empty:
expected = expected[["B", "A", "C"]]
elif exp == "empty_cross":
expected = DataFrame(columns=["A_x", "B", "A_y", "C"], dtype="int64")

Expand Down Expand Up @@ -2481,14 +2479,12 @@ def test_merge_multiindex_columns():
result = frame_x.merge(frame_y, on="id", suffixes=((l_suf, r_suf)))

# Constructing the expected results
expected_labels = [letter + l_suf for letter in letters] + [
letter + r_suf for letter in letters
]
expected_index = MultiIndex.from_product(
[expected_labels, numbers], names=["outer", "inner"]
)
tuples = [(letter + l_suf, num) for letter in letters for num in numbers]
tuples += [("id", "")]
tuples += [(letter + r_suf, num) for letter in letters for num in numbers]

expected_index = MultiIndex.from_tuples(tuples, names=["outer", "inner"])
expected = DataFrame(columns=expected_index)
expected["id"] = ""

tm.assert_frame_equal(result, expected)

Expand Down Expand Up @@ -2959,3 +2955,26 @@ def test_merge_arrow_string_index(any_string_dtype):
{"a": Series(["a", "b"], dtype=any_string_dtype), "b": [1, np.nan]}
)
tm.assert_frame_equal(result, expected)


@pytest.mark.parametrize("left_empty", [True, False])
@pytest.mark.parametrize("right_empty", [True, False])
def test_merge_empty_frames_column_order(left_empty, right_empty):
    """Outer-merge on "A" keeps columns ordered A, B, C, D for empty inputs.

    Covers every combination of an empty/non-empty left frame and an
    empty/non-empty right frame (GH 51929).
    """
    # GH 51929
    df1 = DataFrame(1, index=[0], columns=["A", "B"])
    df2 = DataFrame(1, index=[0], columns=["A", "C", "D"])

    if left_empty:
        df1 = df1.iloc[:0]
    if right_empty:
        df2 = df2.iloc[:0]

    result = merge(df1, df2, on=["A"], how="outer")

    expected = DataFrame(1, index=[0], columns=["A", "B", "C", "D"])
    if left_empty and right_empty:
        expected = expected.iloc[:0]
    elif left_empty:
        # Replace the whole column instead of writing NaN into the int64
        # column via ``.loc``, which depends on silent in-place upcasting.
        expected["B"] = np.nan
    elif right_empty:
        expected[["C", "D"]] = np.nan
    tm.assert_frame_equal(result, expected)