Skip to content
3 changes: 2 additions & 1 deletion doc/source/whatsnew/v1.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -103,9 +103,10 @@ Bug fixes

Categorical
^^^^^^^^^^^

- Bug where :func:`merge` was unable to join on non-unique categorical indices (:issue:`28189`)
- Bug when passing categorical data to :class:`Index` constructor along with ``dtype=object`` incorrectly returning a :class:`CategoricalIndex` instead of object-dtype :class:`Index` (:issue:`32167`)
-
-

Datetimelike
^^^^^^^^^^^^
Expand Down
2 changes: 2 additions & 0 deletions pandas/_libs/join.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,8 @@ ctypedef fused join_t:
float64_t
float32_t
object
int8_t
int16_t
int32_t
int64_t
uint64_t
Expand Down
7 changes: 7 additions & 0 deletions pandas/core/indexes/category.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
from pandas.core.indexes.base import Index, _index_shared_docs, maybe_extract_name
from pandas.core.indexes.extension import ExtensionIndex, inherit_names
import pandas.core.missing as missing
from pandas.core.ops import get_op_result_name

_index_doc_kwargs = dict(ibase._index_doc_kwargs)
_index_doc_kwargs.update(dict(target_klass="CategoricalIndex"))
Expand Down Expand Up @@ -785,6 +786,12 @@ def _delegate_method(self, name: str, *args, **kwargs):
return res
return CategoricalIndex(res, name=self.name)

def _wrap_joined_index(
    self, joined: np.ndarray, other: "CategoricalIndex"
) -> "CategoricalIndex":
    """
    Wrap the raw result of a join back into an index of this type.

    Parameters
    ----------
    joined : np.ndarray
        Join result, handed unchanged to ``self._create_from_codes``
        (presumably category codes; confirm against the join caller).
    other : CategoricalIndex
        The other operand of the join. Only used to work out the name
        of the result.

    Returns
    -------
    CategoricalIndex
        The object built by ``self._create_from_codes``, carrying the
        name chosen by ``get_op_result_name(self, other)``.
    """
    return self._create_from_codes(joined, name=get_op_result_name(self, other))


# Post-definition setup calls on CategoricalIndex.
# NOTE(review): judging only by the method names, these appear to disable
# add/sub and the remaining numeric operators for this index type; the
# helpers are defined outside this view, so confirm against their definitions.
CategoricalIndex._add_numeric_methods_add_sub_disabled()
CategoricalIndex._add_numeric_methods_disabled()
Expand Down
22 changes: 22 additions & 0 deletions pandas/tests/reshape/merge/test_merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -2163,3 +2163,25 @@ def test_merge_datetime_upcast_dtype():
}
)
tm.assert_frame_equal(result, expected)


@pytest.mark.parametrize("n_categories", [5, 128])
def test_categorical_non_unique_monotonic(n_categories):
    """
    Left-merge on a non-unique, monotonic CategoricalIndex (GH 28189).

    The left frame's index is ``[0, 0, 1, ..., n_categories - 1]`` (category
    0 appears twice); the right frame holds a single row for category 0.
    Only the two left rows labelled 0 should pick up the right-hand value;
    every other row gets NaN.
    """
    # GH 28189
    # With n_categories as 5, we test the int8 case is hit in libjoin,
    # with n_categories as 128 we test the int16 case.
    left_index = CategoricalIndex([0] + list(range(n_categories)))
    df1 = DataFrame(range(n_categories + 1), columns=["value"], index=left_index)
    df2 = DataFrame(
        [[6]],
        columns=["value"],
        index=CategoricalIndex([0], categories=np.arange(n_categories)),
    )

    result = merge(df1, df2, how="left", left_index=True, right_index=True)

    # Rows 0 and 1 of df1 both carry index label 0, so they are the only
    # rows that match df2; value_y is float because the rest are NaN.
    expected = DataFrame(
        [[i, 6.0] if i < 2 else [i, np.nan] for i in range(n_categories + 1)],
        columns=["value_x", "value_y"],
        index=left_index,
    )
    # (result, expected) order matches the other assertions in this file,
    # so a failure reports "left" as the actual and "right" as the expected.
    tm.assert_frame_equal(result, expected)