diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index 49034616b374a..ecfaac2210807 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -97,6 +97,9 @@ def is_nonempty(x) -> bool: # Creating an empty array directly is tempting, but the winnings would be # marginal given that it would still require shape & dtype calculation and # np.concatenate which has them both implemented is compiled. + non_empties = [x for x in to_concat if is_nonempty(x)] + if non_empties and axis == 0: + to_concat = non_empties typs = get_dtype_kinds(to_concat) _contains_datetime = any(typ.startswith("datetime") for typ in typs) @@ -114,10 +117,17 @@ def is_nonempty(x) -> bool: elif "sparse" in typs: return _concat_sparse(to_concat, axis=axis, typs=typs) - all_empty = all(not is_nonempty(x) for x in to_concat) - if any(is_extension_array_dtype(x) for x in to_concat) and axis == 1: + all_empty = not len(non_empties) + single_dtype = len({x.dtype for x in to_concat}) == 1 + any_ea = any(is_extension_array_dtype(x.dtype) for x in to_concat) + + if any_ea and axis == 1: to_concat = [np.atleast_2d(x.astype("object")) for x in to_concat] + elif any_ea and single_dtype and axis == 0: + cls = type(to_concat[0]) + return cls._concat_same_type(to_concat) + if all_empty: # we have all empties, but may need to coerce the result dtype to # object if we have non-numeric type operands (numpy would otherwise diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index dda932cafe73b..9630abf61f692 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1649,21 +1649,11 @@ def concat(self, to_concat, new_axis: Index) -> "SingleBlockManager": ------- SingleBlockManager """ - non_empties = [x for x in to_concat if len(x) > 0] - # check if all series are of the same block type: - if len(non_empties) > 0: - blocks = [obj.blocks[0] for obj in non_empties] - if len({b.dtype for b in blocks}) == 1: - new_block = blocks[0].concat_same_type(blocks) - else: - values = [x.values for x in blocks] - values = concat_compat(values) - new_block = make_block(values, placement=slice(0, len(values), 1)) - else: - values = [x._block.values for x in to_concat] - values = concat_compat(values) - new_block = make_block(values, placement=slice(0, len(values), 1)) + blocks = [obj.blocks[0] for obj in to_concat] + values = concat_compat([x.values for x in blocks]) + + new_block = make_block(values, placement=slice(0, len(values), 1)) mgr = SingleBlockManager(new_block, new_axis) return mgr diff --git a/pandas/tests/dtypes/test_concat.py b/pandas/tests/dtypes/test_concat.py index 02daa185b1cdb..1fbbd3356ae13 100644 --- a/pandas/tests/dtypes/test_concat.py +++ b/pandas/tests/dtypes/test_concat.py @@ -2,7 +2,9 @@ import pandas.core.dtypes.concat as _concat +import pandas as pd from pandas import DatetimeIndex, Period, PeriodIndex, Series, TimedeltaIndex +import pandas._testing as tm @pytest.mark.parametrize( @@ -76,3 +78,13 @@ def test_get_dtype_kinds(index_or_series, to_concat, expected): def test_get_dtype_kinds_period(to_concat, expected): result = _concat.get_dtype_kinds(to_concat) assert result == set(expected) + + +def test_concat_mismatched_categoricals_with_empty(): + # concat_compat behavior on series._values should match pd.concat on series + ser1 = Series(["a", "b", "c"], dtype="category") + ser2 = Series([], dtype="category") + + result = _concat.concat_compat([ser1._values, ser2._values]) + expected = pd.concat([ser1, ser2])._values + tm.assert_categorical_equal(result, expected) diff --git a/pandas/tests/extension/test_external_block.py b/pandas/tests/extension/test_external_block.py index 26606d7e799e8..1960e3d09245c 100644 --- a/pandas/tests/extension/test_external_block.py +++ b/pandas/tests/extension/test_external_block.py @@ -2,7 +2,7 @@ import pytest import pandas as pd -from pandas.core.internals import BlockManager, SingleBlockManager +from pandas.core.internals import BlockManager from pandas.core.internals.blocks import ExtensionBlock @@ -33,17 +33,6 @@ def df(): return pd.DataFrame(block_manager) -def test_concat_series(): - # GH17728 - values = np.arange(3, dtype="int64") - block = CustomBlock(values, placement=slice(0, 3)) - mgr = SingleBlockManager(block, pd.RangeIndex(3)) - s = pd.Series(mgr, pd.RangeIndex(3), fastpath=True) - - res = pd.concat([s, s]) - assert isinstance(res._data.blocks[0], CustomBlock) - - def test_concat_dataframe(df): # GH17728 res = pd.concat([df, df])