From f0f3a8217148ee1cacce2fff4c6118d1f2cb7b15 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 29 Jun 2018 15:31:31 +0200 Subject: [PATCH 1/6] Allow merging on object / non-object column --- pandas/core/reshape/merge.py | 9 ++++++- pandas/tests/reshape/merge/test_merge.py | 32 ++++++++++-------------- 2 files changed, 21 insertions(+), 20 deletions(-) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index e38c069b3c3fb..9fc57a29cc64f 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -31,6 +31,7 @@ is_bool_dtype, is_list_like, is_datetimelike, + is_object_dtype, _ensure_int64, _ensure_float64, _ensure_object, @@ -946,11 +947,14 @@ def _maybe_coerce_merge_keys(self): "you should use pd.concat".format(lk_dtype=lk.dtype, rk_dtype=rk.dtype)) + coerce_to_object = False + if is_object_dtype(lk) or is_object_dtype(rk): + coerce_to_object = True # if we are numeric, then allow differing # kinds to proceed, eg. int64 and int8, int and float # further if we are object, but we infer to # the same, then proceed - if is_numeric_dtype(lk) and is_numeric_dtype(rk): + elif is_numeric_dtype(lk) and is_numeric_dtype(rk): if lk.dtype.kind == rk.dtype.kind: pass @@ -1001,6 +1005,9 @@ def _maybe_coerce_merge_keys(self): # columns, and end up trying to merge # incompatible dtypes. See GH 16900. else: + coerce_to_object = True + + if coerce_to_object: if name in self.left.columns: typ = lk.categories.dtype if lk_is_cat else object self.left = self.left.assign( diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 037bd9cc7cd18..bfa995fd4e9e4 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -1434,22 +1434,22 @@ def test_different(self, right_vals): # GH 9780 # We allow merging on object and categorical cols and cast # categorical cols to object - if (is_categorical_dtype(right['A'].dtype) or - is_object_dtype(right['A'].dtype)): - result = pd.merge(left, right, on='A') - assert is_object_dtype(result.A.dtype) + # if (is_categorical_dtype(right['A'].dtype) or + # is_object_dtype(right['A'].dtype)): + result = pd.merge(left, right, on='A') + assert is_object_dtype(result.A.dtype) # GH 9780 # We raise for merging on object col and int/float col and # merging on categorical col and int/float col - else: - msg = ("You are trying to merge on " - "{lk_dtype} and {rk_dtype} columns. " - "If you wish to proceed you should use " - "pd.concat".format(lk_dtype=left['A'].dtype, - rk_dtype=right['A'].dtype)) - with tm.assert_raises_regex(ValueError, msg): - pd.merge(left, right, on='A') + # else: + # msg = ("You are trying to merge on " + # "{lk_dtype} and {rk_dtype} columns. " + # "If you wish to proceed you should use " + # "pd.concat".format(lk_dtype=left['A'].dtype, + # rk_dtype=right['A'].dtype)) + # with tm.assert_raises_regex(ValueError, msg): + # pd.merge(left, right, on='A') @pytest.mark.parametrize('d1', [np.int64, np.int32, np.int16, np.int8, np.uint8]) @@ -1548,19 +1548,13 @@ def test_merge_incompat_infer_boolean_object(self): assert_frame_equal(result, expected) @pytest.mark.parametrize('df1_vals, df2_vals', [ - ([0, 1, 2], ["0", "1", "2"]), - ([0.0, 1.0, 2.0], ["0", "1", "2"]), - ([0, 1, 2], [u"0", u"1", u"2"]), - (pd.date_range('1/1/2011', periods=2, freq='D'), ['2011-01-01', - '2011-01-02']), (pd.date_range('1/1/2011', periods=2, freq='D'), [0, 1]), (pd.date_range('1/1/2011', periods=2, freq='D'), [0.0, 1.0]), (pd.date_range('20130101', periods=3), pd.date_range('20130101', periods=3, tz='US/Eastern')), ([0, 1, 2], Series(['a', 'b', 'a']).astype('category')), ([0.0, 1.0, 2.0], Series(['a', 'b', 'a']).astype('category')), - # TODO ([0, 1], pd.Series([False, True], dtype=bool)), - ([0, 1], pd.Series([False, True], dtype=object)) + # TODO ([0, 1], pd.Series([False, True])) ]) def test_merge_incompat_dtypes(self, df1_vals, df2_vals): # GH 9780, GH 15800 From 8ff0f35f75660a653c2f3b615bd16e852511bd83 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 25 Dec 2018 19:50:38 -0500 Subject: [PATCH 2/6] all but bools --- pandas/core/reshape/merge.py | 57 +++++++++++----------- pandas/tests/reshape/merge/test_merge.py | 62 ++++++++++++++++-------- 2 files changed, 73 insertions(+), 46 deletions(-) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 8eb3bd881ea4e..ec4490ddae9f7 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -891,7 +891,7 @@ def _maybe_coerce_merge_keys(self): # coerce these if they are originally incompatible types # # for example if these are categorical, but are not dtype_equal - # or if we have object and integer dtypes + # or if we have object and integer dtypes, that do not infer for lk, rk, name in zip(self.left_join_keys, self.right_join_keys, @@ -901,6 +901,8 @@ def _maybe_coerce_merge_keys(self): lk_is_cat = is_categorical_dtype(lk) rk_is_cat = is_categorical_dtype(rk) + lk_is_object = is_object_dtype(lk) + rk_is_object = is_object_dtype(rk) # if either left or right is a categorical # then the must match exactly in categories & ordered @@ -919,16 +921,13 @@ def _maybe_coerce_merge_keys(self): "you should use pd.concat".format(lk_dtype=lk.dtype, rk_dtype=rk.dtype)) - coerce_to_object = False - if is_object_dtype(lk) or is_object_dtype(rk): - coerce_to_object = True # if we are numeric, then allow differing # kinds to proceed, eg. int64 and int8, int and float # further if we are object, but we infer to # the same, then proceed - elif is_numeric_dtype(lk) and is_numeric_dtype(rk): + if is_numeric_dtype(lk) and is_numeric_dtype(rk): if lk.dtype.kind == rk.dtype.kind: - pass + continue # check whether ints and floats elif is_integer_dtype(rk) and is_float_dtype(lk): @@ -937,6 +936,7 @@ def _maybe_coerce_merge_keys(self): 'columns where the float values ' 'are not equal to their int ' 'representation', UserWarning) + continue elif is_float_dtype(rk) and is_integer_dtype(lk): if not (rk == rk.astype(lk.dtype))[~np.isnan(rk)].all(): @@ -944,22 +944,26 @@ def _maybe_coerce_merge_keys(self): 'columns where the float values ' 'are not equal to their int ' 'representation', UserWarning) + continue # let's infer and see if we are ok elif lib.infer_dtype(lk) == lib.infer_dtype(rk): - pass + continue # Check if we are trying to merge on obviously # incompatible dtypes GH 9780, GH 15800 - # boolean values are considered as numeric, but are still allowed - # to be merged on object boolean values - elif ((is_numeric_dtype(lk) and not is_bool_dtype(lk)) - and not is_numeric_dtype(rk)): - raise ValueError(msg) - elif (not is_numeric_dtype(lk) - and (is_numeric_dtype(rk) and not is_bool_dtype(rk))): - raise ValueError(msg) + # bool values are coerced to object + elif ((lk_is_object and is_bool_dtype(rk)) or + (is_bool_dtype(lk) and rk_is_object)): + pass + + # object values are allowed to be merged + elif ((lk_is_object and is_numeric_dtype(rk)) or + (is_numeric_dtype(lk) and rk_is_object)): + continue + + # datetimelikes must match exactly elif is_datetimelike(lk) and not is_datetimelike(rk): raise ValueError(msg) elif not is_datetimelike(lk) and is_datetimelike(rk): @@ -969,6 +973,9 @@ def _maybe_coerce_merge_keys(self): elif not is_datetime64tz_dtype(lk) and is_datetime64tz_dtype(rk): raise ValueError(msg) + elif lk_is_object and rk_is_object: + continue + # Houston, we have a problem! # let's coerce to object if the dtypes aren't # categorical, otherwise coerce to the category @@ -976,18 +983,14 @@ def _maybe_coerce_merge_keys(self): # then we would lose type information on some # columns, and end up trying to merge # incompatible dtypes. See GH 16900. - else: - coerce_to_object = True - - if coerce_to_object: - if name in self.left.columns: - typ = lk.categories.dtype if lk_is_cat else object - self.left = self.left.assign( - **{name: self.left[name].astype(typ)}) - if name in self.right.columns: - typ = rk.categories.dtype if rk_is_cat else object - self.right = self.right.assign( - **{name: self.right[name].astype(typ)}) + if name in self.left.columns: + typ = lk.categories.dtype if lk_is_cat else object + self.left = self.left.assign( + **{name: self.left[name].astype(typ)}) + if name in self.right.columns: + typ = rk.categories.dtype if rk_is_cat else object + self.right = self.right.assign( + **{name: self.right[name].astype(typ)}) def _validate_specification(self): # Hm, any way to make this logic less complicated?? diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 33a8a8c318285..47c78e1cafff7 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -16,7 +16,8 @@ Series, UInt64Index) from pandas.api.types import CategoricalDtype as CDT from pandas.compat import lrange -from pandas.core.dtypes.common import is_categorical_dtype, is_object_dtype +from pandas.core.dtypes.common import ( + is_categorical_dtype, is_object_dtype, is_float_dtype, is_integer_dtype) from pandas.core.dtypes.dtypes import CategoricalDtype from pandas.core.reshape.concat import concat from pandas.core.reshape.merge import MergeError, merge @@ -942,23 +943,9 @@ def test_different(self, right_vals): # GH 9780 # We allow merging on object and categorical cols and cast # categorical cols to object - # if (is_categorical_dtype(right['A'].dtype) or - # is_object_dtype(right['A'].dtype)): result = pd.merge(left, right, on='A') assert is_object_dtype(result.A.dtype) - # GH 9780 - # We raise for merging on object col and int/float col and - # merging on categorical col and int/float col - # else: - # msg = ("You are trying to merge on " - # "{lk_dtype} and {rk_dtype} columns. " - # "If you wish to proceed you should use " - # "pd.concat".format(lk_dtype=left['A'].dtype, - # rk_dtype=right['A'].dtype)) - # with pytest.raises(ValueError, match=msg): - # pd.merge(left, right, on='A') - @pytest.mark.parametrize('d1', [np.int64, np.int32, np.int16, np.int8, np.uint8]) @pytest.mark.parametrize('d2', [np.int64, np.float64, @@ -1055,16 +1042,53 @@ def test_merge_incompat_infer_boolean_object(self): result = pd.merge(df2, df1, on='key') assert_frame_equal(result, expected) + @pytest.mark.parametrize('df1_vals, df2_vals, left_type, right_type', [ + + # infer to numeric + ([0, 1, 2], ["0", "1", "2"], + is_integer_dtype, is_object_dtype), + ([0.0, 1.0, 2.0], ["0", "1", "2"], + is_float_dtype, is_object_dtype), + + # unicode does not infer to numeric + ([0, 1, 2], [u"0", u"1", u"2"], + is_integer_dtype, is_object_dtype), + + # merge on category coercs to object + ([0, 1, 2], Series(['a', 'b', 'a']).astype('category'), + is_object_dtype, is_object_dtype), + ([0.0, 1.0, 2.0], Series(['a', 'b', 'a']).astype('category'), + is_object_dtype, is_object_dtype), + + # bool will infer if possible + ([0, 1], pd.Series([False, True], dtype=object), + is_integer_dtype, is_object_dtype), + ([0, 1], pd.Series([False, True], dtype=bool), + is_object_dtype, is_object_dtype) + ]) + def test_merge_incompat_dtypes_are_ok(self, df1_vals, df2_vals, + left_type, right_type): + # these are explicity allowed incompat merges, that pass thru + # the result type is dependent on if the values on the rhs are + # inferred, otherwise these will be coereced to object + + df1 = DataFrame({'A': df1_vals}) + df2 = DataFrame({'A': df2_vals}) + + result = pd.merge(df1, df2, on=['A']) + assert left_type(result.A.dtype) + result = pd.merge(df2, df1, on=['A']) + assert right_type(result.A.dtype) + @pytest.mark.parametrize('df1_vals, df2_vals', [ + (pd.date_range('1/1/2011', periods=2, freq='D'), ['2011-01-01', + '2011-01-02']), (pd.date_range('1/1/2011', periods=2, freq='D'), [0, 1]), (pd.date_range('1/1/2011', periods=2, freq='D'), [0.0, 1.0]), (pd.date_range('20130101', periods=3), pd.date_range('20130101', periods=3, tz='US/Eastern')), - ([0, 1, 2], Series(['a', 'b', 'a']).astype('category')), - ([0.0, 1.0, 2.0], Series(['a', 'b', 'a']).astype('category')), - # TODO ([0, 1], pd.Series([False, True])) ]) - def test_merge_incompat_dtypes(self, df1_vals, df2_vals): + def test_merge_incompat_dtypes_error(self, df1_vals, df2_vals): # GH 9780, GH 15800 # Raise a ValueError when a user tries to merge on # dtypes that are incompatible (e.g., obj and int/float) From 9fc37bea5d12a84186159f48eae578ba79d03985 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 26 Dec 2018 14:57:10 -0500 Subject: [PATCH 3/6] add whatsnew note --- doc/source/whatsnew/v0.24.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index d12d929470be5..c8e8d93cf9fc9 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -387,6 +387,7 @@ Backwards incompatible API changes - :func:`read_csv` will now raise a ``ValueError`` if a column with missing values is declared as having dtype ``bool`` (:issue:`20591`) - The column order of the resultant :class:`DataFrame` from :meth:`MultiIndex.to_frame` is now guaranteed to match the :attr:`MultiIndex.names` order. (:issue:`22420`) - :func:`pd.offsets.generate_range` argument ``time_rule`` has been removed; use ``offset`` instead (:issue:`24157`) +- In 0.23.x, pandas would raise a ``ValueError`` on a merge of a numeric column (e.g. ``int`` dtyped column) and an ``object`` dtyped column (:issue:`9780`). We have re-enabled the ability to merge ``object`` and other dtypes (:issue:`21681`) Percentage change on groupby ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ From 32d737fbedd08e4d5956cc1a72a4e8db44a81343 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 26 Dec 2018 16:18:27 -0500 Subject: [PATCH 4/6] fix with numeric-like object types --- pandas/core/reshape/merge.py | 4 ++-- pandas/tests/reshape/merge/test_merge.py | 17 +++++++---------- 2 files changed, 9 insertions(+), 12 deletions(-) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index ec4490ddae9f7..249249f870276 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -891,7 +891,7 @@ def _maybe_coerce_merge_keys(self): # coerce these if they are originally incompatible types # # for example if these are categorical, but are not dtype_equal - # or if we have object and integer dtypes, that do not infer + # or if we have object and integer dtypes for lk, rk, name in zip(self.left_join_keys, self.right_join_keys, @@ -961,7 +961,7 @@ def _maybe_coerce_merge_keys(self): # object values are allowed to be merged elif ((lk_is_object and is_numeric_dtype(rk)) or (is_numeric_dtype(lk) and rk_is_object)): - continue + pass # datetimelikes must match exactly elif is_datetimelike(lk) and not is_datetimelike(rk): diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 47c78e1cafff7..508d3712816f6 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -16,8 +16,7 @@ Series, UInt64Index) from pandas.api.types import CategoricalDtype as CDT from pandas.compat import lrange -from pandas.core.dtypes.common import ( - is_categorical_dtype, is_object_dtype, is_float_dtype, is_integer_dtype) +from pandas.core.dtypes.common import is_categorical_dtype, is_object_dtype from pandas.core.dtypes.dtypes import CategoricalDtype from pandas.core.reshape.concat import concat from pandas.core.reshape.merge import MergeError, merge @@ -1044,15 +1043,13 @@ def test_merge_incompat_infer_boolean_object(self): @pytest.mark.parametrize('df1_vals, df2_vals, left_type, right_type', [ - # infer to numeric + # do not infer to numeric ([0, 1, 2], ["0", "1", "2"], - is_integer_dtype, is_object_dtype), + is_object_dtype, is_object_dtype), ([0.0, 1.0, 2.0], ["0", "1", "2"], - is_float_dtype, is_object_dtype), - - # unicode does not infer to numeric + is_object_dtype, is_object_dtype), ([0, 1, 2], [u"0", u"1", u"2"], - is_integer_dtype, is_object_dtype), + is_object_dtype, is_object_dtype), # merge on category coercs to object ([0, 1, 2], Series(['a', 'b', 'a']).astype('category'), @@ -1060,9 +1057,9 @@ def test_merge_incompat_infer_boolean_object(self): ([0.0, 1.0, 2.0], Series(['a', 'b', 'a']).astype('category'), is_object_dtype, is_object_dtype), - # bool will infer if possible + # no not infer ([0, 1], pd.Series([False, True], dtype=object), - is_integer_dtype, is_object_dtype), + is_object_dtype, is_object_dtype), ([0, 1], pd.Series([False, True], dtype=bool), is_object_dtype, is_object_dtype) ]) From ed94284eb222dda2cf1199d41a2e2831e086925b Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 26 Dec 2018 16:27:50 -0500 Subject: [PATCH 5/6] simplify --- pandas/tests/reshape/merge/test_merge.py | 30 +++++++++--------------- 1 file changed, 11 insertions(+), 19 deletions(-) diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 508d3712816f6..7bcaa9ba74f4c 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -1041,30 +1041,22 @@ def test_merge_incompat_infer_boolean_object(self): result = pd.merge(df2, df1, on='key') assert_frame_equal(result, expected) - @pytest.mark.parametrize('df1_vals, df2_vals, left_type, right_type', [ + @pytest.mark.parametrize('df1_vals, df2_vals', [ # do not infer to numeric - ([0, 1, 2], ["0", "1", "2"], - is_object_dtype, is_object_dtype), - ([0.0, 1.0, 2.0], ["0", "1", "2"], - is_object_dtype, is_object_dtype), - ([0, 1, 2], [u"0", u"1", u"2"], - is_object_dtype, is_object_dtype), + ([0, 1, 2], ["0", "1", "2"]), + ([0.0, 1.0, 2.0], ["0", "1", "2"]), + ([0, 1, 2], [u"0", u"1", u"2"]), # merge on category coercs to object - ([0, 1, 2], Series(['a', 'b', 'a']).astype('category'), - is_object_dtype, is_object_dtype), - ([0.0, 1.0, 2.0], Series(['a', 'b', 'a']).astype('category'), - is_object_dtype, is_object_dtype), + ([0, 1, 2], Series(['a', 'b', 'a']).astype('category')), + ([0.0, 1.0, 2.0], Series(['a', 'b', 'a']).astype('category')), # no not infer - ([0, 1], pd.Series([False, True], dtype=object), - is_object_dtype, is_object_dtype), - ([0, 1], pd.Series([False, True], dtype=bool), - is_object_dtype, is_object_dtype) + ([0, 1], pd.Series([False, True], dtype=object)), + ([0, 1], pd.Series([False, True], dtype=bool)), ]) - def test_merge_incompat_dtypes_are_ok(self, df1_vals, df2_vals, - left_type, right_type): + def test_merge_incompat_dtypes_are_ok(self, df1_vals, df2_vals): # these are explicity allowed incompat merges, that pass thru # the result type is dependent on if the values on the rhs are # inferred, otherwise these will be coereced to object @@ -1073,9 +1065,9 @@ def test_merge_incompat_dtypes_are_ok(self, df1_vals, df2_vals, df2 = DataFrame({'A': df2_vals}) result = pd.merge(df1, df2, on=['A']) - assert left_type(result.A.dtype) + assert is_object_dtype(result.A.dtype) result = pd.merge(df2, df1, on=['A']) - assert right_type(result.A.dtype) + assert is_object_dtype(result.A.dtype) @pytest.mark.parametrize('df1_vals, df2_vals', [ (pd.date_range('1/1/2011', periods=2, freq='D'), ['2011-01-01', From c805634f57ccd7761190e530024092479b385eab Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Thu, 27 Dec 2018 07:56:02 -0500 Subject: [PATCH 6/6] disallow inferred numeric with non-numeric --- pandas/core/reshape/merge.py | 17 ++++++++++++++++- pandas/tests/reshape/merge/test_merge.py | 18 ++++++++---------- 2 files changed, 24 insertions(+), 11 deletions(-) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 249249f870276..130bc2b080c72 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -961,7 +961,22 @@ def _maybe_coerce_merge_keys(self): # object values are allowed to be merged elif ((lk_is_object and is_numeric_dtype(rk)) or (is_numeric_dtype(lk) and rk_is_object)): - pass + inferred_left = lib.infer_dtype(lk) + inferred_right = lib.infer_dtype(rk) + bool_types = ['integer', 'mixed-integer', 'boolean', 'empty'] + string_types = ['string', 'unicode', 'mixed', 'bytes', 'empty'] + + # inferred bool + if (inferred_left in bool_types and + inferred_right in bool_types): + pass + + # unless we are merging non-string-like with string-like + elif ((inferred_left in string_types and + inferred_right not in string_types) or + (inferred_right in string_types and + inferred_left not in string_types)): + raise ValueError(msg) # datetimelikes must match exactly elif is_datetimelike(lk) and not is_datetimelike(rk): diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 7bcaa9ba74f4c..7839d93f5af88 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -924,10 +924,6 @@ class TestMergeDtypes(object): @pytest.mark.parametrize('right_vals', [ ['foo', 'bar'], Series(['foo', 'bar']).astype('category'), - [1, 2], - [1.0, 2.0], - Series([1, 2], dtype='uint64'), - Series([1, 2], dtype='int32') ]) def test_different(self, right_vals): @@ -1043,12 +1039,7 @@ def test_merge_incompat_infer_boolean_object(self): @pytest.mark.parametrize('df1_vals, df2_vals', [ - # do not infer to numeric - ([0, 1, 2], ["0", "1", "2"]), - ([0.0, 1.0, 2.0], ["0", "1", "2"]), - ([0, 1, 2], [u"0", u"1", u"2"]), - - # merge on category coercs to object + # merge on category coerces to object ([0, 1, 2], Series(['a', 'b', 'a']).astype('category')), ([0.0, 1.0, 2.0], Series(['a', 'b', 'a']).astype('category')), @@ -1070,6 +1061,13 @@ def test_merge_incompat_dtypes_are_ok(self, df1_vals, df2_vals): assert is_object_dtype(result.A.dtype) @pytest.mark.parametrize('df1_vals, df2_vals', [ + # do not infer to numeric + + (Series([1, 2], dtype='uint64'), ["a", "b", "c"]), + (Series([1, 2], dtype='int32'), ["a", "b", "c"]), + ([0, 1, 2], ["0", "1", "2"]), + ([0.0, 1.0, 2.0], ["0", "1", "2"]), + ([0, 1, 2], [u"0", u"1", u"2"]), (pd.date_range('1/1/2011', periods=2, freq='D'), ['2011-01-01', '2011-01-02']), (pd.date_range('1/1/2011', periods=2, freq='D'), [0, 1]),