diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 4049b0321f221..ad62146dda268 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -354,7 +354,7 @@ def infer_dtype_from_scalar(val, pandas_dtype=False): # a 1-element ndarray if isinstance(val, np.ndarray): - msg = "invalid ndarray passed to _infer_dtype_from_scalar" + msg = "invalid ndarray passed to infer_dtype_from_scalar" if val.ndim != 0: raise ValueError(msg) diff --git a/pandas/tests/dtypes/cast/__init__.py b/pandas/tests/dtypes/cast/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tests/dtypes/cast/test_construct_from_scalar.py b/pandas/tests/dtypes/cast/test_construct_from_scalar.py new file mode 100644 index 0000000000000..d0f58c811e34c --- /dev/null +++ b/pandas/tests/dtypes/cast/test_construct_from_scalar.py @@ -0,0 +1,22 @@ +# -*- coding: utf-8 -*- + +from pandas.core.dtypes.cast import construct_1d_arraylike_from_scalar +from pandas.core.dtypes.dtypes import CategoricalDtype + +from pandas import Categorical +from pandas.util import testing as tm + + +def test_cast_1d_array_like_from_scalar_categorical(): + # see gh-19565 + # + # Categorical result from scalar did not maintain + # categories and ordering of the passed dtype. + cats = ["a", "b", "c"] + cat_type = CategoricalDtype(categories=cats, ordered=False) + expected = Categorical(["a", "a"], categories=cats) + + result = construct_1d_arraylike_from_scalar("a", len(expected), cat_type) + tm.assert_categorical_equal(result, expected, + check_category_order=True, + check_dtype=True) diff --git a/pandas/tests/dtypes/cast/test_construct_ndarray.py b/pandas/tests/dtypes/cast/test_construct_ndarray.py new file mode 100644 index 0000000000000..aa2cb25e62d52 --- /dev/null +++ b/pandas/tests/dtypes/cast/test_construct_ndarray.py @@ -0,0 +1,20 @@ +# -*- coding: utf-8 -*- + +import numpy as np +import pytest + +from pandas.core.dtypes.cast import construct_1d_ndarray_preserving_na + +from pandas.util import testing as tm + + +@pytest.mark.parametrize('values, dtype, expected', [ + ([1, 2, 3], None, np.array([1, 2, 3])), + (np.array([1, 2, 3]), None, np.array([1, 2, 3])), + (['1', '2', None], None, np.array(['1', '2', None])), + (['1', '2', None], np.dtype('str'), np.array(['1', '2', None])), + ([1, 2, None], np.dtype('str'), np.array(['1', '2', None])), +]) +def test_construct_1d_ndarray_preserving_na(values, dtype, expected): + result = construct_1d_ndarray_preserving_na(values, dtype=dtype) + tm.assert_numpy_array_equal(result, expected) diff --git a/pandas/tests/dtypes/cast/test_construct_object_arr.py b/pandas/tests/dtypes/cast/test_construct_object_arr.py new file mode 100644 index 0000000000000..61fc17880ed65 --- /dev/null +++ b/pandas/tests/dtypes/cast/test_construct_object_arr.py @@ -0,0 +1,22 @@ +# -*- coding: utf-8 -*- + +import pytest + +from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike + + +@pytest.mark.parametrize("datum1", [1, 2., "3", (4, 5), [6, 7], None]) +@pytest.mark.parametrize("datum2", [8, 9., "10", (11, 12), [13, 14], None]) +def test_cast_1d_array(datum1, datum2): + data = [datum1, datum2] + result = construct_1d_object_array_from_listlike(data) + + # Direct comparison fails: https://github.com/numpy/numpy/issues/10218 + assert result.dtype == "object" + assert list(result) == data + + +@pytest.mark.parametrize("val", [1, 2., None]) +def test_cast_1d_array_invalid_scalar(val): + with pytest.raises(TypeError, match="has no len()"): + construct_1d_object_array_from_listlike(val) diff --git a/pandas/tests/dtypes/cast/test_convert_objects.py b/pandas/tests/dtypes/cast/test_convert_objects.py new file mode 100644 index 0000000000000..58ba4161e96a9 --- /dev/null +++ b/pandas/tests/dtypes/cast/test_convert_objects.py @@ -0,0 +1,15 @@ +# -*- coding: utf-8 -*- + +import numpy as np +import pytest + +from pandas.core.dtypes.cast import maybe_convert_objects + + +@pytest.mark.parametrize("data", [[1, 2], ["apply", "banana"]]) +@pytest.mark.parametrize("copy", [True, False]) +def test_maybe_convert_objects_copy(data, copy): + arr = np.array(data) + out = maybe_convert_objects(arr, copy=copy) + + assert (arr is out) is (not copy) diff --git a/pandas/tests/dtypes/cast/test_downcast.py b/pandas/tests/dtypes/cast/test_downcast.py new file mode 100644 index 0000000000000..41607c948b909 --- /dev/null +++ b/pandas/tests/dtypes/cast/test_downcast.py @@ -0,0 +1,82 @@ +# -*- coding: utf-8 -*- + +import numpy as np +import pytest + +from pandas.core.dtypes.cast import maybe_downcast_to_dtype + +from pandas import DatetimeIndex, Series, Timestamp +from pandas.util import testing as tm + + +@pytest.mark.parametrize("arr,dtype,expected", [ + (np.array([8.5, 8.6, 8.7, 8.8, 8.9999999999995]), "infer", + np.array([8.5, 8.6, 8.7, 8.8, 8.9999999999995])), + + (np.array([8., 8., 8., 8., 8.9999999999995]), "infer", + np.array([8, 8, 8, 8, 9], dtype=np.int64)), + + (np.array([8., 8., 8., 8., 9.0000000000005]), "infer", + np.array([8, 8, 8, 8, 9], dtype=np.int64)), +]) +def test_downcast(arr, expected, dtype): + result = maybe_downcast_to_dtype(arr, dtype) + tm.assert_numpy_array_equal(result, expected) + + +def test_downcast_booleans(): + # see gh-16875: coercing of booleans. + ser = Series([True, True, False]) + result = maybe_downcast_to_dtype(ser, np.dtype(np.float64)) + + expected = ser + tm.assert_series_equal(result, expected) + + +def test_downcast_conversion_no_nan(any_real_dtype): + dtype = any_real_dtype + expected = np.array([1, 2]) + arr = np.array([1.0, 2.0], dtype=dtype) + + result = maybe_downcast_to_dtype(arr, "infer") + tm.assert_almost_equal(result, expected, check_dtype=False) + + +def test_downcast_conversion_nan(float_dtype): + dtype = float_dtype + data = [1.0, 2.0, np.nan] + + expected = np.array(data, dtype=dtype) + arr = np.array(data, dtype=dtype) + + result = maybe_downcast_to_dtype(arr, "infer") + tm.assert_almost_equal(result, expected) + + +def test_downcast_conversion_empty(any_real_dtype): + dtype = any_real_dtype + arr = np.array([], dtype=dtype) + result = maybe_downcast_to_dtype(arr, "int64") + tm.assert_numpy_array_equal(result, np.array([], dtype=np.int64)) + + +@pytest.mark.parametrize("klass", [np.datetime64, np.timedelta64]) +def test_datetime_likes_nan(klass): + dtype = klass.__name__ + "[ns]" + arr = np.array([1, 2, np.nan]) + + exp = np.array([1, 2, klass("NaT")], dtype) + res = maybe_downcast_to_dtype(arr, dtype) + tm.assert_numpy_array_equal(res, exp) + + +@pytest.mark.parametrize("as_asi", [True, False]) +def test_datetime_with_timezone(as_asi): + # see gh-15426 + ts = Timestamp("2016-01-01 12:00:00", tz="US/Pacific") + exp = DatetimeIndex([ts, ts]) + + obj = exp.asi8 if as_asi else exp + res = maybe_downcast_to_dtype(obj, exp.dtype) + + tm.assert_index_equal(res, exp) diff --git a/pandas/tests/dtypes/cast/test_find_common_type.py b/pandas/tests/dtypes/cast/test_find_common_type.py new file mode 100644 index 0000000000000..d83c8d03e9e42 --- /dev/null +++ b/pandas/tests/dtypes/cast/test_find_common_type.py @@ -0,0 +1,108 @@ +# -*- coding: utf-8 -*- + +import numpy as np +import pytest + +from pandas.core.dtypes.cast import find_common_type +from pandas.core.dtypes.dtypes import ( + CategoricalDtype, DatetimeTZDtype, PeriodDtype) + + +@pytest.mark.parametrize("source_dtypes,expected_common_dtype", [ + ((np.int64,), np.int64), + ((np.uint64,), np.uint64), + ((np.float32,), np.float32), + ((np.object,), np.object), + + # Into ints. + ((np.int16, np.int64), np.int64), + ((np.int32, np.uint32), np.int64), + ((np.uint16, np.uint64), np.uint64), + + # Into floats. + ((np.float16, np.float32), np.float32), + ((np.float16, np.int16), np.float32), + ((np.float32, np.int16), np.float32), + ((np.uint64, np.int64), np.float64), + ((np.int16, np.float64), np.float64), + ((np.float16, np.int64), np.float64), + + # Into others. + ((np.complex128, np.int32), np.complex128), + ((np.object, np.float32), np.object), + ((np.object, np.int16), np.object), + + # Bool with int. + ((np.dtype("bool"), np.int64), np.object), + ((np.dtype("bool"), np.int32), np.object), + ((np.dtype("bool"), np.int16), np.object), + ((np.dtype("bool"), np.int8), np.object), + ((np.dtype("bool"), np.uint64), np.object), + ((np.dtype("bool"), np.uint32), np.object), + ((np.dtype("bool"), np.uint16), np.object), + ((np.dtype("bool"), np.uint8), np.object), + + # Bool with float. + ((np.dtype("bool"), np.float64), np.object), + ((np.dtype("bool"), np.float32), np.object), + + ((np.dtype("datetime64[ns]"), np.dtype("datetime64[ns]")), + np.dtype("datetime64[ns]")), + ((np.dtype("timedelta64[ns]"), np.dtype("timedelta64[ns]")), + np.dtype("timedelta64[ns]")), + + ((np.dtype("datetime64[ns]"), np.dtype("datetime64[ms]")), + np.dtype("datetime64[ns]")), + ((np.dtype("timedelta64[ms]"), np.dtype("timedelta64[ns]")), + np.dtype("timedelta64[ns]")), + + ((np.dtype("datetime64[ns]"), np.dtype("timedelta64[ns]")), np.object), + ((np.dtype("datetime64[ns]"), np.int64), np.object) +]) +def test_numpy_dtypes(source_dtypes, expected_common_dtype): + assert find_common_type(source_dtypes) == expected_common_dtype + + +def test_raises_empty_input(): + with pytest.raises(ValueError, match="no types given"): + find_common_type([]) + + +@pytest.mark.parametrize("dtypes,exp_type", [ + ([CategoricalDtype()], "category"), + ([np.object, CategoricalDtype()], np.object), + ([CategoricalDtype(), CategoricalDtype()], "category"), +]) +def test_categorical_dtype(dtypes, exp_type): + assert find_common_type(dtypes) == exp_type + + +def test_datetimetz_dtype_match(): + dtype = DatetimeTZDtype(unit="ns", tz="US/Eastern") + assert find_common_type([dtype, dtype]) == "datetime64[ns, US/Eastern]" + + +@pytest.mark.parametrize("dtype2", [ + DatetimeTZDtype(unit="ns", tz="Asia/Tokyo"), + np.dtype("datetime64[ns]"), np.object, np.int64 +]) +def test_datetimetz_dtype_mismatch(dtype2): + dtype = DatetimeTZDtype(unit="ns", tz="US/Eastern") + assert find_common_type([dtype, dtype2]) == np.object + assert find_common_type([dtype2, dtype]) == np.object + + +def test_period_dtype_match(): + dtype = PeriodDtype(freq="D") + assert find_common_type([dtype, dtype]) == "period[D]" + + +@pytest.mark.parametrize("dtype2", [ + DatetimeTZDtype(unit="ns", tz="Asia/Tokyo"), + PeriodDtype(freq="2D"), PeriodDtype(freq="H"), + np.dtype("datetime64[ns]"), np.object, np.int64 +]) +def test_period_dtype_mismatch(dtype2): + dtype = PeriodDtype(freq="D") + assert find_common_type([dtype, dtype2]) == np.object + assert find_common_type([dtype2, dtype]) == np.object diff --git a/pandas/tests/dtypes/cast/test_infer_datetimelike.py b/pandas/tests/dtypes/cast/test_infer_datetimelike.py new file mode 100644 index 0000000000000..b2d63a6bfbd1c --- /dev/null +++ b/pandas/tests/dtypes/cast/test_infer_datetimelike.py @@ -0,0 +1,22 @@ +# -*- coding: utf-8 -*- + +import numpy as np +import pytest + +from pandas import DataFrame, NaT, Series, Timestamp + + +@pytest.mark.parametrize("data,exp_size", [ + # see gh-16362. + ([[NaT, "a", "b", 0], [NaT, "b", "c", 1]], 8), + ([[NaT, "a", 0], [NaT, "b", 1]], 6) +]) +def test_maybe_infer_to_datetimelike_df_construct(data, exp_size): + result = DataFrame(np.array(data)) + assert result.size == exp_size + + +def test_maybe_infer_to_datetimelike_ser_construct(): + # see gh-19671. + result = Series(["M1701", Timestamp("20130101")]) + assert result.dtype.kind == "O" diff --git a/pandas/tests/dtypes/cast/test_infer_dtype.py b/pandas/tests/dtypes/cast/test_infer_dtype.py new file mode 100644 index 0000000000000..c7842ac591ed9 --- /dev/null +++ b/pandas/tests/dtypes/cast/test_infer_dtype.py @@ -0,0 +1,160 @@ +# -*- coding: utf-8 -*- + +from datetime import date, datetime, timedelta + +import numpy as np +import pytest + +from pandas.core.dtypes.cast import ( + cast_scalar_to_array, infer_dtype_from_array, infer_dtype_from_scalar) +from pandas.core.dtypes.common import is_dtype_equal + +from pandas import ( + Categorical, Period, Series, Timedelta, Timestamp, date_range) +from pandas.util import testing as tm + + +@pytest.fixture(params=[True, False]) +def pandas_dtype(request): + return request.param + + +def test_infer_dtype_from_int_scalar(any_int_dtype): + # Test that infer_dtype_from_scalar is + # returning correct dtype for int and float. + data = np.dtype(any_int_dtype).type(12) + dtype, val = infer_dtype_from_scalar(data) + assert dtype == type(data) + + +def test_infer_dtype_from_float_scalar(float_dtype): + float_dtype = np.dtype(float_dtype).type + data = float_dtype(12) + + dtype, val = infer_dtype_from_scalar(data) + assert dtype == float_dtype + + +@pytest.mark.parametrize("data,exp_dtype", [ + (12, np.int64), (np.float(12), np.float64) +]) +def test_infer_dtype_from_python_scalar(data, exp_dtype): + dtype, val = infer_dtype_from_scalar(data) + assert dtype == exp_dtype + + +@pytest.mark.parametrize("bool_val", [True, False]) +def test_infer_dtype_from_boolean(bool_val): + dtype, val = infer_dtype_from_scalar(bool_val) + assert dtype == np.bool_ + + +def test_infer_dtype_from_complex(complex_dtype): + data = np.dtype(complex_dtype).type(1) + dtype, val = infer_dtype_from_scalar(data) + assert dtype == np.complex_ + + +@pytest.mark.parametrize("data", [np.datetime64(1, "ns"), Timestamp(1), + datetime(2000, 1, 1, 0, 0)]) +def test_infer_dtype_from_datetime(data): + dtype, val = infer_dtype_from_scalar(data) + assert dtype == "M8[ns]" + + +@pytest.mark.parametrize("data", [np.timedelta64(1, "ns"), Timedelta(1), + timedelta(1)]) +def test_infer_dtype_from_timedelta(data): + dtype, val = infer_dtype_from_scalar(data) + assert dtype == "m8[ns]" + + +@pytest.mark.parametrize("freq", ["M", "D"]) +def test_infer_dtype_from_period(freq, pandas_dtype): + p = Period("2011-01-01", freq=freq) + dtype, val = infer_dtype_from_scalar(p, pandas_dtype=pandas_dtype) + + if pandas_dtype: + exp_dtype = "period[{0}]".format(freq) + exp_val = p.ordinal + else: + exp_dtype = np.object_ + exp_val = p + + assert dtype == exp_dtype + assert val == exp_val + + +@pytest.mark.parametrize("data", [date(2000, 1, 1), "foo", + Timestamp(1, tz="US/Eastern")]) +def test_infer_dtype_misc(data): + dtype, val = infer_dtype_from_scalar(data) + assert dtype == np.object_ + + +@pytest.mark.parametrize("tz", ["UTC", "US/Eastern", "Asia/Tokyo"]) +def test_infer_from_scalar_tz(tz, pandas_dtype): + dt = Timestamp(1, tz=tz) + dtype, val = infer_dtype_from_scalar(dt, pandas_dtype=pandas_dtype) + + if pandas_dtype: + exp_dtype = "datetime64[ns, {0}]".format(tz) + exp_val = dt.value + else: + exp_dtype = np.object_ + exp_val = dt + + assert dtype == exp_dtype + assert val == exp_val + + +def test_infer_dtype_from_scalar_errors(): + msg = "invalid ndarray passed to infer_dtype_from_scalar" + + with pytest.raises(ValueError, match=msg): + infer_dtype_from_scalar(np.array([1])) + + +@pytest.mark.parametrize( + "arr, expected, pandas_dtype", + [("foo", np.object_, False), + (b"foo", np.object_, False), + (1, np.int_, False), + (1.5, np.float_, False), + ([1], np.int_, False), + (np.array([1], dtype=np.int64), np.int64, False), + ([np.nan, 1, ""], np.object_, False), + (np.array([[1.0, 2.0]]), np.float_, False), + (Categorical(list("aabc")), np.object_, False), + (Categorical([1, 2, 3]), np.int64, False), + (Categorical(list("aabc")), "category", True), + (Categorical([1, 2, 3]), "category", True), + (Timestamp("20160101"), np.object_, False), + (np.datetime64("2016-01-01"), np.dtype("=M8[D]"), False), + (date_range("20160101", periods=3), + np.dtype("=M8[ns]"), False), + (date_range("20160101", periods=3, tz="US/Eastern"), + "datetime64[ns, US/Eastern]", True), + (Series([1., 2, 3]), np.float64, False), + (Series(list("abc")), np.object_, False), + (Series(date_range("20160101", periods=3, tz="US/Eastern")), + "datetime64[ns, US/Eastern]", True)]) +def test_infer_dtype_from_array(arr, expected, pandas_dtype): + dtype, _ = infer_dtype_from_array(arr, pandas_dtype=pandas_dtype) + assert is_dtype_equal(dtype, expected) + + +@pytest.mark.parametrize("obj,dtype", [ + (1, np.int64), (1.1, np.float64), + (Timestamp("2011-01-01"), "datetime64[ns]"), + (Timestamp("2011-01-01", tz="US/Eastern"), np.object), + (Period("2011-01-01", freq="D"), np.object) +]) +def test_cast_scalar_to_array(obj, dtype): + shape = (3, 2) + + exp = np.empty(shape, dtype=dtype) + exp.fill(obj) + + arr = cast_scalar_to_array(shape, obj, dtype=dtype) + tm.assert_numpy_array_equal(arr, exp) diff --git a/pandas/tests/dtypes/test_cast.py b/pandas/tests/dtypes/test_cast.py deleted file mode 100644 index 871e71ea2e4b0..0000000000000 --- a/pandas/tests/dtypes/test_cast.py +++ /dev/null @@ -1,395 +0,0 @@ -# -*- coding: utf-8 -*- - -""" -These test the private routines in types/cast.py - -""" - -from datetime import date, datetime, timedelta - -import numpy as np -import pytest - -from pandas.core.dtypes.cast import ( - cast_scalar_to_array, construct_1d_arraylike_from_scalar, - construct_1d_ndarray_preserving_na, - construct_1d_object_array_from_listlike, find_common_type, - infer_dtype_from_array, infer_dtype_from_scalar, maybe_convert_objects, - maybe_downcast_to_dtype) -from pandas.core.dtypes.common import is_dtype_equal -from pandas.core.dtypes.dtypes import ( - CategoricalDtype, DatetimeTZDtype, PeriodDtype) - -import pandas as pd -from pandas import ( - DataFrame, DatetimeIndex, NaT, Period, Series, Timedelta, Timestamp) -from pandas.util import testing as tm - - -class TestMaybeDowncast(object): - - def test_downcast(self): - # test downcasting - - arr = np.array([8.5, 8.6, 8.7, 8.8, 8.9999999999995]) - result = maybe_downcast_to_dtype(arr, 'infer') - tm.assert_numpy_array_equal(result, arr) - - arr = np.array([8., 8., 8., 8., 8.9999999999995]) - result = maybe_downcast_to_dtype(arr, 'infer') - expected = np.array([8, 8, 8, 8, 9], dtype=np.int64) - tm.assert_numpy_array_equal(result, expected) - - arr = np.array([8., 8., 8., 8., 9.0000000000005]) - result = maybe_downcast_to_dtype(arr, 'infer') - expected = np.array([8, 8, 8, 8, 9], dtype=np.int64) - tm.assert_numpy_array_equal(result, expected) - - # see gh-16875: coercing of booleans. - ser = Series([True, True, False]) - result = maybe_downcast_to_dtype(ser, np.dtype(np.float64)) - expected = ser - tm.assert_series_equal(result, expected) - - @pytest.mark.parametrize("dtype", [np.float64, object, np.int64]) - def test_downcast_conversion_no_nan(self, dtype): - expected = np.array([1, 2]) - arr = np.array([1.0, 2.0], dtype=dtype) - - result = maybe_downcast_to_dtype(arr, "infer") - tm.assert_almost_equal(result, expected, check_dtype=False) - - @pytest.mark.parametrize("dtype", [np.float64, object]) - def test_downcast_conversion_nan(self, dtype): - expected = np.array([1.0, 2.0, np.nan], dtype=dtype) - arr = np.array([1.0, 2.0, np.nan], dtype=dtype) - - result = maybe_downcast_to_dtype(arr, "infer") - tm.assert_almost_equal(result, expected) - - @pytest.mark.parametrize("dtype", [np.int32, np.float64, np.float32, - np.bool_, np.int64, object]) - def test_downcast_conversion_empty(self, dtype): - arr = np.array([], dtype=dtype) - result = maybe_downcast_to_dtype(arr, "int64") - tm.assert_numpy_array_equal(result, np.array([], dtype=np.int64)) - - def test_datetimelikes_nan(self): - arr = np.array([1, 2, np.nan]) - exp = np.array([1, 2, np.datetime64('NaT')], dtype='datetime64[ns]') - res = maybe_downcast_to_dtype(arr, 'datetime64[ns]') - tm.assert_numpy_array_equal(res, exp) - - exp = np.array([1, 2, np.timedelta64('NaT')], dtype='timedelta64[ns]') - res = maybe_downcast_to_dtype(arr, 'timedelta64[ns]') - tm.assert_numpy_array_equal(res, exp) - - def test_datetime_with_timezone(self): - # GH 15426 - ts = Timestamp("2016-01-01 12:00:00", tz='US/Pacific') - exp = DatetimeIndex([ts, ts]) - res = maybe_downcast_to_dtype(exp, exp.dtype) - tm.assert_index_equal(res, exp) - - res = maybe_downcast_to_dtype(exp.asi8, exp.dtype) - tm.assert_index_equal(res, exp) - - -class TestInferDtype(object): - - def test_infer_dtype_from_int_scalar(self, any_int_dtype): - # Test that infer_dtype_from_scalar is - # returning correct dtype for int and float. - data = np.dtype(any_int_dtype).type(12) - dtype, val = infer_dtype_from_scalar(data) - assert dtype == type(data) - - def test_infer_dtype_from_float_scalar(self, float_dtype): - float_dtype = np.dtype(float_dtype).type - data = float_dtype(12) - - dtype, val = infer_dtype_from_scalar(data) - assert dtype == float_dtype - - def test_infer_dtype_from_python_scalar(self): - data = 12 - dtype, val = infer_dtype_from_scalar(data) - assert dtype == np.int64 - - data = np.float(12) - dtype, val = infer_dtype_from_scalar(data) - assert dtype == np.float64 - - @pytest.mark.parametrize("bool_val", [True, False]) - def test_infer_dtype_from_boolean(self, bool_val): - dtype, val = infer_dtype_from_scalar(bool_val) - assert dtype == np.bool_ - - def test_infer_dtype_from_complex(self, complex_dtype): - data = np.dtype(complex_dtype).type(1) - dtype, val = infer_dtype_from_scalar(data) - assert dtype == np.complex_ - - @pytest.mark.parametrize("data", [np.datetime64(1, "ns"), Timestamp(1), - datetime(2000, 1, 1, 0, 0)]) - def test_infer_dtype_from_datetime(self, data): - dtype, val = infer_dtype_from_scalar(data) - assert dtype == "M8[ns]" - - @pytest.mark.parametrize("data", [np.timedelta64(1, "ns"), Timedelta(1), - timedelta(1)]) - def test_infer_dtype_from_timedelta(self, data): - dtype, val = infer_dtype_from_scalar(data) - assert dtype == "m8[ns]" - - @pytest.mark.parametrize("freq", ["M", "D"]) - def test_infer_dtype_from_period(self, freq): - p = Period("2011-01-01", freq=freq) - dtype, val = infer_dtype_from_scalar(p, pandas_dtype=True) - - assert dtype == "period[{0}]".format(freq) - assert val == p.ordinal - - dtype, val = infer_dtype_from_scalar(p) - assert dtype == np.object_ - assert val == p - - @pytest.mark.parametrize("data", [date(2000, 1, 1), "foo", - Timestamp(1, tz="US/Eastern")]) - def test_infer_dtype_misc(self, data): - dtype, val = infer_dtype_from_scalar(data) - assert dtype == np.object_ - - @pytest.mark.parametrize('tz', ['UTC', 'US/Eastern', 'Asia/Tokyo']) - def test_infer_from_scalar_tz(self, tz): - dt = Timestamp(1, tz=tz) - dtype, val = infer_dtype_from_scalar(dt, pandas_dtype=True) - assert dtype == 'datetime64[ns, {0}]'.format(tz) - assert val == dt.value - - dtype, val = infer_dtype_from_scalar(dt) - assert dtype == np.object_ - assert val == dt - - def test_infer_dtype_from_scalar_errors(self): - with pytest.raises(ValueError): - infer_dtype_from_scalar(np.array([1])) - - @pytest.mark.parametrize( - "arr, expected, pandas_dtype", - [('foo', np.object_, False), - (b'foo', np.object_, False), - (1, np.int_, False), - (1.5, np.float_, False), - ([1], np.int_, False), - (np.array([1], dtype=np.int64), np.int64, False), - ([np.nan, 1, ''], np.object_, False), - (np.array([[1.0, 2.0]]), np.float_, False), - (pd.Categorical(list('aabc')), np.object_, False), - (pd.Categorical([1, 2, 3]), np.int64, False), - (pd.Categorical(list('aabc')), 'category', True), - (pd.Categorical([1, 2, 3]), 'category', True), - (Timestamp('20160101'), np.object_, False), - (np.datetime64('2016-01-01'), np.dtype('=M8[D]'), False), - (pd.date_range('20160101', periods=3), - np.dtype('=M8[ns]'), False), - (pd.date_range('20160101', periods=3, tz='US/Eastern'), - 'datetime64[ns, US/Eastern]', True), - (pd.Series([1., 2, 3]), np.float64, False), - (pd.Series(list('abc')), np.object_, False), - (pd.Series(pd.date_range('20160101', periods=3, tz='US/Eastern')), - 'datetime64[ns, US/Eastern]', True)]) - def test_infer_dtype_from_array(self, arr, expected, pandas_dtype): - - dtype, _ = infer_dtype_from_array(arr, pandas_dtype=pandas_dtype) - assert is_dtype_equal(dtype, expected) - - def test_cast_scalar_to_array(self): - arr = cast_scalar_to_array((3, 2), 1, dtype=np.int64) - exp = np.ones((3, 2), dtype=np.int64) - tm.assert_numpy_array_equal(arr, exp) - - arr = cast_scalar_to_array((3, 2), 1.1) - exp = np.empty((3, 2), dtype=np.float64) - exp.fill(1.1) - tm.assert_numpy_array_equal(arr, exp) - - arr = cast_scalar_to_array((2, 3), Timestamp('2011-01-01')) - exp = np.empty((2, 3), dtype='datetime64[ns]') - exp.fill(np.datetime64('2011-01-01')) - tm.assert_numpy_array_equal(arr, exp) - - # pandas dtype is stored as object dtype - obj = Timestamp('2011-01-01', tz='US/Eastern') - arr = cast_scalar_to_array((2, 3), obj) - exp = np.empty((2, 3), dtype=np.object) - exp.fill(obj) - tm.assert_numpy_array_equal(arr, exp) - - obj = Period('2011-01-01', freq='D') - arr = cast_scalar_to_array((2, 3), obj) - exp = np.empty((2, 3), dtype=np.object) - exp.fill(obj) - tm.assert_numpy_array_equal(arr, exp) - - -class TestMaybe(object): - - def test_maybe_infer_to_datetimelike(self): - # GH16362 - # pandas=0.20.1 raises IndexError: tuple index out of range - result = DataFrame(np.array([[NaT, 'a', 'b', 0], - [NaT, 'b', 'c', 1]])) - assert result.size == 8 - # this construction was fine - result = DataFrame(np.array([[NaT, 'a', 0], - [NaT, 'b', 1]])) - assert result.size == 6 - - # GH19671 - result = Series(['M1701', Timestamp('20130101')]) - assert result.dtype.kind == 'O' - - -class TestConvert(object): - - def test_maybe_convert_objects_copy(self): - values = np.array([1, 2]) - - out = maybe_convert_objects(values, copy=False) - assert values is out - - out = maybe_convert_objects(values, copy=True) - assert values is not out - - values = np.array(['apply', 'banana']) - out = maybe_convert_objects(values, copy=False) - assert values is out - - out = maybe_convert_objects(values, copy=True) - assert values is not out - - -class TestCommonTypes(object): - - @pytest.mark.parametrize("source_dtypes,expected_common_dtype", [ - ((np.int64,), np.int64), - ((np.uint64,), np.uint64), - ((np.float32,), np.float32), - ((np.object,), np.object), - - # into ints - ((np.int16, np.int64), np.int64), - ((np.int32, np.uint32), np.int64), - ((np.uint16, np.uint64), np.uint64), - - # into floats - ((np.float16, np.float32), np.float32), - ((np.float16, np.int16), np.float32), - ((np.float32, np.int16), np.float32), - ((np.uint64, np.int64), np.float64), - ((np.int16, np.float64), np.float64), - ((np.float16, np.int64), np.float64), - - # into others - ((np.complex128, np.int32), np.complex128), - ((np.object, np.float32), np.object), - ((np.object, np.int16), np.object), - - # bool with int - ((np.dtype('bool'), np.int64), np.object), - ((np.dtype('bool'), np.int32), np.object), - ((np.dtype('bool'), np.int16), np.object), - ((np.dtype('bool'), np.int8), np.object), - ((np.dtype('bool'), np.uint64), np.object), - ((np.dtype('bool'), np.uint32), np.object), - ((np.dtype('bool'), np.uint16), np.object), - ((np.dtype('bool'), np.uint8), np.object), - - # bool with float - ((np.dtype('bool'), np.float64), np.object), - ((np.dtype('bool'), np.float32), np.object), - - ((np.dtype('datetime64[ns]'), np.dtype('datetime64[ns]')), - np.dtype('datetime64[ns]')), - ((np.dtype('timedelta64[ns]'), np.dtype('timedelta64[ns]')), - np.dtype('timedelta64[ns]')), - - ((np.dtype('datetime64[ns]'), np.dtype('datetime64[ms]')), - np.dtype('datetime64[ns]')), - ((np.dtype('timedelta64[ms]'), np.dtype('timedelta64[ns]')), - np.dtype('timedelta64[ns]')), - - ((np.dtype('datetime64[ns]'), np.dtype('timedelta64[ns]')), - np.object), - ((np.dtype('datetime64[ns]'), np.int64), np.object) - ]) - def test_numpy_dtypes(self, source_dtypes, expected_common_dtype): - assert find_common_type(source_dtypes) == expected_common_dtype - - def test_raises_empty_input(self): - with pytest.raises(ValueError): - find_common_type([]) - - def test_categorical_dtype(self): - dtype = CategoricalDtype() - assert find_common_type([dtype]) == 'category' - assert find_common_type([dtype, dtype]) == 'category' - assert find_common_type([np.object, dtype]) == np.object - - def test_datetimetz_dtype(self): - dtype = DatetimeTZDtype(unit='ns', tz='US/Eastern') - assert find_common_type([dtype, dtype]) == 'datetime64[ns, US/Eastern]' - - for dtype2 in [DatetimeTZDtype(unit='ns', tz='Asia/Tokyo'), - np.dtype('datetime64[ns]'), np.object, np.int64]: - assert find_common_type([dtype, dtype2]) == np.object - assert find_common_type([dtype2, dtype]) == np.object - - def test_period_dtype(self): - dtype = PeriodDtype(freq='D') - assert find_common_type([dtype, dtype]) == 'period[D]' - - for dtype2 in [DatetimeTZDtype(unit='ns', tz='Asia/Tokyo'), - PeriodDtype(freq='2D'), PeriodDtype(freq='H'), - np.dtype('datetime64[ns]'), np.object, np.int64]: - assert find_common_type([dtype, dtype2]) == np.object - assert find_common_type([dtype2, dtype]) == np.object - - @pytest.mark.parametrize('datum1', [1, 2., "3", (4, 5), [6, 7], None]) - @pytest.mark.parametrize('datum2', [8, 9., "10", (11, 12), [13, 14], None]) - def test_cast_1d_array(self, datum1, datum2): - data = [datum1, datum2] - result = construct_1d_object_array_from_listlike(data) - - # Direct comparison fails: https://github.com/numpy/numpy/issues/10218 - assert result.dtype == 'object' - assert list(result) == data - - @pytest.mark.parametrize('val', [1, 2., None]) - def test_cast_1d_array_invalid_scalar(self, val): - pytest.raises(TypeError, construct_1d_object_array_from_listlike, val) - - def test_cast_1d_arraylike_from_scalar_categorical(self): - # GH 19565 - Categorical result from scalar did not maintain categories - # and ordering of the passed dtype - cats = ['a', 'b', 'c'] - cat_type = CategoricalDtype(categories=cats, ordered=False) - expected = pd.Categorical(['a', 'a'], categories=cats) - result = construct_1d_arraylike_from_scalar('a', len(expected), - cat_type) - tm.assert_categorical_equal(result, expected, - check_category_order=True, - check_dtype=True) - - -@pytest.mark.parametrize('values, dtype, expected', [ - ([1, 2, 3], None, np.array([1, 2, 3])), - (np.array([1, 2, 3]), None, np.array([1, 2, 3])), - (['1', '2', None], None, np.array(['1', '2', None])), - (['1', '2', None], np.dtype('str'), np.array(['1', '2', None])), - ([1, 2, None], np.dtype('str'), np.array(['1', '2', None])), -]) -def test_construct_1d_ndarray_preserving_na(values, dtype, expected): - result = construct_1d_ndarray_preserving_na(values, dtype=dtype) - tm.assert_numpy_array_equal(result, expected)