From 0618d9ab984f2915c07cf5dbe683d589fc15d9ab Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 7 Apr 2020 18:07:01 -0700 Subject: [PATCH 1/7] REF: collect dtype-less cases up front --- pandas/core/indexes/base.py | 81 ++++++++++++------- .../indexes/interval/test_constructors.py | 2 +- .../tests/indexes/ranges/test_constructors.py | 2 +- 3 files changed, 56 insertions(+), 29 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index d0319e9181bad..e8a7f2d87b3f0 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -18,6 +18,7 @@ from pandas.util._decorators import Appender, Substitution, cache_readonly, doc from pandas.core.dtypes import concat as _concat +from pandas.core.dtypes.base import ExtensionDtype from pandas.core.dtypes.cast import ( maybe_cast_to_integer_array, validate_numeric_casting, @@ -47,6 +48,7 @@ is_signed_integer_dtype, is_timedelta64_dtype, is_unsigned_integer_dtype, + pandas_dtype, ) from pandas.core.dtypes.concat import concat_compat from pandas.core.dtypes.generic import ( @@ -55,7 +57,6 @@ ABCDatetimeIndex, ABCIntervalIndex, ABCMultiIndex, - ABCPandasArray, ABCPeriodIndex, ABCRangeIndex, ABCSeries, @@ -69,6 +70,7 @@ from pandas.core.arrays import ExtensionArray from pandas.core.base import IndexOpsMixin, PandasObject import pandas.core.common as com +from pandas.core.construction import extract_array from pandas.core.indexers import deprecate_ndim_indexing from pandas.core.indexes.frozen import FrozenList import pandas.core.missing as missing @@ -287,12 +289,59 @@ def __new__( cls, data=None, dtype=None, copy=False, name=None, tupleize_cols=True, **kwargs, ) -> "Index": - from pandas.core.indexes.range import RangeIndex - name = maybe_extract_name(name, data, cls) - if isinstance(data, ABCPandasArray): + if dtype is not None: + dtype = pandas_dtype(dtype) + + if is_scalar(data): + raise cls._scalar_data_error(data) + elif isinstance(dtype, ExtensionDtype) and getattr(dtype, "kind", None) not in [ + "m", + "M", + ]: + # Handled below + pass + elif not isinstance( + data, (range, np.ndarray, ExtensionArray, Index, ABCSeries) + ): + if hasattr(data, "__array__"): + return Index( + np.asarray(data), dtype=dtype, copy=copy, name=name, **kwargs + ) + + if tupleize_cols and is_list_like(data): + # GH#21470: convert iterable to list before determining if empty + if is_iterator(data): + data = list(data) + + if data and all(isinstance(e, tuple) for e in data): + # we must be all tuples, otherwise don't construct + # GH#10697 + from pandas.core.indexes.multi import MultiIndex + + return MultiIndex.from_tuples( + data, names=name or kwargs.get("names") + ) + # other iterable of some kind + subarr = com.asarray_tuplesafe(data, dtype=object) + return Index(subarr, dtype=dtype, copy=copy, name=name, **kwargs) + + # ------------------------------------------------------------------- + # From here down, we know that we have an array-like data + + from pandas.core.indexes.range import RangeIndex + from pandas.core.arrays import PandasArray + + if not isinstance(data, (RangeIndex, ABCMultiIndex)): # ensure users don't accidentally put a PandasArray in an index. + data = extract_array(data, extract_numpy=True) + if isinstance(data, PandasArray): + # ensure users don't accidentally put a PandasArray in an index. + # Note: this check is only necessary for the tests where we patch + # patch PandasArray._typ, because extract_array returns + # a PandasArray even though we passed extract_numpy=True. + # For the same reason, we cant check ABCPandasArray here. data = data.to_numpy() # range @@ -351,7 +400,7 @@ def __new__( return Index(data, dtype=object, copy=copy, name=name, **kwargs) # index-like - elif isinstance(data, (np.ndarray, Index, ABCSeries)): + else: # Delay import for perf. https://github.com/pandas-dev/pandas/pull/31423 from pandas.core.indexes.numeric import ( Float64Index, @@ -399,28 +448,6 @@ def __new__( raise ValueError("Index data must be 1-dimensional") return cls._simple_new(subarr, name) - elif data is None or is_scalar(data): - raise cls._scalar_data_error(data) - elif hasattr(data, "__array__"): - return Index(np.asarray(data), dtype=dtype, copy=copy, name=name, **kwargs) - else: - if tupleize_cols and is_list_like(data): - # GH21470: convert iterable to list before determining if empty - if is_iterator(data): - data = list(data) - - if data and all(isinstance(e, tuple) for e in data): - # we must be all tuples, otherwise don't construct - # 10697 - from pandas.core.indexes.multi import MultiIndex - - return MultiIndex.from_tuples( - data, names=name or kwargs.get("names") - ) - # other iterable of some kind - subarr = com.asarray_tuplesafe(data, dtype=object) - return Index(subarr, dtype=dtype, copy=copy, name=name, **kwargs) - """ NOTE for new Index creation: diff --git a/pandas/tests/indexes/interval/test_constructors.py b/pandas/tests/indexes/interval/test_constructors.py index fa881df8139c6..84b939dc1a0f1 100644 --- a/pandas/tests/indexes/interval/test_constructors.py +++ b/pandas/tests/indexes/interval/test_constructors.py @@ -361,7 +361,7 @@ def test_constructor_errors(self, constructor): # scalar msg = ( - r"IntervalIndex\(...\) must be called with a collection of " + r"Index\(...\) must be called with a collection of " "some kind, 5 was passed" ) with pytest.raises(TypeError, match=msg): diff --git a/pandas/tests/indexes/ranges/test_constructors.py b/pandas/tests/indexes/ranges/test_constructors.py index 426341a53a5d1..b7f673428ae38 100644 --- a/pandas/tests/indexes/ranges/test_constructors.py +++ b/pandas/tests/indexes/ranges/test_constructors.py @@ -43,7 +43,7 @@ def test_constructor_invalid_args(self): r"kind, 0 was passed" ) with pytest.raises(TypeError, match=msg): - Index(0, 1000) + Index(0) @pytest.mark.parametrize( "args", From 95433c19c26a933f130f2878a942dd92e5aea72a Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 7 Apr 2020 18:33:10 -0700 Subject: [PATCH 2/7] handle tz kwarg upfront --- pandas/core/indexes/base.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 04f542eec3282..6900cfa18e0f7 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -69,6 +69,7 @@ from pandas.core.accessor import CachedAccessor import pandas.core.algorithms as algos from pandas.core.arrays import ExtensionArray +from pandas.core.arrays.datetimes import tz_to_dtype, validate_tz_from_dtype from pandas.core.base import IndexOpsMixin, PandasObject import pandas.core.common as com from pandas.core.construction import extract_array @@ -294,6 +295,10 @@ def __new__( if dtype is not None: dtype = pandas_dtype(dtype) + if "tz" in kwargs: + tz = kwargs.pop("tz") + validate_tz_from_dtype(dtype, tz) + dtype = tz_to_dtype(tz) if is_scalar(data): raise cls._scalar_data_error(data) @@ -365,11 +370,7 @@ def __new__( return _maybe_asobject(dtype, IntervalIndex, data, copy, name, **kwargs) - elif ( - is_datetime64_any_dtype(data) - or is_datetime64_any_dtype(dtype) - or "tz" in kwargs - ): + elif is_datetime64_any_dtype(data) or is_datetime64_any_dtype(dtype): # Delay import for perf. https://github.com/pandas-dev/pandas/pull/31423 from pandas import DatetimeIndex From 3500cac9fc2450868543d9451e1597b8ba7734bf Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 8 Apr 2020 13:53:46 -0700 Subject: [PATCH 3/7] revert --- pandas/core/indexes/base.py | 92 +++++++++++++------------------------ 1 file changed, 32 insertions(+), 60 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 6900cfa18e0f7..df58593bc930c 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -19,7 +19,6 @@ from pandas.util._decorators import Appender, Substitution, cache_readonly, doc from pandas.core.dtypes import concat as _concat -from pandas.core.dtypes.base import ExtensionDtype from pandas.core.dtypes.cast import ( maybe_cast_to_integer_array, validate_numeric_casting, @@ -49,7 +48,6 @@ is_signed_integer_dtype, is_timedelta64_dtype, is_unsigned_integer_dtype, - pandas_dtype, ) from pandas.core.dtypes.concat import concat_compat from pandas.core.dtypes.generic import ( @@ -58,6 +56,7 @@ ABCDatetimeIndex, ABCIntervalIndex, ABCMultiIndex, + ABCPandasArray, ABCPeriodIndex, ABCRangeIndex, ABCSeries, @@ -69,10 +68,8 @@ from pandas.core.accessor import CachedAccessor import pandas.core.algorithms as algos from pandas.core.arrays import ExtensionArray -from pandas.core.arrays.datetimes import tz_to_dtype, validate_tz_from_dtype from pandas.core.base import IndexOpsMixin, PandasObject import pandas.core.common as com -from pandas.core.construction import extract_array from pandas.core.indexers import deprecate_ndim_indexing from pandas.core.indexes.frozen import FrozenList import pandas.core.missing as missing @@ -291,63 +288,12 @@ def __new__( cls, data=None, dtype=None, copy=False, name=None, tupleize_cols=True, **kwargs, ) -> "Index": - name = maybe_extract_name(name, data, cls) - - if dtype is not None: - dtype = pandas_dtype(dtype) - if "tz" in kwargs: - tz = kwargs.pop("tz") - validate_tz_from_dtype(dtype, tz) - dtype = tz_to_dtype(tz) - - if is_scalar(data): - raise cls._scalar_data_error(data) - elif isinstance(dtype, ExtensionDtype) and getattr(dtype, "kind", None) not in [ - "m", - "M", - ]: - # Handled below - pass - elif not isinstance( - data, (range, np.ndarray, ExtensionArray, Index, ABCSeries) - ): - if hasattr(data, "__array__"): - return Index( - np.asarray(data), dtype=dtype, copy=copy, name=name, **kwargs - ) - - if tupleize_cols and is_list_like(data): - # GH#21470: convert iterable to list before determining if empty - if is_iterator(data): - data = list(data) - - if data and all(isinstance(e, tuple) for e in data): - # we must be all tuples, otherwise don't construct - # GH#10697 - from pandas.core.indexes.multi import MultiIndex - - return MultiIndex.from_tuples( - data, names=name or kwargs.get("names") - ) - # other iterable of some kind - subarr = com.asarray_tuplesafe(data, dtype=object) - return Index(subarr, dtype=dtype, copy=copy, name=name, **kwargs) - - # ------------------------------------------------------------------- - # From here down, we know that we have an array-like data - from pandas.core.indexes.range import RangeIndex - from pandas.core.arrays import PandasArray - if not isinstance(data, (RangeIndex, ABCMultiIndex)): - # ensure users don't accidentally put a PandasArray in an index. - data = extract_array(data, extract_numpy=True) - if isinstance(data, PandasArray): + name = maybe_extract_name(name, data, cls) + + if isinstance(data, ABCPandasArray): # ensure users don't accidentally put a PandasArray in an index. - # Note: this check is only necessary for the tests where we patch - # patch PandasArray._typ, because extract_array returns - # a PandasArray even though we passed extract_numpy=True. - # For the same reason, we cant check ABCPandasArray here. data = data.to_numpy() # range @@ -370,7 +316,11 @@ def __new__( return _maybe_asobject(dtype, IntervalIndex, data, copy, name, **kwargs) - elif is_datetime64_any_dtype(data) or is_datetime64_any_dtype(dtype): + elif ( + is_datetime64_any_dtype(data) + or is_datetime64_any_dtype(dtype) + or "tz" in kwargs + ): # Delay import for perf. https://github.com/pandas-dev/pandas/pull/31423 from pandas import DatetimeIndex @@ -402,7 +352,7 @@ def __new__( return Index(data, dtype=object, copy=copy, name=name, **kwargs) # index-like - else: + elif isinstance(data, (np.ndarray, Index, ABCSeries)): # Delay import for perf. https://github.com/pandas-dev/pandas/pull/31423 from pandas.core.indexes.numeric import ( Float64Index, @@ -450,6 +400,28 @@ def __new__( raise ValueError("Index data must be 1-dimensional") return cls._simple_new(subarr, name) + elif data is None or is_scalar(data): + raise cls._scalar_data_error(data) + elif hasattr(data, "__array__"): + return Index(np.asarray(data), dtype=dtype, copy=copy, name=name, **kwargs) + else: + if tupleize_cols and is_list_like(data): + # GH21470: convert iterable to list before determining if empty + if is_iterator(data): + data = list(data) + + if data and all(isinstance(e, tuple) for e in data): + # we must be all tuples, otherwise don't construct + # 10697 + from pandas.core.indexes.multi import MultiIndex + + return MultiIndex.from_tuples( + data, names=name or kwargs.get("names") + ) + # other iterable of some kind + subarr = com.asarray_tuplesafe(data, dtype=object) + return Index(subarr, dtype=dtype, copy=copy, name=name, **kwargs) + """ NOTE for new Index creation: From fe43626802ea7df99054fdbceaaa0444b38123af Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 8 Apr 2020 13:54:05 -0700 Subject: [PATCH 4/7] revert --- pandas/tests/indexes/interval/test_constructors.py | 2 +- pandas/tests/indexes/ranges/test_constructors.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/indexes/interval/test_constructors.py b/pandas/tests/indexes/interval/test_constructors.py index 84b939dc1a0f1..fa881df8139c6 100644 --- a/pandas/tests/indexes/interval/test_constructors.py +++ b/pandas/tests/indexes/interval/test_constructors.py @@ -361,7 +361,7 @@ def test_constructor_errors(self, constructor): # scalar msg = ( - r"Index\(...\) must be called with a collection of " + r"IntervalIndex\(...\) must be called with a collection of " "some kind, 5 was passed" ) with pytest.raises(TypeError, match=msg): diff --git a/pandas/tests/indexes/ranges/test_constructors.py b/pandas/tests/indexes/ranges/test_constructors.py index b7f673428ae38..426341a53a5d1 100644 --- a/pandas/tests/indexes/ranges/test_constructors.py +++ b/pandas/tests/indexes/ranges/test_constructors.py @@ -43,7 +43,7 @@ def test_constructor_invalid_args(self): r"kind, 0 was passed" ) with pytest.raises(TypeError, match=msg): - Index(0) + Index(0, 1000) @pytest.mark.parametrize( "args", From e86e82e9246cebe7a8c754240d72877282ffb71d Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 8 Apr 2020 13:58:33 -0700 Subject: [PATCH 5/7] Call pandas_dtype up front in Index.__new__ --- pandas/core/indexes/base.py | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index df58593bc930c..f17dff307b6b2 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -48,6 +48,7 @@ is_signed_integer_dtype, is_timedelta64_dtype, is_unsigned_integer_dtype, + pandas_dtype, ) from pandas.core.dtypes.concat import concat_compat from pandas.core.dtypes.generic import ( @@ -68,6 +69,7 @@ from pandas.core.accessor import CachedAccessor import pandas.core.algorithms as algos from pandas.core.arrays import ExtensionArray +from pandas.core.arrays.datetimes import tz_to_dtype, validate_tz_from_dtype from pandas.core.base import IndexOpsMixin, PandasObject import pandas.core.common as com from pandas.core.indexers import deprecate_ndim_indexing @@ -292,10 +294,19 @@ def __new__( name = maybe_extract_name(name, data, cls) + if dtype is not None: + dtype = pandas_dtype(dtype) + if "tz" in kwargs: + tz = kwargs.pop("tz") + validate_tz_from_dtype(dtype, tz) + dtype = tz_to_dtype(tz) + if isinstance(data, ABCPandasArray): # ensure users don't accidentally put a PandasArray in an index. data = data.to_numpy() + data_dype = getattr(data, "dtype", None) + # range if isinstance(data, RangeIndex): return RangeIndex(start=data, copy=copy, dtype=dtype, name=name) @@ -303,43 +314,39 @@ def __new__( return RangeIndex.from_range(data, dtype=dtype, name=name) # categorical - elif is_categorical_dtype(data) or is_categorical_dtype(dtype): + elif is_categorical_dtype(data_dype) or is_categorical_dtype(dtype): # Delay import for perf. https://github.com/pandas-dev/pandas/pull/31423 from pandas.core.indexes.category import CategoricalIndex return _maybe_asobject(dtype, CategoricalIndex, data, copy, name, **kwargs) # interval - elif is_interval_dtype(data) or is_interval_dtype(dtype): + elif is_interval_dtype(data_dype) or is_interval_dtype(dtype): # Delay import for perf. https://github.com/pandas-dev/pandas/pull/31423 from pandas.core.indexes.interval import IntervalIndex return _maybe_asobject(dtype, IntervalIndex, data, copy, name, **kwargs) - elif ( - is_datetime64_any_dtype(data) - or is_datetime64_any_dtype(dtype) - or "tz" in kwargs - ): + elif is_datetime64_any_dtype(data_dype) or is_datetime64_any_dtype(dtype): # Delay import for perf. https://github.com/pandas-dev/pandas/pull/31423 from pandas import DatetimeIndex return _maybe_asobject(dtype, DatetimeIndex, data, copy, name, **kwargs) - elif is_timedelta64_dtype(data) or is_timedelta64_dtype(dtype): + elif is_timedelta64_dtype(data_dype) or is_timedelta64_dtype(dtype): # Delay import for perf. https://github.com/pandas-dev/pandas/pull/31423 from pandas import TimedeltaIndex return _maybe_asobject(dtype, TimedeltaIndex, data, copy, name, **kwargs) - elif is_period_dtype(data) or is_period_dtype(dtype): + elif is_period_dtype(data_dype) or is_period_dtype(dtype): # Delay import for perf. https://github.com/pandas-dev/pandas/pull/31423 from pandas import PeriodIndex return _maybe_asobject(dtype, PeriodIndex, data, copy, name, **kwargs) # extension dtype - elif is_extension_array_dtype(data) or is_extension_array_dtype(dtype): + elif is_extension_array_dtype(data_dype) or is_extension_array_dtype(dtype): if not (dtype is None or is_object_dtype(dtype)): # coerce to the provided dtype ea_cls = dtype.construct_array_type() From ec7442cfe3b6478444a7173f00c49b149251e095 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 8 Apr 2020 15:03:10 -0700 Subject: [PATCH 6/7] update test --- pandas/tests/indexes/ranges/test_constructors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/indexes/ranges/test_constructors.py b/pandas/tests/indexes/ranges/test_constructors.py index 426341a53a5d1..b7f673428ae38 100644 --- a/pandas/tests/indexes/ranges/test_constructors.py +++ b/pandas/tests/indexes/ranges/test_constructors.py @@ -43,7 +43,7 @@ def test_constructor_invalid_args(self): r"kind, 0 was passed" ) with pytest.raises(TypeError, match=msg): - Index(0, 1000) + Index(0) @pytest.mark.parametrize( "args", From 770056959ec4f0c02cf813c492b2cfcdf72339b2 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 10 Apr 2020 11:05:18 -0700 Subject: [PATCH 7/7] typo, unused import --- pandas/core/indexes/base.py | 14 +++++++------- pandas/core/internals/blocks.py | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index b1ed6feec516f..530aaee24c7fb 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -305,7 +305,7 @@ def __new__( # ensure users don't accidentally put a PandasArray in an index. data = data.to_numpy() - data_dype = getattr(data, "dtype", None) + data_dtype = getattr(data, "dtype", None) # range if isinstance(data, RangeIndex): @@ -314,39 +314,39 @@ def __new__( return RangeIndex.from_range(data, dtype=dtype, name=name) # categorical - elif is_categorical_dtype(data_dype) or is_categorical_dtype(dtype): + elif is_categorical_dtype(data_dtype) or is_categorical_dtype(dtype): # Delay import for perf. https://github.com/pandas-dev/pandas/pull/31423 from pandas.core.indexes.category import CategoricalIndex return _maybe_asobject(dtype, CategoricalIndex, data, copy, name, **kwargs) # interval - elif is_interval_dtype(data_dype) or is_interval_dtype(dtype): + elif is_interval_dtype(data_dtype) or is_interval_dtype(dtype): # Delay import for perf. https://github.com/pandas-dev/pandas/pull/31423 from pandas.core.indexes.interval import IntervalIndex return _maybe_asobject(dtype, IntervalIndex, data, copy, name, **kwargs) - elif is_datetime64_any_dtype(data_dype) or is_datetime64_any_dtype(dtype): + elif is_datetime64_any_dtype(data_dtype) or is_datetime64_any_dtype(dtype): # Delay import for perf. https://github.com/pandas-dev/pandas/pull/31423 from pandas import DatetimeIndex return _maybe_asobject(dtype, DatetimeIndex, data, copy, name, **kwargs) - elif is_timedelta64_dtype(data_dype) or is_timedelta64_dtype(dtype): + elif is_timedelta64_dtype(data_dtype) or is_timedelta64_dtype(dtype): # Delay import for perf. https://github.com/pandas-dev/pandas/pull/31423 from pandas import TimedeltaIndex return _maybe_asobject(dtype, TimedeltaIndex, data, copy, name, **kwargs) - elif is_period_dtype(data_dype) or is_period_dtype(dtype): + elif is_period_dtype(data_dtype) or is_period_dtype(dtype): # Delay import for perf. https://github.com/pandas-dev/pandas/pull/31423 from pandas import PeriodIndex return _maybe_asobject(dtype, PeriodIndex, data, copy, name, **kwargs) # extension dtype - elif is_extension_array_dtype(data_dype) or is_extension_array_dtype(dtype): + elif is_extension_array_dtype(data_dtype) or is_extension_array_dtype(dtype): if not (dtype is None or is_object_dtype(dtype)): # coerce to the provided dtype ea_cls = dtype.construct_array_type() diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 75c935cdf2e60..80573f32b936e 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -8,7 +8,7 @@ from pandas._libs import NaT, algos as libalgos, lib, writers import pandas._libs.internals as libinternals -from pandas._libs.tslibs import Timedelta, conversion +from pandas._libs.tslibs import conversion from pandas._libs.tslibs.timezones import tz_compare from pandas._typing import ArrayLike from pandas.util._validators import validate_bool_kwarg