Skip to content

CLN: private funcs in concat.py #36726

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Oct 4, 2020
Merged
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
103 changes: 58 additions & 45 deletions pandas/core/internals/concat.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from collections import defaultdict
import copy
from typing import Dict, List
from typing import Any, Dict, List, Sequence, Tuple

import numpy as np

Expand Down Expand Up @@ -344,7 +344,7 @@ def _concatenate_join_units(join_units, concat_axis, copy):
return concat_values


def _get_empty_dtype_and_na(join_units):
def _get_empty_dtype_and_na(join_units: Sequence[JoinUnit]) -> Tuple[DtypeObj, Any]:
"""
Return dtype and N/A values to use when concatenating specified units.

Expand Down Expand Up @@ -374,45 +374,8 @@ def _get_empty_dtype_and_na(join_units):
else:
dtypes[i] = unit.dtype

upcast_classes: Dict[str, List[DtypeObj]] = defaultdict(list)
null_upcast_classes: Dict[str, List[DtypeObj]] = defaultdict(list)
for dtype, unit in zip(dtypes, join_units):
if dtype is None:
continue
upcast_classes = _get_upcast_classes(join_units, dtypes)

if is_categorical_dtype(dtype):
upcast_cls = "category"
elif is_datetime64tz_dtype(dtype):
upcast_cls = "datetimetz"

elif is_extension_array_dtype(dtype):
upcast_cls = "extension"

elif issubclass(dtype.type, np.bool_):
upcast_cls = "bool"
elif issubclass(dtype.type, np.object_):
upcast_cls = "object"
elif is_datetime64_dtype(dtype):
upcast_cls = "datetime"
elif is_timedelta64_dtype(dtype):
upcast_cls = "timedelta"
elif is_sparse(dtype):
upcast_cls = dtype.subtype.name
elif is_float_dtype(dtype) or is_numeric_dtype(dtype):
upcast_cls = dtype.name
else:
upcast_cls = "float"

# Null blocks should not influence upcast class selection, unless there
# are only null blocks, when same upcasting rules must be applied to
# null upcast classes.
if unit.is_na:
null_upcast_classes[upcast_cls].append(dtype)
else:
upcast_classes[upcast_cls].append(dtype)

if not upcast_classes:
upcast_classes = null_upcast_classes
# TODO: de-duplicate with maybe_promote?
# create the result
if "extension" in upcast_classes:
Expand Down Expand Up @@ -441,23 +404,73 @@ def _get_empty_dtype_and_na(join_units):
return np.dtype("m8[ns]"), np.timedelta64("NaT", "ns")
else: # pragma
try:
g = np.find_common_type(upcast_classes, [])
common_dtype = np.find_common_type(upcast_classes, [])
except TypeError:
# At least one is an ExtensionArray
return np.dtype(np.object_), np.nan
else:
if is_float_dtype(g):
return g, g.type(np.nan)
elif is_numeric_dtype(g):
if is_float_dtype(common_dtype):
return common_dtype, common_dtype.type(np.nan)
elif is_numeric_dtype(common_dtype):
if has_none_blocks:
return np.dtype(np.float64), np.nan
else:
return g, None
return common_dtype, None

msg = "invalid dtype determination in get_concat_dtype"
raise AssertionError(msg)


def _get_upcast_classes(
    join_units: Sequence[JoinUnit],
    dtypes: Sequence[DtypeObj],
) -> Dict[str, List[DtypeObj]]:
    """
    Group the dtypes of the join units by upcast class label.

    ``dtypes`` and ``join_units`` are walked in lockstep; entries whose
    dtype is None are ignored. Dtypes coming from units flagged ``is_na``
    are collected separately and only returned when no other unit
    contributed a dtype.

    Returns
    -------
    dict
        Mapping of upcast class label to the list of dtypes in that class.
    """
    regular: Dict[str, List[DtypeObj]] = defaultdict(list)
    from_null_units: Dict[str, List[DtypeObj]] = defaultdict(list)

    for current_dtype, join_unit in zip(dtypes, join_units):
        if current_dtype is not None:
            label = _select_upcast_cls_from_dtype(current_dtype)
            # Null blocks must not steer the choice of upcast class, so
            # their dtypes are kept in a separate mapping.
            target = from_null_units if join_unit.is_na else regular
            target[label].append(current_dtype)

    # Only when every contributing block is null do the null blocks decide.
    return regular if regular else from_null_units


def _select_upcast_cls_from_dtype(dtype: DtypeObj) -> str:
if is_categorical_dtype(dtype):
return "category"
elif is_datetime64tz_dtype(dtype):
return "datetimetz"
elif is_extension_array_dtype(dtype):
return "extension"
elif issubclass(dtype.type, np.bool_):
return "bool"
elif issubclass(dtype.type, np.object_):
return "object"
elif is_datetime64_dtype(dtype):
return "datetime"
elif is_timedelta64_dtype(dtype):
return "timedelta"
elif is_sparse(dtype):
# parent class ExtensionDtypes does not have attribute 'subtype',
# only some subclasses do (like SparseDtype), thus mypy error.
return dtype.subtype.name # type: ignore [union-attr]
elif is_float_dtype(dtype) or is_numeric_dtype(dtype):
return dtype.name
else:
return "float"


def _is_uniform_join_units(join_units: List[JoinUnit]) -> bool:
"""
Check if the join units consist of blocks of uniform type that can
Expand Down