diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 612e54ba426f3..ce174baa66a97 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -2678,7 +2678,7 @@ def _convert_to_list_like(list_like): return [list_like] -def _factorize_from_iterable(values): +def factorize_from_iterable(values): """ Factorize an input `values` into `categories` and `codes`. Preserves categorical dtype in `categories`. @@ -2716,9 +2716,9 @@ def _factorize_from_iterable(values): return codes, categories -def _factorize_from_iterables(iterables): +def factorize_from_iterables(iterables): """ - A higher-level wrapper over `_factorize_from_iterable`. + A higher-level wrapper over `factorize_from_iterable`. *This is an internal function* @@ -2733,9 +2733,9 @@ def _factorize_from_iterables(iterables): Notes ----- - See `_factorize_from_iterable` for more info. + See `factorize_from_iterable` for more info. """ if len(iterables) == 0: # For consistency, it should return a list of 2 lists. return [[], []] - return map(list, zip(*(_factorize_from_iterable(it) for it in iterables))) + return map(list, zip(*(factorize_from_iterable(it) for it in iterables))) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index f968a9eb4103c..66deacac37789 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -31,7 +31,7 @@ import pandas.core.algorithms as algos from pandas.core.arrays import Categorical -from pandas.core.arrays.categorical import _factorize_from_iterables +from pandas.core.arrays.categorical import factorize_from_iterables import pandas.core.common as com import pandas.core.indexes.base as ibase from pandas.core.indexes.base import ( @@ -440,7 +440,7 @@ def from_arrays(cls, arrays, sortorder=None, names=_no_default_names): if len(arrays[i]) != len(arrays[i - 1]): raise ValueError("all arrays must be same length") - codes, levels = _factorize_from_iterables(arrays) + codes, levels = factorize_from_iterables(arrays) if names is _no_default_names: names = [getattr(arr, "name", None) for arr in arrays] @@ -562,7 +562,7 @@ def from_product(cls, iterables, sortorder=None, names=_no_default_names): elif is_iterator(iterables): iterables = list(iterables) - codes, levels = _factorize_from_iterables(iterables) + codes, levels = factorize_from_iterables(iterables) if names is _no_default_names: names = [getattr(it, "name", None) for it in iterables] diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index bbf41fc28e9d2..c11915c00c59d 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -8,8 +8,8 @@ from pandas import DataFrame, Index, MultiIndex, Series from pandas.core.arrays.categorical import ( - _factorize_from_iterable, - _factorize_from_iterables, + factorize_from_iterable, + factorize_from_iterables, ) import pandas.core.common as com from pandas.core.generic import NDFrame @@ -604,7 +604,7 @@ def _make_concat_multiindex(indexes, keys, levels=None, names=None): names = [None] * len(zipped) if levels is None: - _, levels = _factorize_from_iterables(zipped) + _, levels = factorize_from_iterables(zipped) else: levels = [ensure_index(x) for x in levels] else: @@ -645,7 +645,7 @@ def _make_concat_multiindex(indexes, keys, levels=None, names=None): levels.extend(concat_index.levels) codes_list.extend(concat_index.codes) else: - codes, categories = _factorize_from_iterable(concat_index) + codes, categories = factorize_from_iterable(concat_index) levels.append(categories) codes_list.append(codes) diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 949d8f1bfb09c..d7eae1c543804 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -22,7 +22,7 @@ import pandas.core.algorithms as algos from pandas.core.arrays import SparseArray -from pandas.core.arrays.categorical import _factorize_from_iterable +from pandas.core.arrays.categorical import factorize_from_iterable from pandas.core.construction import extract_array from pandas.core.frame import DataFrame from pandas.core.index import Index, MultiIndex @@ -504,7 +504,7 @@ def stack(frame, level=-1, dropna=True): def factorize(index): if index.is_unique: return index, np.arange(len(index)) - codes, categories = _factorize_from_iterable(index) + codes, categories = factorize_from_iterable(index) return categories, codes N, K = frame.shape @@ -725,7 +725,7 @@ def _convert_level_number(level_num, columns): new_names = list(this.index.names) new_codes = [lab.repeat(levsize) for lab in this.index.codes] else: - old_codes, old_levels = _factorize_from_iterable(this.index) + old_codes, old_levels = factorize_from_iterable(this.index) new_levels = [old_levels] new_codes = [old_codes.repeat(levsize)] new_names = [this.index.name] # something better? @@ -949,7 +949,7 @@ def _get_dummies_1d( from pandas.core.reshape.concat import concat # Series avoids inconsistent NaN handling - codes, levels = _factorize_from_iterable(Series(data)) + codes, levels = factorize_from_iterable(Series(data)) if dtype is None: dtype = np.uint8