diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index d616e3f92aa4d..9101fca58d5fa 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -10,8 +10,9 @@
     maybe_promote, construct_1d_object_array_from_listlike)
 from pandas.core.dtypes.generic import (
     ABCSeries, ABCIndex,
-    ABCIndexClass, ABCCategorical)
+    ABCIndexClass)
 from pandas.core.dtypes.common import (
+    is_array_like,
     is_unsigned_integer_dtype, is_signed_integer_dtype,
     is_integer_dtype, is_complex_dtype,
     is_object_dtype,
@@ -168,8 +169,7 @@ def _ensure_arraylike(values):
     """
     ensure that we are arraylike if not already
     """
-    if not isinstance(values, (np.ndarray, ABCCategorical,
-                               ABCIndexClass, ABCSeries)):
+    if not is_array_like(values):
         inferred = lib.infer_dtype(values)
         if inferred in ['mixed', 'string', 'unicode']:
             if isinstance(values, tuple):
@@ -353,11 +353,8 @@ def unique(values):
 
     values = _ensure_arraylike(values)
 
-    # categorical is a fast-path
-    # this will coerce Categorical, CategoricalIndex,
-    # and category dtypes Series to same return of Category
-    if is_categorical_dtype(values):
-        values = getattr(values, '.values', values)
+    if is_extension_array_dtype(values):
+        # Dispatch to extension dtype's unique.
         return values.unique()
 
     original = values
diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
index 37074b563efbd..1f33081a5f610 100644
--- a/pandas/core/arrays/base.py
+++ b/pandas/core/arrays/base.py
@@ -236,6 +236,18 @@ def isna(self):
         """
         raise AbstractMethodError(self)
 
+    def unique(self):
+        """Compute the ExtensionArray of unique values.
+
+        Returns
+        -------
+        uniques : ExtensionArray
+        """
+        from pandas import unique
+
+        uniques = unique(self.astype(object))
+        return self._constructor_from_sequence(uniques)
+
     # ------------------------------------------------------------------------
     # Indexing methods
     # ------------------------------------------------------------------------
diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py
index 74e5d180b1aa3..7ce80e25d8cf6 100644
--- a/pandas/tests/extension/base/methods.py
+++ b/pandas/tests/extension/base/methods.py
@@ -31,3 +31,14 @@ def test_count(self, data_missing):
     def test_apply_simple_series(self, data):
         result = pd.Series(data).apply(id)
         assert isinstance(result, pd.Series)
+
+    @pytest.mark.parametrize('box', [pd.Series, lambda x: x])
+    @pytest.mark.parametrize('method', [lambda x: x.unique(), pd.unique])
+    def test_unique(self, data, box, method):
+        duplicated = box(data._constructor_from_sequence([data[0], data[0]]))
+
+        result = method(duplicated)
+
+        assert len(result) == 1
+        assert isinstance(result, type(data))
+        assert result[0] == duplicated[0]
diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py
index 21addf9d1549f..322944129146a 100644
--- a/pandas/tests/extension/json/array.py
+++ b/pandas/tests/extension/json/array.py
@@ -88,6 +88,20 @@ def take(self, indexer, allow_fill=True, fill_value=None):
     def copy(self, deep=False):
         return type(self)(self.data[:])
 
+    def unique(self):
+        # Parent method doesn't work since np.array will try to infer
+        # a 2-dim object.
+        # Deduplicate on the hashable item tuples while keeping the order of
+        # first appearance (a bare set() would return an arbitrary order).
+        seen = set()
+        uniques = []
+        for d in self.data:
+            key = tuple(d.items())
+            if key not in seen:
+                seen.add(key)
+                uniques.append(dict(key))
+        return type(self)(uniques)
+
     @property
     def _na_value(self):
         return {}