pandas-dev · jorisvandenbossche · Apr 9, 2017 · Apr 7, 2017 · Apr 7, 2017 · Apr 7, 2017
diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
@@ -593,6 +593,76 @@ result. On the other hand, this might have backward incompatibilities: e.g.
 compared to numpy arrays, ``Index`` objects are not mutable. To get the original
 ndarray, you can always convert explicitly using ``np.asarray(idx.hour)``.
 
+.. _whatsnew_0200.api_breaking.unique:
+
+pd.unique will now be consistent with extension types
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+In prior versions, using ``Series.unique()`` and ``pd.unique(Series)`` on ``Categorical`` and tz-aware
+datatypes would yield different return types. These are now made consistent. (:issue:`15903`)
+
+- Datetime tz-aware
+
+  Previous Behavior:
+
+  .. code-block:: ipython
+
+     # Series
+     In [5]: pd.Series([pd.Timestamp('20160101', tz='US/Eastern'),
+                        pd.Timestamp('20160101', tz='US/Eastern')]).unique()
+     Out[5]: array([Timestamp('2016-01-01 00:00:00-0500', tz='US/Eastern')], dtype=object)
+
+     In [6]: pd.unique(pd.Series([pd.Timestamp('20160101', tz='US/Eastern'),
+                                  pd.Timestamp('20160101', tz='US/Eastern')]))
+     Out[6]: array(['2016-01-01T05:00:00.000000000'], dtype='datetime64[ns]')
+
+     # Index
+     In [7]: pd.Index([pd.Timestamp('20160101', tz='US/Eastern'),
+                       pd.Timestamp('20160101', tz='US/Eastern')]).unique()
+     Out[7]: DatetimeIndex(['2016-01-01 00:00:00-05:00'], dtype='datetime64[ns, US/Eastern]', freq=None)
+
+     In [8]: pd.unique([pd.Timestamp('20160101', tz='US/Eastern'),
+                        pd.Timestamp('20160101', tz='US/Eastern')])
+     Out[8]: array(['2016-01-01T05:00:00.000000000'], dtype='datetime64[ns]')
+
+  New Behavior:
+
+  .. ipython:: python
+
+     # Series, returns an object array of tz-aware Timestamps
+     pd.Series([pd.Timestamp('20160101', tz='US/Eastern'),
+               pd.Timestamp('20160101', tz='US/Eastern')]).unique()
+     pd.unique(pd.Series([pd.Timestamp('20160101', tz='US/Eastern'),
+                          pd.Timestamp('20160101', tz='US/Eastern')]))
+
+     # Index, returns a DatetimeIndex
+     pd.Index([pd.Timestamp('20160101', tz='US/Eastern'),
+               pd.Timestamp('20160101', tz='US/Eastern')]).unique()
+     pd.unique(pd.Index([pd.Timestamp('20160101', tz='US/Eastern'),
+                         pd.Timestamp('20160101', tz='US/Eastern')]))
+
+- Categoricals
+
+  Previous Behavior:
+
+  .. code-block:: ipython
+
+     In [1]: pd.Series(pd.Categorical(list('baabc'))).unique()
+     Out[1]:
+     [b, a, c]
+     Categories (3, object): [b, a, c]
+
+     In [2]: pd.unique(pd.Series(pd.Categorical(list('baabc'))))
+     Out[2]: array(['b', 'a', 'c'], dtype=object)
+
+  New Behavior:
+
+  .. ipython:: python
+
+     # returns a Categorical
+     pd.Series(pd.Categorical(list('baabc'))).unique()
+     pd.unique(pd.Series(pd.Categorical(list('baabc'))))
+
 .. _whatsnew_0200.api_breaking.s3:
 
 S3 File Handling
@@ -1148,6 +1218,7 @@ Conversion
 - Bug in ``DataFrame`` construction with nulls and datetimes in a list-like (:issue:`15869`)
 - Bug in ``DataFrame.fillna()`` with tz-aware datetimes (:issue:`15855`)
 - Bug in ``is_string_dtype``, ``is_timedelta64_ns_dtype``, and ``is_string_like_dtype`` in which an error was raised when ``None`` was passed in (:issue:`15941`)
+- Bug in the return type of ``pd.unique`` on a ``Categorical``, which was returning an ndarray and not a ``Categorical`` (:issue:`15903`)
 
 Indexing
 ^^^^^^^^

diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
@@ -267,22 +267,103 @@ def match(to_match, values, na_sentinel=-1):
     return result
 
 
-def unique1d(values):
+def unique(values):
     """
-    Hash table-based unique
+    Hash table-based unique. Uniques are returned in order
+    of appearance. This does NOT sort.
+
+    Significantly faster than numpy.unique. Includes NA values.
+
+    Parameters
+    ----------
+    values : 1d array-like
+
+    Returns
+    -------
+    unique values.
+      - If the input is an Index, the return is an Index
+      - If the input is a Categorical dtype, the return is a Categorical
+      - If the input is a Series/ndarray, the return will be an ndarray
+
+    Examples
+    --------
+    >>> pd.unique(pd.Series([2, 1, 3, 3]))
+    array([2, 1, 3])
+
+    >>> pd.unique(pd.Series([2] + [1] * 5))
+    array([2, 1])
+
+    >>> pd.unique(pd.Series([pd.Timestamp('20160101'),
+    ...                      pd.Timestamp('20160101')]))
+    array(['2016-01-01T00:00:00.000000000'], dtype='datetime64[ns]')
+
+    >>> pd.unique(pd.Series([pd.Timestamp('20160101', tz='US/Eastern'),
+    ...                      pd.Timestamp('20160101', tz='US/Eastern')]))
+    array([Timestamp('2016-01-01 00:00:00-0500', tz='US/Eastern')],
+          dtype=object)
+
+    >>> pd.unique(pd.Index([pd.Timestamp('20160101', tz='US/Eastern'),
+    ...                     pd.Timestamp('20160101', tz='US/Eastern')]))
+    DatetimeIndex(['2016-01-01 00:00:00-05:00'],
+                  dtype='datetime64[ns, US/Eastern]', freq=None)
+
+    >>> pd.unique(list('baabc'))
+    array(['b', 'a', 'c'], dtype=object)
+
+    An unordered Categorical will return categories in the
+    order of appearance.
+
+    >>> pd.unique(pd.Series(pd.Categorical(list('baabc'))))
+    [b, a, c]
+    Categories (3, object): [b, a, c]
+
+    >>> pd.unique(pd.Series(pd.Categorical(list('baabc'),
+    ...                                    categories=list('abc'))))
+    [b, a, c]
+    Categories (3, object): [b, a, c]
+
+    An ordered Categorical preserves the category ordering.
+
+    >>> pd.unique(pd.Series(pd.Categorical(list('baabc'),
+    ...                                    categories=list('abc'),
+    ...                                    ordered=True)))
+    [b, a, c]
+    Categories (3, object): [a < b < c]
+
+    See Also
+    --------
+    pandas.Index.unique
+    pandas.Series.unique
+
     """
+
     values = _ensure_arraylike(values)
+
+    # categorical is a fast-path: dispatch straight to the object's own
+    # ``unique`` method. No unwrapping is done here; the former
+    # ``getattr(values, '.values', values)`` used a misspelled attribute
+    # name and simply fell back to ``values`` itself.
+    if is_categorical_dtype(values):
+        return values.unique()
+
     original = values
     htable, _, values, dtype, ndtype = _get_hashtable_algo(values)
 
     table = htable(len(values))
     uniques = table.unique(values)
     uniques = _reconstruct_data(uniques, dtype, original)
 
+    if isinstance(original, ABCSeries) and is_datetime64tz_dtype(dtype):
+        # we are special casing datetime64tz_dtype
+        # to return an object array of tz-aware Timestamps
+
+        # TODO: it must return DatetimeArray with tz in pandas 2.0
+        uniques = uniques.asobject.values
+
     return uniques
 
 
-unique = unique1d
+unique1d = unique
 
 
 def isin(comps, values):
@@ -651,7 +732,7 @@ def mode(values):
     if is_categorical_dtype(values):
 
         if isinstance(values, Series):
-            return Series(values.values.mode())
+            return Series(values.values.mode(), name=values.name)
         return values.mode()
 
     values, dtype, ndtype = _ensure_data(values)

diff --git a/pandas/core/base.py b/pandas/core/base.py
@@ -855,13 +855,24 @@ def value_counts(self, normalize=False, sort=True, ascending=False,
 
     _shared_docs['unique'] = (
         """
-        Return %(unique)s of unique values in the object.
-        Significantly faster than numpy.unique. Includes NA values.
-        The order of the original is preserved.
+        Return unique values in the object. Uniques are returned in order
+        of appearance, this does NOT sort. Hash table-based unique.
+
+        Parameters
+        ----------
+        values : 1d array-like
 
         Returns
         -------
-        uniques : %(unique)s
+        unique values.
+          - If the input is an Index, the return is an Index
+          - If the input is a Categorical dtype, the return is a Categorical
+          - If the input is a Series/ndarray, the return will be an ndarray
+
+        See Also
+        --------
+        pandas.unique
+        pandas.Categorical.unique
         """)
 
     @Appender(_shared_docs['unique'] % _indexops_doc_kwargs)
@@ -873,6 +884,7 @@ def unique(self):
         else:
             from pandas.core.algorithms import unique1d
             result = unique1d(values)
+
         return result
 
     def nunique(self, dropna=True):

diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py
@@ -1895,6 +1895,33 @@ def unique(self):
         Returns
         -------
         unique values : ``Categorical``
+
+        Examples
+        --------
+        An unordered Categorical will return categories in the
+        order of appearance.
+
+        >>> pd.Categorical(list('baabc')).unique()
+        [b, a, c]
+        Categories (3, object): [b, a, c]
+
+        >>> pd.Categorical(list('baabc'), categories=list('abc')).unique()
+        [b, a, c]
+        Categories (3, object): [b, a, c]
+
+        An ordered Categorical preserves the category ordering.
+
+        >>> pd.Categorical(list('baabc'),
+        ...                categories=list('abc'),
+        ...                ordered=True).unique()
+        [b, a, c]
+        Categories (3, object): [a < b < c]
+
+        See Also
+        --------
+        pandas.unique
+        pandas.CategoricalIndex.unique
+
         """
 
         # unlike np.unique, unique1d does not sort

diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -1204,10 +1204,14 @@ def mode(self):
     @Appender(base._shared_docs['unique'] % _shared_doc_kwargs)
     def unique(self):
         result = super(Series, self).unique()
+
         if is_datetime64tz_dtype(self.dtype):
-            # to return array of Timestamp with tz
-            # ToDo: it must return DatetimeArray with tz in pandas 2.0
-            return result.asobject.values
+            # we are special casing datetime64tz_dtype
+            # to return an object array of tz-aware Timestamps
+
+            # TODO: it must return DatetimeArray with tz in pandas 2.0
+            result = result.asobject.values
+
         return result
 
     @Appender(base._shared_docs['drop_duplicates'] % _shared_doc_kwargs)