Skip to content
16 changes: 11 additions & 5 deletions pandas/core/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -616,7 +616,7 @@ def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None):
na_value=na_value)

if sort and len(uniques) > 0:
from pandas.core.sorting import safe_sort
from pandas.core.sorting import safe_sort, SortError
if na_sentinel == -1:
# GH-25409 take_1d only works for na_sentinels of -1
try:
Expand All @@ -626,13 +626,19 @@ def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None):
uniques = uniques.take(order)
except TypeError:
# Mixed types, where uniques.argsort fails.
try:
uniques, labels = safe_sort(uniques, labels,
na_sentinel=na_sentinel,
assume_unique=True)
except SortError as e:
raise TypeError(e) from e
else:
try:
uniques, labels = safe_sort(uniques, labels,
na_sentinel=na_sentinel,
assume_unique=True)
else:
uniques, labels = safe_sort(uniques, labels,
na_sentinel=na_sentinel,
assume_unique=True)
except SortError as e:
raise TypeError(e) from e

uniques = _reconstruct_data(uniques, dtype, original)

Expand Down
12 changes: 8 additions & 4 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
import pandas.core.missing as missing
from pandas.core.ops import get_op_result_name, make_invalid_op
import pandas.core.sorting as sorting
from pandas.core.sorting import SortError
from pandas.core.strings import StringMethods

from pandas.io.formats.printing import (
Expand Down Expand Up @@ -2345,7 +2346,7 @@ def union(self, other, sort=None):
if sort is None:
try:
result = sorting.safe_sort(result)
except TypeError as e:
except SortError as e:
warnings.warn("{}, sort order is undefined for "
"incomparable objects".format(e),
RuntimeWarning, stacklevel=3)
Expand Down Expand Up @@ -2432,7 +2433,10 @@ def intersection(self, other, sort=False):
taken = other.take(indexer)

if sort is None:
taken = sorting.safe_sort(taken.values)
try:
taken = sorting.safe_sort(taken.values)
except sorting.SortError as e:
raise TypeError(e) from e
if self.name != other.name:
name = None
else:
Expand Down Expand Up @@ -2504,7 +2508,7 @@ def difference(self, other, sort=None):
if sort is None:
try:
the_diff = sorting.safe_sort(the_diff)
except TypeError:
except SortError:
pass

return this._shallow_copy(the_diff, name=result_name, freq=None)
Expand Down Expand Up @@ -2580,7 +2584,7 @@ def symmetric_difference(self, other, result_name=None, sort=None):
if sort is None:
try:
the_diff = sorting.safe_sort(the_diff)
except TypeError:
except SortError:
pass

attribs = self._get_attributes_dict()
Expand Down
5 changes: 4 additions & 1 deletion pandas/core/reshape/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -1738,7 +1738,10 @@ def _sort_labels(uniques, left, right):
llength = len(left)
labels = np.concatenate([left, right])

_, new_labels = sorting.safe_sort(uniques, labels, na_sentinel=-1)
try:
_, new_labels = sorting.safe_sort(uniques, labels, na_sentinel=-1)
except sorting.SortError as e:
raise TypeError(e) from e
new_labels = ensure_int64(new_labels)
new_left, new_right = new_labels[:llength], new_labels[llength:]

Expand Down
19 changes: 15 additions & 4 deletions pandas/core/sorting.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,13 @@
_INT64_MAX = np.iinfo(np.int64).max


class SortError(TypeError):
    """
    Raised when values cannot be sorted because of the input data.

    ``safe_sort`` raises this when sorting its ``values`` fails, for
    example when ``np.sort`` hits a ``TypeError`` on incomparable
    elements. It subclasses ``TypeError``, so handlers written as
    ``except TypeError`` also catch it.
    """


def get_group_index(labels, shape, sort, xnull):
"""
For the particular label_list, gets the offsets into the hypothetical list
Expand Down Expand Up @@ -437,8 +444,9 @@ def safe_sort(values, labels=None, na_sentinel=-1, assume_unique=False):
------
TypeError
* If ``values`` is not list-like or if ``labels`` is neither None
nor list-like
* If ``values`` cannot be sorted
nor list-like.
SortError
* If ``values`` cannot be sorted.
ValueError
* If ``labels`` is not None and ``values`` contain duplicates.
"""
Expand All @@ -456,8 +464,11 @@ def sort_mixed(values):
# order ints before strings, safe in py3
str_pos = np.array([isinstance(x, string_types) for x in values],
dtype=bool)
nums = np.sort(values[~str_pos])
strs = np.sort(values[str_pos])
try:
nums = np.sort(values[~str_pos])
strs = np.sort(values[str_pos])
except TypeError as e:
raise SortError(e) from e
return np.concatenate([nums, np.asarray(strs, dtype=object)])

sorter = None
Expand Down
7 changes: 3 additions & 4 deletions pandas/tests/test_algos.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import pandas.core.algorithms as algos
from pandas.core.arrays import DatetimeArray
import pandas.core.common as com
from pandas.core.sorting import SortError
import pandas.util.testing as tm
from pandas.util.testing import assert_almost_equal

Expand Down Expand Up @@ -228,11 +229,9 @@ def test_complex_sorting(self):
# gh 12666 - check no segfault
x17 = np.array([complex(i) for i in range(17)], dtype=object)

msg = ("unorderable types: .* [<>] .*"
"|" # the above case happens for numpy < 1.14
"'[<>]' not supported between instances of .*")
with pytest.raises(TypeError, match=msg):
with pytest.raises(TypeError, match="complex") as excinfo:
algos.factorize(x17[::-1], sort=True)
assert type(excinfo.value.__cause__) == SortError

def test_float64_factorize(self, writable):
data = np.array([1.0, 1e8, 1.0, 1e-8, 1e8, 1.0], dtype=np.float64)
Expand Down
8 changes: 3 additions & 5 deletions pandas/tests/test_sorting.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
DataFrame, MultiIndex, Series, compat, concat, merge, to_datetime)
from pandas.core import common as com
from pandas.core.sorting import (
decons_group_index, get_group_index, is_int64_overflow_possible,
SortError, decons_group_index, get_group_index, is_int64_overflow_possible,
lexsort_indexer, nargsort, safe_sort)
from pandas.util import testing as tm
from pandas.util.testing import assert_frame_equal, assert_series_equal
Expand Down Expand Up @@ -413,10 +413,8 @@ def test_mixed_integer_from_list(self):
def test_unsortable(self):
# GH 13714
arr = np.array([1, 2, datetime.now(), 0, 3], dtype=object)
msg = ("unorderable types: .* [<>] .*"
"|" # the above case happens for numpy < 1.14
"'[<>]' not supported between instances of .*")
with pytest.raises(TypeError, match=msg):
msg = "int.*datetime|datetime.*int"
with pytest.raises(SortError, match=msg):
safe_sort(arr)

def test_exceptions(self):
Expand Down