Skip to content

REF: add custom Exception for safe_sort #25569

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 12 commits into from
16 changes: 11 additions & 5 deletions pandas/core/algorithms.py
Original file line number Diff line number Diff line change
@@ -616,7 +616,7 @@ def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None):
na_value=na_value)

if sort and len(uniques) > 0:
from pandas.core.sorting import safe_sort
from pandas.core.sorting import safe_sort, SortError
if na_sentinel == -1:
# GH-25409 take_1d only works for na_sentinels of -1
try:
@@ -626,13 +626,19 @@ def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None):
uniques = uniques.take(order)
except TypeError:
# Mixed types, where uniques.argsort fails.
try:
uniques, labels = safe_sort(uniques, labels,
na_sentinel=na_sentinel,
assume_unique=True)
except SortError as e:
raise TypeError(e) from e
else:
try:
uniques, labels = safe_sort(uniques, labels,
na_sentinel=na_sentinel,
assume_unique=True)
else:
uniques, labels = safe_sort(uniques, labels,
na_sentinel=na_sentinel,
assume_unique=True)
except SortError as e:
raise TypeError(e) from e

uniques = _reconstruct_data(uniques, dtype, original)

12 changes: 8 additions & 4 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
@@ -43,6 +43,7 @@
import pandas.core.missing as missing
from pandas.core.ops import get_op_result_name, make_invalid_op
import pandas.core.sorting as sorting
from pandas.core.sorting import SortError
from pandas.core.strings import StringMethods

from pandas.io.formats.printing import (
@@ -2345,7 +2346,7 @@ def union(self, other, sort=None):
if sort is None:
try:
result = sorting.safe_sort(result)
except TypeError as e:
except SortError as e:
warnings.warn("{}, sort order is undefined for "
"incomparable objects".format(e),
RuntimeWarning, stacklevel=3)
@@ -2432,7 +2433,10 @@ def intersection(self, other, sort=False):
taken = other.take(indexer)

if sort is None:
taken = sorting.safe_sort(taken.values)
try:
taken = sorting.safe_sort(taken.values)
except sorting.SortError as e:
raise TypeError(e) from e
if self.name != other.name:
name = None
else:
@@ -2504,7 +2508,7 @@ def difference(self, other, sort=None):
if sort is None:
try:
the_diff = sorting.safe_sort(the_diff)
except TypeError:
except SortError:
pass

return this._shallow_copy(the_diff, name=result_name, freq=None)
@@ -2580,7 +2584,7 @@ def symmetric_difference(self, other, result_name=None, sort=None):
if sort is None:
try:
the_diff = sorting.safe_sort(the_diff)
except TypeError:
except SortError:
pass

attribs = self._get_attributes_dict()
5 changes: 4 additions & 1 deletion pandas/core/reshape/merge.py
Original file line number Diff line number Diff line change
@@ -1738,7 +1738,10 @@ def _sort_labels(uniques, left, right):
llength = len(left)
labels = np.concatenate([left, right])

_, new_labels = sorting.safe_sort(uniques, labels, na_sentinel=-1)
try:
_, new_labels = sorting.safe_sort(uniques, labels, na_sentinel=-1)
except sorting.SortError as e:
raise TypeError(e) from e
new_labels = ensure_int64(new_labels)
new_left, new_right = new_labels[:llength], new_labels[llength:]

19 changes: 15 additions & 4 deletions pandas/core/sorting.py
Original file line number Diff line number Diff line change
@@ -17,6 +17,13 @@
_INT64_MAX = np.iinfo(np.int64).max


class SortError(TypeError):
    """
    Raised when input data cannot be put into sorted order.

    This is a subclass of `TypeError`, so handlers written as
    ``except TypeError`` also catch it.
    """


def get_group_index(labels, shape, sort, xnull):
"""
For the particular label_list, gets the offsets into the hypothetical list
@@ -437,8 +444,9 @@ def safe_sort(values, labels=None, na_sentinel=-1, assume_unique=False):
------
TypeError
* If ``values`` is not list-like or if ``labels`` is neither None
nor list-like
* If ``values`` cannot be sorted
nor list-like.
SortError
* If ``values`` cannot be sorted.
ValueError
* If ``labels`` is not None and ``values`` contain duplicates.
"""
@@ -456,8 +464,11 @@ def sort_mixed(values):
# order ints before strings, safe in py3
str_pos = np.array([isinstance(x, string_types) for x in values],
dtype=bool)
nums = np.sort(values[~str_pos])
strs = np.sort(values[str_pos])
try:
nums = np.sort(values[~str_pos])
strs = np.sort(values[str_pos])
except TypeError as e:
raise SortError(e) from e
return np.concatenate([nums, np.asarray(strs, dtype=object)])

sorter = None
7 changes: 3 additions & 4 deletions pandas/tests/test_algos.py
Original file line number Diff line number Diff line change
@@ -24,6 +24,7 @@
import pandas.core.algorithms as algos
from pandas.core.arrays import DatetimeArray
import pandas.core.common as com
from pandas.core.sorting import SortError
import pandas.util.testing as tm
from pandas.util.testing import assert_almost_equal

@@ -228,11 +229,9 @@ def test_complex_sorting(self):
# gh 12666 - check no segfault
x17 = np.array([complex(i) for i in range(17)], dtype=object)

msg = ("unorderable types: .* [<>] .*"
"|" # the above case happens for numpy < 1.14
"'[<>]' not supported between instances of .*")
with pytest.raises(TypeError, match=msg):
with pytest.raises(TypeError, match="complex") as excinfo:
algos.factorize(x17[::-1], sort=True)
assert type(excinfo.value.__cause__) == SortError

def test_float64_factorize(self, writable):
data = np.array([1.0, 1e8, 1.0, 1e-8, 1e8, 1.0], dtype=np.float64)
8 changes: 3 additions & 5 deletions pandas/tests/test_sorting.py
Original file line number Diff line number Diff line change
@@ -10,7 +10,7 @@
DataFrame, MultiIndex, Series, compat, concat, merge, to_datetime)
from pandas.core import common as com
from pandas.core.sorting import (
decons_group_index, get_group_index, is_int64_overflow_possible,
SortError, decons_group_index, get_group_index, is_int64_overflow_possible,
lexsort_indexer, nargsort, safe_sort)
from pandas.util import testing as tm
from pandas.util.testing import assert_frame_equal, assert_series_equal
@@ -413,10 +413,8 @@ def test_mixed_integer_from_list(self):
def test_unsortable(self):
# GH 13714
arr = np.array([1, 2, datetime.now(), 0, 3], dtype=object)
msg = ("unorderable types: .* [<>] .*"
"|" # the above case happens for numpy < 1.14
"'[<>]' not supported between instances of .*")
with pytest.raises(TypeError, match=msg):
msg = "int.*datetime|datetime.*int"
with pytest.raises(SortError, match=msg):
safe_sort(arr)

def test_exceptions(self):