diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx index 9eed70a23c9dd..56444573f8a5f 100644 --- a/pandas/_libs/algos.pyx +++ b/pandas/_libs/algos.pyx @@ -162,7 +162,7 @@ cpdef ndarray[int64_t, ndim=1] unique_deltas(const int64_t[:] arr): kh_destroy_int64(table) result = np.array(uniques, dtype=np.int64) - result.sort() + result.sort(kind="stable") return result diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index 5c7680bc6fb6c..f8d84cfd0646f 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -790,7 +790,7 @@ cdef class BaseMultiIndexCodesEngine: ndarray[int64_t, ndim=1] new_codes, new_target_codes ndarray[intp_t, ndim=1] sorted_indexer - target_order = np.argsort(target).astype("int64") + target_order = np.argsort(target, kind="stable").astype("int64") target_values = target[target_order] num_values, num_target_values = len(values), len(target_values) new_codes, new_target_codes = ( @@ -831,7 +831,7 @@ cdef class BaseMultiIndexCodesEngine: # get the indexer, and undo the sorting of `target.values` algo = algos.backfill if method == "backfill" else algos.pad sorted_indexer = algo(new_codes, new_target_codes, limit=limit) - return sorted_indexer[np.argsort(target_order)] + return sorted_indexer[np.argsort(target_order, kind="stable")] def get_loc(self, object key): if is_definitely_invalid_key(key): diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 0c0610f72044e..4304ba607d5d0 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -930,7 +930,7 @@ def get_level_sorter( Argsort for a single level of a multi-index, keeping the order of higher levels unchanged. `starts` points to starts of same-key indices w.r.t to leading levels; equivalent to: - np.hstack([codes[starts[i]:starts[i+1]].argsort(kind='mergesort') + np.hstack([codes[starts[i]:starts[i+1]].argsort(kind='stable') + starts[i] for i in range(len(starts) - 1)]) Parameters @@ -948,7 +948,7 @@ def get_level_sorter( for i in range(len(starts) - 1): l, r = starts[i], starts[i + 1] - out[l:r] = l + codes[l:r].argsort(kind="mergesort") + out[l:r] = l + codes[l:r].argsort(kind="stable") return out diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 1d74bb8b83e4e..620b1db90b527 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -1041,7 +1041,7 @@ def mode( npresult = htable.mode(values, dropna=dropna, mask=mask) try: - npresult = np.sort(npresult) + npresult = np.sort(npresult, kind="stable") except TypeError as err: warnings.warn( f"Unable to sort modes: {err}", @@ -1593,7 +1593,7 @@ def safe_sort( ordered = _sort_mixed(values) else: try: - sorter = values.argsort() + sorter = values.argsort(kind="stable") ordered = values.take(sorter) except TypeError: # Previous sorters failed or were not applicable, try `_sort_mixed` @@ -1634,7 +1634,7 @@ def safe_sort( if use_na_sentinel: # take_nd is faster, but only works for na_sentinels of -1 - order2 = sorter.argsort() + order2 = sorter.argsort(kind="stable") new_codes = take_nd(order2, codes, fill_value=-1) if verify: mask = (codes < -len(values)) | (codes >= len(values)) @@ -1663,8 +1663,8 @@ def _sort_mixed(values) -> AnyArrayLike: str_pos = np.array([isinstance(x, str) for x in values], dtype=bool) null_pos = np.array([isna(x) for x in values], dtype=bool) num_pos = ~str_pos & ~null_pos - str_argsort = np.argsort(values[str_pos]) - num_argsort = np.argsort(values[num_pos]) + str_argsort = np.argsort(values[str_pos], kind="stable") + num_argsort = np.argsort(values[num_pos], kind="stable") # convert boolean arrays to positional indices, then order by underlying values str_locs = str_pos.nonzero()[0].take(str_argsort) num_locs = num_pos.nonzero()[0].take(num_argsort) diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 2b2e0c843564f..6921dd9f969b9 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -1691,7 +1691,7 @@ def __setitem__(self, key, value) -> None: raise ValueError("Length of indexer and values mismatch") if len(indices) == 0: return - argsort = np.argsort(indices) + argsort = np.argsort(indices, kind="stable") indices = indices[argsort] value = value.take(argsort) mask = np.zeros(len(self), dtype=np.bool_) diff --git a/pandas/core/groupby/categorical.py b/pandas/core/groupby/categorical.py index 6ab98cf4fe55e..b7a1d81529253 100644 --- a/pandas/core/groupby/categorical.py +++ b/pandas/core/groupby/categorical.py @@ -54,7 +54,7 @@ def recode_for_groupby( take_codes = unique_codes[unique_codes != -1] if sort: - take_codes = np.sort(take_codes) + take_codes = np.sort(take_codes, kind="stable") # we recode according to the uniques categories = c.categories.take(take_codes) @@ -76,7 +76,7 @@ def recode_for_groupby( # GH 38140: exclude nan from indexer for categories unique_notnan_codes = unique1d(c.codes[c.codes != -1]) if sort: - unique_notnan_codes = np.sort(unique_notnan_codes) + unique_notnan_codes = np.sort(unique_notnan_codes, kind="stable") if len(all_codes) > len(unique_notnan_codes): # GH 13179: All categories need to be present, even if missing from the data missing_codes = np.setdiff1d(all_codes, unique_notnan_codes, assume_unique=True) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index e6dd6a990d285..8c4f33b734d71 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -2060,7 +2060,7 @@ def _apply_filter(self, indices, dropna): if len(indices) == 0: indices = np.array([], dtype="int64") else: - indices = np.sort(np.concatenate(indices)) + indices = np.sort(np.concatenate(indices), kind="stable") if dropna: filtered = self._selected_obj.take(indices, axis=self.axis) else: @@ -2837,9 +2837,9 @@ def _value_counts( if sort: # Sort the values and then resort by the main grouping index_level = range(len(self.grouper.groupings)) - result_series = result_series.sort_values(ascending=ascending).sort_index( - level=index_level, sort_remaining=False - ) + result_series = result_series.sort_values( + ascending=ascending, kind="stable" + ).sort_index(level=index_level, sort_remaining=False, kind="stable") result: Series | DataFrame if self.as_index: @@ -3890,7 +3890,7 @@ def _fill(self, direction: Literal["ffill", "bfill"], limit: int | None = None): limit = -1 ids, _, _ = self.grouper.group_info - sorted_labels = np.argsort(ids, kind="mergesort").astype(np.intp, copy=False) + sorted_labels = np.argsort(ids, kind="stable").astype(np.intp, copy=False) if direction == "bfill": sorted_labels = sorted_labels[::-1] diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index c51c17e04796a..cd5b0d16c74a4 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -370,7 +370,7 @@ def _set_grouper( # before we call _grouper.take. assert self._grouper is not None if self._indexer is not None: - reverse_indexer = self._indexer.argsort() + reverse_indexer = self._indexer.argsort(kind="stable") unsorted_ax = self._grouper.take(reverse_indexer) ax = unsorted_ax.take(obj.index) else: @@ -401,7 +401,7 @@ def _set_grouper( # use stable sort to support first, last, nth # TODO: why does putting na_position="first" fix datetimelike cases? indexer = self._indexer_deprecated = ax.array.argsort( - kind="mergesort", na_position="first" + kind="stable", na_position="first" ) ax = ax.take(indexer) obj = obj.take(indexer, axis=self.axis) @@ -755,7 +755,7 @@ def _codes_and_uniques(self) -> tuple[npt.NDArray[np.signedinteger], ArrayLike]: ucodes = algorithms.unique1d(cat.codes) ucodes = ucodes[ucodes != -1] if self._sort: - ucodes = np.sort(ucodes) + ucodes = np.sort(ucodes, kind="stable") else: ucodes = np.arange(len(categories)) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 8756bb3f3c81b..56f3109acc8e2 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3542,7 +3542,7 @@ def _intersection_via_get_indexer( if sort is False: # sort bc we want the elements in the same order they are in self # unnecessary in the case with sort=None bc we will sort later - taker = np.sort(taker) + taker = np.sort(taker, kind="stable") result: MultiIndex | ExtensionArray | np.ndarray if isinstance(left_unique, ABCMultiIndex): diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index b1db2d2e708e8..c5e51d5309cc3 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -514,7 +514,9 @@ def _combine( return self.make_empty() # FIXME: optimization potential - indexer = np.sort(np.concatenate([b.mgr_locs.as_array for b in blocks])) + indexer = np.sort( + np.concatenate([b.mgr_locs.as_array for b in blocks]), kind="stable" + ) inv_indexer = lib.get_reverse_indexer(indexer, self.shape[0]) new_blocks: list[Block] = [] @@ -2249,7 +2251,7 @@ def _merge_blocks( bvals2 = cast(Sequence[NDArrayBackedExtensionArray], bvals) new_values = bvals2[0]._concat_same_type(bvals2, axis=0) - argsort = np.argsort(new_mgr_locs) + argsort = np.argsort(new_mgr_locs, kind="stable") new_values = new_values[argsort] new_mgr_locs = new_mgr_locs[argsort] diff --git a/pandas/core/methods/selectn.py b/pandas/core/methods/selectn.py index 894791cb46371..53737fb3545f5 100644 --- a/pandas/core/methods/selectn.py +++ b/pandas/core/methods/selectn.py @@ -104,7 +104,7 @@ def compute(self, method: str) -> Series: # slow method if n >= len(self.obj): ascending = method == "nsmallest" - return self.obj.sort_values(ascending=ascending).head(n) + return self.obj.sort_values(ascending=ascending, kind="stable").head(n) # fast method new_dtype = dropped.dtype @@ -141,7 +141,7 @@ def compute(self, method: str) -> Series: # here because kth_smallest will modify its input kth_val = libalgos.kth_smallest(arr.copy(order="C"), n - 1) (ns,) = np.nonzero(arr <= kth_val) - inds = ns[arr[ns].argsort(kind="mergesort")] + inds = ns[arr[ns].argsort(kind="stable")] if self.keep != "all": inds = inds[:n] diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index d96fc02e16d0d..cfc739b689e82 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -651,7 +651,7 @@ def get_group_index_sorter( shape = map(len, keys) that is, linear in the number of combinations (cartesian product) of unique values of groupby keys. This can be huge when doing multi-key groupby. - np.argsort(kind='mergesort') is O(count x log(count)) where count is the + np.argsort(kind='stable') is O(count x log(count)) where count is the length of the data-frame; Both algorithms are `stable` sort and that is necessary for correctness of groupby operations. e.g. consider: @@ -680,7 +680,7 @@ def get_group_index_sorter( ) # sorter _should_ already be intp, but mypy is not yet able to verify else: - sorter = group_index.argsort(kind="mergesort") + sorter = group_index.argsort(kind="stable") return ensure_platform_int(sorter) diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 7f19e62f40774..676c5d85a5b48 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -1629,7 +1629,7 @@ def _read_value_labels(self) -> None: val = np.frombuffer( self._path_or_buf.read(4 * n), dtype=f"{self._byteorder}i4", count=n ) - ii = np.argsort(off) + ii = np.argsort(off, kind="stable") off = off[ii] val = val[ii] txt = self._path_or_buf.read(txtlen) diff --git a/pandas/tests/arrays/categorical/test_sorting.py b/pandas/tests/arrays/categorical/test_sorting.py index ae527065b3fb9..14eef0ab0975e 100644 --- a/pandas/tests/arrays/categorical/test_sorting.py +++ b/pandas/tests/arrays/categorical/test_sorting.py @@ -29,7 +29,7 @@ def test_numpy_argsort(self): tm.assert_numpy_array_equal(np.argsort(c), expected, check_dtype=False) tm.assert_numpy_array_equal( - np.argsort(c, kind="mergesort"), expected, check_dtype=False + np.argsort(c, kind="stable"), expected, check_dtype=False ) msg = "the 'axis' parameter is not supported" diff --git a/pandas/tests/frame/methods/test_nlargest.py b/pandas/tests/frame/methods/test_nlargest.py index 0bdf9a0e5c007..d4eaccd2077ef 100644 --- a/pandas/tests/frame/methods/test_nlargest.py +++ b/pandas/tests/frame/methods/test_nlargest.py @@ -9,7 +9,6 @@ import pandas as pd import pandas._testing as tm -from pandas.util.version import Version @pytest.fixture @@ -156,28 +155,16 @@ def test_nlargest_n_identical_values(self): [["a", "b", "c"], ["c", "b", "a"], ["a"], ["b"], ["a", "b"], ["c", "b"]], ) @pytest.mark.parametrize("n", range(1, 6)) - def test_nlargest_n_duplicate_index(self, df_duplicates, n, order, request): + def test_nlargest_n_duplicate_index(self, df_duplicates, n, order): # GH#13412 df = df_duplicates result = df.nsmallest(n, order) - expected = df.sort_values(order).head(n) + expected = df.sort_values(order, kind="stable").head(n) tm.assert_frame_equal(result, expected) result = df.nlargest(n, order) - expected = df.sort_values(order, ascending=False).head(n) - if Version(np.__version__) >= Version("1.25") and ( - (order == ["a"] and n in (1, 2, 3, 4)) or (order == ["a", "b"]) and n == 5 - ): - request.node.add_marker( - pytest.mark.xfail( - reason=( - "pandas default unstable sorting of duplicates" - "issue with numpy>=1.25 with AVX instructions" - ), - strict=False, - ) - ) + expected = df.sort_values(order, ascending=False, kind="stable").head(n) tm.assert_frame_equal(result, expected) def test_nlargest_duplicate_keep_all_ties(self): diff --git a/pandas/tests/frame/methods/test_sort_values.py b/pandas/tests/frame/methods/test_sort_values.py index bd7d882f6d94a..18439fcf83249 100644 --- a/pandas/tests/frame/methods/test_sort_values.py +++ b/pandas/tests/frame/methods/test_sort_values.py @@ -10,7 +10,6 @@ date_range, ) import pandas._testing as tm -from pandas.util.version import Version class TestDataFrameSortValues: @@ -856,38 +855,26 @@ def ascending(request): class TestSortValuesLevelAsStr: def test_sort_index_level_and_column_label( - self, df_none, df_idx, sort_names, ascending, request + self, df_none, df_idx, sort_names, ascending ): # GH#14353 - if ( - Version(np.__version__) >= Version("1.25") - and request.node.callspec.id == "df_idx0-inner-True" - ): - request.node.add_marker( - pytest.mark.xfail( - reason=( - "pandas default unstable sorting of duplicates" - "issue with numpy>=1.25 with AVX instructions" - ), - strict=False, - ) - ) - # Get index levels from df_idx levels = df_idx.index.names # Compute expected by sorting on columns and the setting index expected = df_none.sort_values( - by=sort_names, ascending=ascending, axis=0 + by=sort_names, ascending=ascending, axis=0, kind="stable" ).set_index(levels) # Compute result sorting on mix on columns and index levels - result = df_idx.sort_values(by=sort_names, ascending=ascending, axis=0) + result = df_idx.sort_values( + by=sort_names, ascending=ascending, axis=0, kind="stable" + ) tm.assert_frame_equal(result, expected) def test_sort_column_level_and_index_label( - self, df_none, df_idx, sort_names, ascending, request + self, df_none, df_idx, sort_names, ascending ): # GH#14353 @@ -898,25 +885,17 @@ def test_sort_column_level_and_index_label( # transposing. For some cases this will result in a frame with # multiple column levels expected = ( - df_none.sort_values(by=sort_names, ascending=ascending, axis=0) + df_none.sort_values( + by=sort_names, ascending=ascending, axis=0, kind="stable" + ) .set_index(levels) .T ) # Compute result by transposing and sorting on axis=1. - result = df_idx.T.sort_values(by=sort_names, ascending=ascending, axis=1) - - if Version(np.__version__) >= Version("1.25"): - request.node.add_marker( - pytest.mark.xfail( - reason=( - "pandas default unstable sorting of duplicates" - "issue with numpy>=1.25 with AVX instructions" - ), - strict=False, - ) - ) - + result = df_idx.T.sort_values( + by=sort_names, ascending=ascending, axis=1, kind="stable" + ) tm.assert_frame_equal(result, expected) def test_sort_values_validate_ascending_for_value_error(self): diff --git a/pandas/tests/groupby/test_value_counts.py b/pandas/tests/groupby/test_value_counts.py index 944dda8977882..642ced3d1fd79 100644 --- a/pandas/tests/groupby/test_value_counts.py +++ b/pandas/tests/groupby/test_value_counts.py @@ -21,7 +21,6 @@ to_datetime, ) import pandas._testing as tm -from pandas.util.version import Version def tests_value_counts_index_names_category_column(): @@ -246,18 +245,8 @@ def test_bad_subset(education_df): gp.value_counts(subset=["country"]) -def test_basic(education_df, request): +def test_basic(education_df): # gh43564 - if Version(np.__version__) >= Version("1.25"): - request.node.add_marker( - pytest.mark.xfail( - reason=( - "pandas default unstable sorting of duplicates" - "issue with numpy>=1.25 with AVX instructions" - ), - strict=False, - ) - ) result = education_df.groupby("country")[["gender", "education"]].value_counts( normalize=True ) @@ -295,7 +284,7 @@ def _frame_value_counts(df, keys, normalize, sort, ascending): @pytest.mark.parametrize("as_index", [True, False]) @pytest.mark.parametrize("frame", [True, False]) def test_against_frame_and_seriesgroupby( - education_df, groupby, normalize, name, sort, ascending, as_index, frame, request + education_df, groupby, normalize, name, sort, ascending, as_index, frame ): # test all parameters: # - Use column, array or function as by= parameter @@ -305,16 +294,6 @@ def test_against_frame_and_seriesgroupby( # - 3-way compare against: # - apply with :meth:`~DataFrame.value_counts` # - `~SeriesGroupBy.value_counts` - if Version(np.__version__) >= Version("1.25") and frame and sort and normalize: - request.node.add_marker( - pytest.mark.xfail( - reason=( - "pandas default unstable sorting of duplicates" - "issue with numpy>=1.25 with AVX instructions" - ), - strict=False, - ) - ) by = { "column": "country", "array": education_df["country"].values, @@ -479,18 +458,8 @@ def nulls_df(): ], ) def test_dropna_combinations( - nulls_df, group_dropna, count_dropna, expected_rows, expected_values, request + nulls_df, group_dropna, count_dropna, expected_rows, expected_values ): - if Version(np.__version__) >= Version("1.25") and not group_dropna: - request.node.add_marker( - pytest.mark.xfail( - reason=( - "pandas default unstable sorting of duplicates" - "issue with numpy>=1.25 with AVX instructions" - ), - strict=False, - ) - ) gp = nulls_df.groupby(["A", "B"], dropna=group_dropna) result = gp.value_counts(normalize=True, sort=True, dropna=count_dropna) columns = DataFrame() @@ -581,21 +550,10 @@ def test_data_frame_value_counts_dropna( ], ) def test_categorical_single_grouper_with_only_observed_categories( - education_df, as_index, observed, normalize, name, expected_data, request + education_df, as_index, observed, normalize, name, expected_data ): # Test single categorical grouper with only observed grouping categories # when non-groupers are also categorical - if Version(np.__version__) >= Version("1.25"): - request.node.add_marker( - pytest.mark.xfail( - reason=( - "pandas default unstable sorting of duplicates" - "issue with numpy>=1.25 with AVX instructions" - ), - strict=False, - ) - ) - gp = education_df.astype("category").groupby( "country", as_index=as_index, observed=observed ) @@ -690,21 +648,9 @@ def assert_categorical_single_grouper( ], ) def test_categorical_single_grouper_observed_true( - education_df, as_index, normalize, name, expected_data, request + education_df, as_index, normalize, name, expected_data ): # GH#46357 - - if Version(np.__version__) >= Version("1.25"): - request.node.add_marker( - pytest.mark.xfail( - reason=( - "pandas default unstable sorting of duplicates" - "issue with numpy>=1.25 with AVX instructions" - ), - strict=False, - ) - ) - expected_index = [ ("FR", "male", "low"), ("FR", "female", "high"), @@ -771,39 +717,27 @@ def test_categorical_single_grouper_observed_true( ], ) def test_categorical_single_grouper_observed_false( - education_df, as_index, normalize, name, expected_data, request + education_df, as_index, normalize, name, expected_data ): # GH#46357 - - if Version(np.__version__) >= Version("1.25"): - request.node.add_marker( - pytest.mark.xfail( - reason=( - "pandas default unstable sorting of duplicates" - "issue with numpy>=1.25 with AVX instructions" - ), - strict=False, - ) - ) - expected_index = [ ("FR", "male", "low"), ("FR", "female", "high"), ("FR", "male", "medium"), ("FR", "female", "low"), - ("FR", "male", "high"), ("FR", "female", "medium"), + ("FR", "male", "high"), ("US", "female", "high"), ("US", "male", "low"), - ("US", "male", "medium"), - ("US", "male", "high"), - ("US", "female", "medium"), ("US", "female", "low"), - ("ASIA", "male", "low"), - ("ASIA", "male", "high"), - ("ASIA", "female", "medium"), - ("ASIA", "female", "low"), + ("US", "female", "medium"), + ("US", "male", "high"), + ("US", "male", "medium"), ("ASIA", "female", "high"), + ("ASIA", "female", "low"), + ("ASIA", "female", "medium"), + ("ASIA", "male", "high"), + ("ASIA", "male", "low"), ("ASIA", "male", "medium"), ] @@ -923,22 +857,10 @@ def test_categorical_multiple_groupers( ], ) def test_categorical_non_groupers( - education_df, as_index, observed, normalize, name, expected_data, request + education_df, as_index, observed, normalize, name, expected_data ): # GH#46357 Test non-observed categories are included in the result, # regardless of `observed` - - if Version(np.__version__) >= Version("1.25"): - request.node.add_marker( - pytest.mark.xfail( - reason=( - "pandas default unstable sorting of duplicates" - "issue with numpy>=1.25 with AVX instructions" - ), - strict=False, - ) - ) - education_df = education_df.copy() education_df["gender"] = education_df["gender"].astype("category") education_df["education"] = education_df["education"].astype("category") diff --git a/pandas/tests/indexes/multi/test_sorting.py b/pandas/tests/indexes/multi/test_sorting.py index 08c1a4092952c..4b465063b7afc 100644 --- a/pandas/tests/indexes/multi/test_sorting.py +++ b/pandas/tests/indexes/multi/test_sorting.py @@ -102,7 +102,7 @@ def test_numpy_argsort(idx): msg = "the 'kind' parameter is not supported" with pytest.raises(ValueError, match=msg): - np.argsort(idx, kind="mergesort") + np.argsort(idx, kind="stable") msg = "the 'order' parameter is not supported" with pytest.raises(ValueError, match=msg): diff --git a/pandas/tests/indexes/test_old_base.py b/pandas/tests/indexes/test_old_base.py index 79dc423f12a85..bb9e8366bf637 100644 --- a/pandas/tests/indexes/test_old_base.py +++ b/pandas/tests/indexes/test_old_base.py @@ -340,8 +340,8 @@ def test_numpy_argsort(self, index): expected = index.argsort() tm.assert_numpy_array_equal(result, expected) - result = np.argsort(index, kind="mergesort") - expected = index.argsort(kind="mergesort") + result = np.argsort(index, kind="stable") + expected = index.argsort(kind="stable") tm.assert_numpy_array_equal(result, expected) # these are the only two types that perform diff --git a/pandas/tests/indexes/test_setops.py b/pandas/tests/indexes/test_setops.py index a64994efec85a..53084049da9b8 100644 --- a/pandas/tests/indexes/test_setops.py +++ b/pandas/tests/indexes/test_setops.py @@ -849,7 +849,7 @@ def test_difference_incomparable_true(self, opname): b = Index([2, Timestamp("1999"), 1]) op = operator.methodcaller(opname, b, sort=True) - msg = "'<' not supported between instances of 'Timestamp' and 'int'" + msg = "'<' not supported between instances of 'int' and 'Timestamp'" with pytest.raises(TypeError, match=msg): op(a) diff --git a/pandas/tests/libs/test_join.py b/pandas/tests/libs/test_join.py index ba2e6e7130929..85a2f81d1705c 100644 --- a/pandas/tests/libs/test_join.py +++ b/pandas/tests/libs/test_join.py @@ -52,8 +52,8 @@ def test_cython_left_outer_join(self): ls, rs = left_outer_join(left, right, max_group) - exp_ls = left.argsort(kind="mergesort") - exp_rs = right.argsort(kind="mergesort") + exp_ls = left.argsort(kind="stable") + exp_rs = right.argsort(kind="stable") exp_li = np.array([0, 1, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 7, 7, 8, 8, 9, 10]) exp_ri = np.array( @@ -76,8 +76,8 @@ def test_cython_right_outer_join(self): rs, ls = left_outer_join(right, left, max_group) - exp_ls = left.argsort(kind="mergesort") - exp_rs = right.argsort(kind="mergesort") + exp_ls = left.argsort(kind="stable") + exp_rs = right.argsort(kind="stable") # 0 1 1 1 exp_li = np.array( @@ -122,8 +122,8 @@ def test_cython_inner_join(self): ls, rs = inner_join(left, right, max_group) - exp_ls = left.argsort(kind="mergesort") - exp_rs = right.argsort(kind="mergesort") + exp_ls = left.argsort(kind="stable") + exp_rs = right.argsort(kind="stable") exp_li = np.array([0, 1, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 7, 7, 8, 8]) exp_ri = np.array([0, 0, 0, 1, 2, 3, 1, 2, 3, 1, 2, 3, 4, 5, 4, 5, 4, 5]) diff --git a/pandas/tests/series/methods/test_argsort.py b/pandas/tests/series/methods/test_argsort.py index 5bcf42aad1db4..da765416d5821 100644 --- a/pandas/tests/series/methods/test_argsort.py +++ b/pandas/tests/series/methods/test_argsort.py @@ -61,10 +61,11 @@ def test_argsort(self, datetime_series): def test_argsort_stable(self): s = Series(np.random.default_rng(2).integers(0, 100, size=10000)) - mindexer = s.argsort(kind="mergesort") + mindexer = s.argsort(kind="stable") + qindexer = s.argsort() - mexpected = np.argsort(s.values, kind="mergesort") + mexpected = np.argsort(s.values, kind="stable") qexpected = np.argsort(s.values, kind="quicksort") tm.assert_series_equal(mindexer.astype(np.intp), Series(mexpected)) diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 661290fb00d13..337919a7ad0ca 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -1879,7 +1879,7 @@ def test_groupsort_indexer(): # need to use a stable sort # np.argsort returns int, groupsort_indexer # always returns intp - expected = np.argsort(a, kind="mergesort") + expected = np.argsort(a, kind="stable") expected = expected.astype(np.intp) tm.assert_numpy_array_equal(result, expected)