From 4fcedb2c0c865028789eb18be862461dd9689b42 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Tue, 6 Jun 2023 18:03:02 -0700
Subject: [PATCH 1/6] DEBUG: npdev build

---
 pandas/core/frame.py  | 4 ++--
 pandas/core/series.py | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 3e6c89139d06d..85ca70d51aca3 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -6567,7 +6567,7 @@ def sort_values(
         axis: Axis = 0,
         ascending: bool | list[bool] | tuple[bool, ...] = True,
         inplace: bool = False,
-        kind: SortKind = "quicksort",
+        kind: SortKind = "stable",
         na_position: str = "last",
         ignore_index: bool = False,
         key: ValueKeyFunc = None,
@@ -6592,7 +6592,7 @@ def sort_values(
              the by.
         inplace : bool, default False
              If True, perform operation in-place.
-        kind : {'quicksort', 'mergesort', 'heapsort', 'stable'}, default 'quicksort'
+        kind : {'quicksort', 'mergesort', 'heapsort', 'stable'}, default 'stable'
              Choice of sorting algorithm. See also :func:`numpy.sort` for more
              information. `mergesort` and `stable` are the only stable algorithms. For
              DataFrames, this option is only applied when sorting on a single
diff --git a/pandas/core/series.py b/pandas/core/series.py
index 9c7110cc21082..d0161a1498b63 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -3440,7 +3440,7 @@ def sort_values(
         axis: Axis = 0,
         ascending: bool | int | Sequence[bool] | Sequence[int] = True,
         inplace: bool = False,
-        kind: SortKind = "quicksort",
+        kind: SortKind = "stable",
         na_position: NaPosition = "last",
         ignore_index: bool = False,
         key: ValueKeyFunc = None,
@@ -3459,7 +3459,7 @@ def sort_values(
             If True, sort values in ascending order, otherwise descending.
         inplace : bool, default False
             If True, perform operation in-place.
-        kind : {'quicksort', 'mergesort', 'heapsort', 'stable'}, default 'quicksort'
+        kind : {'quicksort', 'mergesort', 'heapsort', 'stable'}, default 'stable'
             Choice of sorting algorithm. See also :func:`numpy.sort` for more
             information. 'mergesort' and 'stable' are the only stable  algorithms.
         na_position : {'first' or 'last'}, default 'last'

From 5c7b2e58f729a0d0ece2d6a58d26fe4d7c9aaa6d Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Wed, 7 Jun 2023 11:53:18 -0700
Subject: [PATCH 2/6] Address tests where sorting changed

---
 pandas/core/frame.py                          |   4 +-
 pandas/core/series.py                         |   4 +-
 pandas/tests/frame/methods/test_nlargest.py   |   2 +-
 .../tests/frame/methods/test_sort_values.py   |   8 +-
 pandas/tests/groupby/test_value_counts.py     | 208 +++++++++++++-----
 5 files changed, 164 insertions(+), 62 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 85ca70d51aca3..3e6c89139d06d 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -6567,7 +6567,7 @@ def sort_values(
         axis: Axis = 0,
         ascending: bool | list[bool] | tuple[bool, ...] = True,
         inplace: bool = False,
-        kind: SortKind = "stable",
+        kind: SortKind = "quicksort",
         na_position: str = "last",
         ignore_index: bool = False,
         key: ValueKeyFunc = None,
@@ -6592,7 +6592,7 @@ def sort_values(
              the by.
         inplace : bool, default False
              If True, perform operation in-place.
-        kind : {'quicksort', 'mergesort', 'heapsort', 'stable'}, default 'stable'
+        kind : {'quicksort', 'mergesort', 'heapsort', 'stable'}, default 'quicksort'
              Choice of sorting algorithm. See also :func:`numpy.sort` for more
              information. `mergesort` and `stable` are the only stable algorithms. For
              DataFrames, this option is only applied when sorting on a single
diff --git a/pandas/core/series.py b/pandas/core/series.py
index d0161a1498b63..9c7110cc21082 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -3440,7 +3440,7 @@ def sort_values(
         axis: Axis = 0,
         ascending: bool | int | Sequence[bool] | Sequence[int] = True,
         inplace: bool = False,
-        kind: SortKind = "stable",
+        kind: SortKind = "quicksort",
         na_position: NaPosition = "last",
         ignore_index: bool = False,
         key: ValueKeyFunc = None,
@@ -3459,7 +3459,7 @@ def sort_values(
             If True, sort values in ascending order, otherwise descending.
         inplace : bool, default False
             If True, perform operation in-place.
-        kind : {'quicksort', 'mergesort', 'heapsort', 'stable'}, default 'stable'
+        kind : {'quicksort', 'mergesort', 'heapsort', 'stable'}, default 'quicksort'
             Choice of sorting algorithm. See also :func:`numpy.sort` for more
             information. 'mergesort' and 'stable' are the only stable  algorithms.
         na_position : {'first' or 'last'}, default 'last'
diff --git a/pandas/tests/frame/methods/test_nlargest.py b/pandas/tests/frame/methods/test_nlargest.py
index b5c33a41dd780..c98bd9fc9ea9d 100644
--- a/pandas/tests/frame/methods/test_nlargest.py
+++ b/pandas/tests/frame/methods/test_nlargest.py
@@ -164,7 +164,7 @@ def test_nlargest_n_duplicate_index(self, df_duplicates, n, order):
         tm.assert_frame_equal(result, expected)
 
         result = df.nlargest(n, order)
-        expected = df.sort_values(order, ascending=False).head(n)
+        expected = df.sort_values(order, ascending=False, kind="stable").head(n)
         tm.assert_frame_equal(result, expected)
 
     def test_nlargest_duplicate_keep_all_ties(self):
diff --git a/pandas/tests/frame/methods/test_sort_values.py b/pandas/tests/frame/methods/test_sort_values.py
index e2877acbdd040..08967eafcecb3 100644
--- a/pandas/tests/frame/methods/test_sort_values.py
+++ b/pandas/tests/frame/methods/test_sort_values.py
@@ -878,13 +878,17 @@ def test_sort_column_level_and_index_label(
         # transposing. For some cases this will result in a frame with
         # multiple column levels
         expected = (
-            df_none.sort_values(by=sort_names, ascending=ascending, axis=0)
+            df_none.sort_values(
+                by=sort_names, ascending=ascending, axis=0, kind="stable"
+            )
             .set_index(levels)
             .T
         )
 
         # Compute result by transposing and sorting on axis=1.
-        result = df_idx.T.sort_values(by=sort_names, ascending=ascending, axis=1)
+        result = df_idx.T.sort_values(
+            by=sort_names, ascending=ascending, axis=1, kind="stable"
+        )
 
         tm.assert_frame_equal(result, expected)
 
diff --git a/pandas/tests/groupby/test_value_counts.py b/pandas/tests/groupby/test_value_counts.py
index 5477ad75a56f7..e3943a52f3f91 100644
--- a/pandas/tests/groupby/test_value_counts.py
+++ b/pandas/tests/groupby/test_value_counts.py
@@ -21,6 +21,7 @@
     to_datetime,
 )
 import pandas._testing as tm
+from pandas.util.version import Version
 
 
 def tests_value_counts_index_names_category_column():
@@ -285,7 +286,7 @@ def _frame_value_counts(df, keys, normalize, sort, ascending):
 @pytest.mark.parametrize("as_index", [True, False])
 @pytest.mark.parametrize("frame", [True, False])
 def test_against_frame_and_seriesgroupby(
-    education_df, groupby, normalize, name, sort, ascending, as_index, frame
+    education_df, groupby, normalize, name, sort, ascending, as_index, frame, request
 ):
     # test all parameters:
     # - Use column, array or function as by= parameter
@@ -295,6 +296,13 @@ def test_against_frame_and_seriesgroupby(
     # - 3-way compare against:
     #   - apply with :meth:`~DataFrame.value_counts`
     #   - `~SeriesGroupBy.value_counts`
+    if sort and name == "proportion" and Version(np.__version__) >= Version("1.25"):
+        # TODO: Change the expected comparison
+        request.node.add_marker(
+            pytest.mark.xfail(
+                reason="default sorting is unstable; numpy sorting changed in 1.25"
+            )
+        )
     by = {
         "column": "country",
         "array": education_df["country"].values,
@@ -441,22 +449,36 @@ def nulls_df():
     )
 
 
+# TODO: Actually fix the expected result for the xfails
 @pytest.mark.parametrize(
     "group_dropna, count_dropna, expected_rows, expected_values",
     [
-        (
+        pytest.param(
             False,
             False,
             [0, 1, 3, 5, 7, 6, 8, 2, 4],
             [0.5, 0.5, 1.0, 0.25, 0.25, 0.25, 0.25, 1.0, 1.0],
+            marks=pytest.mark.xfail(
+                Version(np.__version__) >= Version("1.25"),
+                reason="default sorting is unstable; numpy sorting changed in 1.25",
+            ),
+        ),
+        pytest.param(
+            False,
+            True,
+            [0, 1, 3, 5, 2, 4],
+            [0.5, 0.5, 1.0, 1.0, 1.0, 1.0],
+            marks=pytest.mark.xfail(
+                Version(np.__version__) >= Version("1.25"),
+                reason="default sorting is unstable; numpy sorting changed in 1.25",
+            ),
         ),
-        (False, True, [0, 1, 3, 5, 2, 4], [0.5, 0.5, 1.0, 1.0, 1.0, 1.0]),
         (True, False, [0, 1, 5, 7, 6, 8], [0.5, 0.5, 0.25, 0.25, 0.25, 0.25]),
         (True, True, [0, 1, 5], [0.5, 0.5, 1.0]),
     ],
 )
 def test_dropna_combinations(
-    nulls_df, group_dropna, count_dropna, expected_rows, expected_values
+    nulls_df, group_dropna, count_dropna, expected_rows, expected_values, request
 ):
     gp = nulls_df.groupby(["A", "B"], dropna=group_dropna)
     result = gp.value_counts(normalize=True, sort=True, dropna=count_dropna)
@@ -558,8 +580,9 @@ def test_categorical_single_grouper_with_only_observed_categories(
     )
     result = gp.value_counts(normalize=normalize)
 
-    expected_index = MultiIndex.from_tuples(
-        [
+    if Version(np.__version__) < Version("1.25"):
+        # default sorting is unstable; numpy sorting changed
+        expected_tuples = [
             ("FR", "male", "low"),
             ("FR", "female", "high"),
             ("FR", "male", "medium"),
@@ -572,7 +595,25 @@ def test_categorical_single_grouper_with_only_observed_categories(
             ("US", "female", "medium"),
             ("US", "male", "high"),
             ("US", "male", "medium"),
-        ],
+        ]
+    else:
+        expected_tuples = [
+            ("FR", "male", "low"),
+            ("FR", "female", "high"),
+            ("FR", "male", "medium"),
+            ("FR", "male", "high"),
+            ("FR", "female", "low"),
+            ("FR", "female", "medium"),
+            ("US", "female", "high"),
+            ("US", "male", "low"),
+            ("US", "female", "low"),
+            ("US", "female", "medium"),
+            ("US", "male", "high"),
+            ("US", "male", "medium"),
+        ]
+
+    expected_index = MultiIndex.from_tuples(
+        expected_tuples,
         names=["country", "gender", "education"],
     )
 
@@ -651,20 +692,37 @@ def test_categorical_single_grouper_observed_true(
 ):
     # GH#46357
 
-    expected_index = [
-        ("FR", "male", "low"),
-        ("FR", "female", "high"),
-        ("FR", "male", "medium"),
-        ("FR", "female", "low"),
-        ("FR", "female", "medium"),
-        ("FR", "male", "high"),
-        ("US", "female", "high"),
-        ("US", "male", "low"),
-        ("US", "female", "low"),
-        ("US", "female", "medium"),
-        ("US", "male", "high"),
-        ("US", "male", "medium"),
-    ]
+    if Version(np.__version__) < Version("1.25"):
+        # default sorting is unstable; numpy sorting changed
+        expected_index = [
+            ("FR", "male", "low"),
+            ("FR", "female", "high"),
+            ("FR", "male", "medium"),
+            ("FR", "female", "low"),
+            ("FR", "female", "medium"),
+            ("FR", "male", "high"),
+            ("US", "female", "high"),
+            ("US", "male", "low"),
+            ("US", "female", "low"),
+            ("US", "female", "medium"),
+            ("US", "male", "high"),
+            ("US", "male", "medium"),
+        ]
+    else:
+        expected_index = [
+            ("FR", "male", "low"),
+            ("FR", "female", "high"),
+            ("FR", "female", "low"),
+            ("FR", "female", "medium"),
+            ("FR", "male", "high"),
+            ("FR", "male", "medium"),
+            ("US", "female", "high"),
+            ("US", "male", "low"),
+            ("US", "female", "low"),
+            ("US", "female", "medium"),
+            ("US", "male", "high"),
+            ("US", "male", "medium"),
+        ]
 
     assert_categorical_single_grouper(
         education_df=education_df,
@@ -721,26 +779,49 @@ def test_categorical_single_grouper_observed_false(
 ):
     # GH#46357
 
-    expected_index = [
-        ("FR", "male", "low"),
-        ("FR", "female", "high"),
-        ("FR", "male", "medium"),
-        ("FR", "female", "low"),
-        ("FR", "male", "high"),
-        ("FR", "female", "medium"),
-        ("US", "female", "high"),
-        ("US", "male", "low"),
-        ("US", "male", "medium"),
-        ("US", "male", "high"),
-        ("US", "female", "medium"),
-        ("US", "female", "low"),
-        ("ASIA", "male", "low"),
-        ("ASIA", "male", "high"),
-        ("ASIA", "female", "medium"),
-        ("ASIA", "female", "low"),
-        ("ASIA", "female", "high"),
-        ("ASIA", "male", "medium"),
-    ]
+    if Version(np.__version__) < Version("1.25"):
+        # default sorting is unstable; numpy sorting changed
+        expected_index = [
+            ("FR", "male", "low"),
+            ("FR", "female", "high"),
+            ("FR", "male", "medium"),
+            ("FR", "female", "low"),
+            ("FR", "male", "high"),
+            ("FR", "female", "medium"),
+            ("US", "female", "high"),
+            ("US", "male", "low"),
+            ("US", "male", "medium"),
+            ("US", "male", "high"),
+            ("US", "female", "medium"),
+            ("US", "female", "low"),
+            ("ASIA", "male", "low"),
+            ("ASIA", "male", "high"),
+            ("ASIA", "female", "medium"),
+            ("ASIA", "female", "low"),
+            ("ASIA", "female", "high"),
+            ("ASIA", "male", "medium"),
+        ]
+    else:
+        expected_index = [
+            ("FR", "male", "low"),
+            ("FR", "female", "high"),
+            ("FR", "male", "medium"),
+            ("FR", "female", "low"),
+            ("FR", "female", "medium"),
+            ("FR", "male", "high"),
+            ("US", "female", "high"),
+            ("US", "male", "low"),
+            ("US", "female", "low"),
+            ("US", "female", "medium"),
+            ("US", "male", "high"),
+            ("US", "male", "medium"),
+            ("ASIA", "female", "medium"),
+            ("ASIA", "female", "low"),
+            ("ASIA", "female", "high"),
+            ("ASIA", "male", "medium"),
+            ("ASIA", "male", "low"),
+            ("ASIA", "male", "high"),
+        ]
 
     assert_categorical_single_grouper(
         education_df=education_df,
@@ -869,20 +950,37 @@ def test_categorical_non_groupers(
     gp = education_df.groupby("country", as_index=as_index, observed=observed)
     result = gp.value_counts(normalize=normalize)
 
-    expected_index = [
-        ("FR", "male", "low"),
-        ("FR", "female", "high"),
-        ("FR", "male", "medium"),
-        ("FR", "female", "low"),
-        ("FR", "female", "medium"),
-        ("FR", "male", "high"),
-        ("US", "female", "high"),
-        ("US", "male", "low"),
-        ("US", "female", "low"),
-        ("US", "female", "medium"),
-        ("US", "male", "high"),
-        ("US", "male", "medium"),
-    ]
+    if Version(np.__version__) < Version("1.25"):
+        # default sorting is unstable; numpy sorting changed
+        expected_index = [
+            ("FR", "male", "low"),
+            ("FR", "female", "high"),
+            ("FR", "male", "medium"),
+            ("FR", "female", "low"),
+            ("FR", "female", "medium"),
+            ("FR", "male", "high"),
+            ("US", "female", "high"),
+            ("US", "male", "low"),
+            ("US", "female", "low"),
+            ("US", "female", "medium"),
+            ("US", "male", "high"),
+            ("US", "male", "medium"),
+        ]
+    else:
+        expected_index = [
+            ("FR", "male", "low"),
+            ("FR", "female", "high"),
+            ("FR", "male", "medium"),
+            ("FR", "male", "high"),
+            ("FR", "female", "medium"),
+            ("FR", "female", "low"),
+            ("US", "female", "high"),
+            ("US", "male", "low"),
+            ("US", "female", "low"),
+            ("US", "female", "medium"),
+            ("US", "male", "high"),
+            ("US", "male", "medium"),
+        ]
     expected_series = Series(
         data=expected_data,
         index=MultiIndex.from_tuples(

From 72149150647b13a6f67667c777c031b995fb9ce6 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Wed, 7 Jun 2023 15:05:49 -0700
Subject: [PATCH 3/6] Adjust more tests

---
 pandas/tests/frame/methods/test_nlargest.py | 14 +++++-
 pandas/tests/groupby/test_value_counts.py   | 53 +++++++++++++--------
 2 files changed, 45 insertions(+), 22 deletions(-)

diff --git a/pandas/tests/frame/methods/test_nlargest.py b/pandas/tests/frame/methods/test_nlargest.py
index c98bd9fc9ea9d..0717a99de74d4 100644
--- a/pandas/tests/frame/methods/test_nlargest.py
+++ b/pandas/tests/frame/methods/test_nlargest.py
@@ -9,6 +9,7 @@
 
 import pandas as pd
 import pandas._testing as tm
+from pandas.util.version import Version
 
 
 @pytest.fixture
@@ -155,7 +156,7 @@ def test_nlargest_n_identical_values(self):
         [["a", "b", "c"], ["c", "b", "a"], ["a"], ["b"], ["a", "b"], ["c", "b"]],
     )
     @pytest.mark.parametrize("n", range(1, 6))
-    def test_nlargest_n_duplicate_index(self, df_duplicates, n, order):
+    def test_nlargest_n_duplicate_index(self, df_duplicates, n, order, request):
         # GH#13412
 
         df = df_duplicates
@@ -165,6 +166,17 @@ def test_nlargest_n_duplicate_index(self, df_duplicates, n, order):
 
         result = df.nlargest(n, order)
         expected = df.sort_values(order, ascending=False, kind="stable").head(n)
+        if (
+            n == 5
+            and order in (["a"], ["a", "b"])
+            and Version(np.__version__) >= Version("1.25")
+        ):
+            # TODO: Change the expected comparison
+            request.node.add_marker(
+                pytest.mark.xfail(
+                    reason="default sorting is unstable; numpy sorting changed in 1.25"
+                )
+            )
         tm.assert_frame_equal(result, expected)
 
     def test_nlargest_duplicate_keep_all_ties(self):
diff --git a/pandas/tests/groupby/test_value_counts.py b/pandas/tests/groupby/test_value_counts.py
index e3943a52f3f91..ee83dbcefd305 100644
--- a/pandas/tests/groupby/test_value_counts.py
+++ b/pandas/tests/groupby/test_value_counts.py
@@ -252,16 +252,27 @@ def test_basic(education_df):
     result = education_df.groupby("country")[["gender", "education"]].value_counts(
         normalize=True
     )
+    if Version(np.__version__) >= Version("1.25"):
+        # default sorting is unstable; numpy sorting changed in 1.25
+        expected_tuples = [
+            ("FR", "male", "low"),
+            ("FR", "female", "high"),
+            ("FR", "male", "medium"),
+            ("US", "male", "low"),
+            ("US", "female", "high"),
+        ]
+    else:
+        expected_tuples = [
+            ("FR", "male", "low"),
+            ("FR", "female", "high"),
+            ("FR", "male", "medium"),
+            ("US", "female", "high"),
+            ("US", "male", "low"),
+        ]
     expected = Series(
         data=[0.5, 0.25, 0.25, 0.5, 0.5],
         index=MultiIndex.from_tuples(
-            [
-                ("FR", "male", "low"),
-                ("FR", "female", "high"),
-                ("FR", "male", "medium"),
-                ("US", "female", "high"),
-                ("US", "male", "low"),
-            ],
+            expected_tuples,
             names=["country", "gender", "education"],
         ),
         name="proportion",
@@ -296,13 +307,13 @@ def test_against_frame_and_seriesgroupby(
     # - 3-way compare against:
     #   - apply with :meth:`~DataFrame.value_counts`
     #   - `~SeriesGroupBy.value_counts`
-    if sort and name == "proportion" and Version(np.__version__) >= Version("1.25"):
-        # TODO: Change the expected comparison
-        request.node.add_marker(
-            pytest.mark.xfail(
-                reason="default sorting is unstable; numpy sorting changed in 1.25"
-            )
-        )
+    # if sort and name == "proportion" and Version(np.__version__) >= Version("1.25"):
+    #     # TODO: Change the expected comparison
+    #     request.node.add_marker(
+    #         pytest.mark.xfail(
+    #             reason="default sorting is unstable; numpy sorting changed in 1.25"
+    #         )
+    #     )
     by = {
         "column": "country",
         "array": education_df["country"].values,
@@ -712,10 +723,10 @@ def test_categorical_single_grouper_observed_true(
         expected_index = [
             ("FR", "male", "low"),
             ("FR", "female", "high"),
-            ("FR", "female", "low"),
-            ("FR", "female", "medium"),
             ("FR", "male", "high"),
             ("FR", "male", "medium"),
+            ("FR", "female", "low"),
+            ("FR", "female", "medium"),
             ("US", "female", "high"),
             ("US", "male", "low"),
             ("US", "female", "low"),
@@ -815,12 +826,12 @@ def test_categorical_single_grouper_observed_false(
             ("US", "female", "medium"),
             ("US", "male", "high"),
             ("US", "male", "medium"),
-            ("ASIA", "female", "medium"),
-            ("ASIA", "female", "low"),
             ("ASIA", "female", "high"),
-            ("ASIA", "male", "medium"),
-            ("ASIA", "male", "low"),
+            ("ASIA", "female", "low"),
+            ("ASIA", "female", "medium"),
             ("ASIA", "male", "high"),
+            ("ASIA", "male", "low"),
+            ("ASIA", "male", "medium"),
         ]
 
     assert_categorical_single_grouper(
@@ -972,8 +983,8 @@ def test_categorical_non_groupers(
             ("FR", "female", "high"),
             ("FR", "male", "medium"),
             ("FR", "male", "high"),
-            ("FR", "female", "medium"),
             ("FR", "female", "low"),
+            ("FR", "female", "medium"),
             ("US", "female", "high"),
             ("US", "male", "low"),
             ("US", "female", "low"),

From eb0c1bee460fe2c9c27701bb2ce116d52e813a89 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Wed, 7 Jun 2023 16:34:07 -0700
Subject: [PATCH 4/6] Undo everything, even nargsort

---
 pandas/core/sorting.py                        |   2 +-
 pandas/tests/frame/methods/test_nlargest.py   |  16 +-
 .../tests/frame/methods/test_sort_values.py   |   8 +-
 pandas/tests/groupby/test_value_counts.py     | 233 +++++-------------
 4 files changed, 67 insertions(+), 192 deletions(-)

diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py
index 0c0b312c11c48..b63f3f28b8f6c 100644
--- a/pandas/core/sorting.py
+++ b/pandas/core/sorting.py
@@ -424,7 +424,7 @@ def lexsort_indexer(
 
 def nargsort(
     items: ArrayLike | Index | Series,
-    kind: SortKind = "stable",
+    kind: SortKind = "quicksort",
     ascending: bool = True,
     na_position: str = "last",
     key: Callable | None = None,
diff --git a/pandas/tests/frame/methods/test_nlargest.py b/pandas/tests/frame/methods/test_nlargest.py
index 0717a99de74d4..b5c33a41dd780 100644
--- a/pandas/tests/frame/methods/test_nlargest.py
+++ b/pandas/tests/frame/methods/test_nlargest.py
@@ -9,7 +9,6 @@
 
 import pandas as pd
 import pandas._testing as tm
-from pandas.util.version import Version
 
 
 @pytest.fixture
@@ -156,7 +155,7 @@ def test_nlargest_n_identical_values(self):
         [["a", "b", "c"], ["c", "b", "a"], ["a"], ["b"], ["a", "b"], ["c", "b"]],
     )
     @pytest.mark.parametrize("n", range(1, 6))
-    def test_nlargest_n_duplicate_index(self, df_duplicates, n, order, request):
+    def test_nlargest_n_duplicate_index(self, df_duplicates, n, order):
         # GH#13412
 
         df = df_duplicates
@@ -165,18 +164,7 @@ def test_nlargest_n_duplicate_index(self, df_duplicates, n, order, request):
         tm.assert_frame_equal(result, expected)
 
         result = df.nlargest(n, order)
-        expected = df.sort_values(order, ascending=False, kind="stable").head(n)
-        if (
-            n == 5
-            and order in (["a"], ["a", "b"])
-            and Version(np.__version__) >= Version("1.25")
-        ):
-            # TODO: Change the expected comparison
-            request.node.add_marker(
-                pytest.mark.xfail(
-                    reason="default sorting is unstable; numpy sorting changed in 1.25"
-                )
-            )
+        expected = df.sort_values(order, ascending=False).head(n)
         tm.assert_frame_equal(result, expected)
 
     def test_nlargest_duplicate_keep_all_ties(self):
diff --git a/pandas/tests/frame/methods/test_sort_values.py b/pandas/tests/frame/methods/test_sort_values.py
index 08967eafcecb3..e2877acbdd040 100644
--- a/pandas/tests/frame/methods/test_sort_values.py
+++ b/pandas/tests/frame/methods/test_sort_values.py
@@ -878,17 +878,13 @@ def test_sort_column_level_and_index_label(
         # transposing. For some cases this will result in a frame with
         # multiple column levels
         expected = (
-            df_none.sort_values(
-                by=sort_names, ascending=ascending, axis=0, kind="stable"
-            )
+            df_none.sort_values(by=sort_names, ascending=ascending, axis=0)
             .set_index(levels)
             .T
         )
 
         # Compute result by transposing and sorting on axis=1.
-        result = df_idx.T.sort_values(
-            by=sort_names, ascending=ascending, axis=1, kind="stable"
-        )
+        result = df_idx.T.sort_values(by=sort_names, ascending=ascending, axis=1)
 
         tm.assert_frame_equal(result, expected)
 
diff --git a/pandas/tests/groupby/test_value_counts.py b/pandas/tests/groupby/test_value_counts.py
index ee83dbcefd305..5477ad75a56f7 100644
--- a/pandas/tests/groupby/test_value_counts.py
+++ b/pandas/tests/groupby/test_value_counts.py
@@ -21,7 +21,6 @@
     to_datetime,
 )
 import pandas._testing as tm
-from pandas.util.version import Version
 
 
 def tests_value_counts_index_names_category_column():
@@ -252,27 +251,16 @@ def test_basic(education_df):
     result = education_df.groupby("country")[["gender", "education"]].value_counts(
         normalize=True
     )
-    if Version(np.__version__) >= Version("1.25"):
-        # default sorting is unstable; numpy sorting changed in 1.25
-        expected_tuples = [
-            ("FR", "male", "low"),
-            ("FR", "female", "high"),
-            ("FR", "male", "medium"),
-            ("US", "male", "low"),
-            ("US", "female", "high"),
-        ]
-    else:
-        expected_tuples = [
-            ("FR", "male", "low"),
-            ("FR", "female", "high"),
-            ("FR", "male", "medium"),
-            ("US", "female", "high"),
-            ("US", "male", "low"),
-        ]
     expected = Series(
         data=[0.5, 0.25, 0.25, 0.5, 0.5],
         index=MultiIndex.from_tuples(
-            expected_tuples,
+            [
+                ("FR", "male", "low"),
+                ("FR", "female", "high"),
+                ("FR", "male", "medium"),
+                ("US", "female", "high"),
+                ("US", "male", "low"),
+            ],
             names=["country", "gender", "education"],
         ),
         name="proportion",
@@ -297,7 +285,7 @@ def _frame_value_counts(df, keys, normalize, sort, ascending):
 @pytest.mark.parametrize("as_index", [True, False])
 @pytest.mark.parametrize("frame", [True, False])
 def test_against_frame_and_seriesgroupby(
-    education_df, groupby, normalize, name, sort, ascending, as_index, frame, request
+    education_df, groupby, normalize, name, sort, ascending, as_index, frame
 ):
     # test all parameters:
     # - Use column, array or function as by= parameter
@@ -307,13 +295,6 @@ def test_against_frame_and_seriesgroupby(
     # - 3-way compare against:
     #   - apply with :meth:`~DataFrame.value_counts`
     #   - `~SeriesGroupBy.value_counts`
-    # if sort and name == "proportion" and Version(np.__version__) >= Version("1.25"):
-    #     # TODO: Change the expected comparison
-    #     request.node.add_marker(
-    #         pytest.mark.xfail(
-    #             reason="default sorting is unstable; numpy sorting changed in 1.25"
-    #         )
-    #     )
     by = {
         "column": "country",
         "array": education_df["country"].values,
@@ -460,36 +441,22 @@ def nulls_df():
     )
 
 
-# TODO: Actually fix the expected result for the xfails
 @pytest.mark.parametrize(
     "group_dropna, count_dropna, expected_rows, expected_values",
     [
-        pytest.param(
+        (
             False,
             False,
             [0, 1, 3, 5, 7, 6, 8, 2, 4],
             [0.5, 0.5, 1.0, 0.25, 0.25, 0.25, 0.25, 1.0, 1.0],
-            marks=pytest.mark.xfail(
-                Version(np.__version__) >= Version("1.25"),
-                reason="default sorting is unstable; numpy sorting changed in 1.25",
-            ),
-        ),
-        pytest.param(
-            False,
-            True,
-            [0, 1, 3, 5, 2, 4],
-            [0.5, 0.5, 1.0, 1.0, 1.0, 1.0],
-            marks=pytest.mark.xfail(
-                Version(np.__version__) >= Version("1.25"),
-                reason="default sorting is unstable; numpy sorting changed in 1.25",
-            ),
         ),
+        (False, True, [0, 1, 3, 5, 2, 4], [0.5, 0.5, 1.0, 1.0, 1.0, 1.0]),
         (True, False, [0, 1, 5, 7, 6, 8], [0.5, 0.5, 0.25, 0.25, 0.25, 0.25]),
         (True, True, [0, 1, 5], [0.5, 0.5, 1.0]),
     ],
 )
 def test_dropna_combinations(
-    nulls_df, group_dropna, count_dropna, expected_rows, expected_values, request
+    nulls_df, group_dropna, count_dropna, expected_rows, expected_values
 ):
     gp = nulls_df.groupby(["A", "B"], dropna=group_dropna)
     result = gp.value_counts(normalize=True, sort=True, dropna=count_dropna)
@@ -591,9 +558,8 @@ def test_categorical_single_grouper_with_only_observed_categories(
     )
     result = gp.value_counts(normalize=normalize)
 
-    if Version(np.__version__) < Version("1.25"):
-        # default sorting is unstable; numpy sorting changed
-        expected_tuples = [
+    expected_index = MultiIndex.from_tuples(
+        [
             ("FR", "male", "low"),
             ("FR", "female", "high"),
             ("FR", "male", "medium"),
@@ -606,25 +572,7 @@ def test_categorical_single_grouper_with_only_observed_categories(
             ("US", "female", "medium"),
             ("US", "male", "high"),
             ("US", "male", "medium"),
-        ]
-    else:
-        expected_tuples = [
-            ("FR", "male", "low"),
-            ("FR", "female", "high"),
-            ("FR", "male", "medium"),
-            ("FR", "male", "high"),
-            ("FR", "female", "low"),
-            ("FR", "female", "medium"),
-            ("US", "female", "high"),
-            ("US", "male", "low"),
-            ("US", "female", "low"),
-            ("US", "female", "medium"),
-            ("US", "male", "high"),
-            ("US", "male", "medium"),
-        ]
-
-    expected_index = MultiIndex.from_tuples(
-        expected_tuples,
+        ],
         names=["country", "gender", "education"],
     )
 
@@ -703,37 +651,20 @@ def test_categorical_single_grouper_observed_true(
 ):
     # GH#46357
 
-    if Version(np.__version__) < Version("1.25"):
-        # default sorting is unstable; numpy sorting changed
-        expected_index = [
-            ("FR", "male", "low"),
-            ("FR", "female", "high"),
-            ("FR", "male", "medium"),
-            ("FR", "female", "low"),
-            ("FR", "female", "medium"),
-            ("FR", "male", "high"),
-            ("US", "female", "high"),
-            ("US", "male", "low"),
-            ("US", "female", "low"),
-            ("US", "female", "medium"),
-            ("US", "male", "high"),
-            ("US", "male", "medium"),
-        ]
-    else:
-        expected_index = [
-            ("FR", "male", "low"),
-            ("FR", "female", "high"),
-            ("FR", "male", "high"),
-            ("FR", "male", "medium"),
-            ("FR", "female", "low"),
-            ("FR", "female", "medium"),
-            ("US", "female", "high"),
-            ("US", "male", "low"),
-            ("US", "female", "low"),
-            ("US", "female", "medium"),
-            ("US", "male", "high"),
-            ("US", "male", "medium"),
-        ]
+    expected_index = [
+        ("FR", "male", "low"),
+        ("FR", "female", "high"),
+        ("FR", "male", "medium"),
+        ("FR", "female", "low"),
+        ("FR", "female", "medium"),
+        ("FR", "male", "high"),
+        ("US", "female", "high"),
+        ("US", "male", "low"),
+        ("US", "female", "low"),
+        ("US", "female", "medium"),
+        ("US", "male", "high"),
+        ("US", "male", "medium"),
+    ]
 
     assert_categorical_single_grouper(
         education_df=education_df,
@@ -790,49 +721,26 @@ def test_categorical_single_grouper_observed_false(
 ):
     # GH#46357
 
-    if Version(np.__version__) < Version("1.25"):
-        # default sorting is unstable; numpy sorting changed
-        expected_index = [
-            ("FR", "male", "low"),
-            ("FR", "female", "high"),
-            ("FR", "male", "medium"),
-            ("FR", "female", "low"),
-            ("FR", "male", "high"),
-            ("FR", "female", "medium"),
-            ("US", "female", "high"),
-            ("US", "male", "low"),
-            ("US", "male", "medium"),
-            ("US", "male", "high"),
-            ("US", "female", "medium"),
-            ("US", "female", "low"),
-            ("ASIA", "male", "low"),
-            ("ASIA", "male", "high"),
-            ("ASIA", "female", "medium"),
-            ("ASIA", "female", "low"),
-            ("ASIA", "female", "high"),
-            ("ASIA", "male", "medium"),
-        ]
-    else:
-        expected_index = [
-            ("FR", "male", "low"),
-            ("FR", "female", "high"),
-            ("FR", "male", "medium"),
-            ("FR", "female", "low"),
-            ("FR", "female", "medium"),
-            ("FR", "male", "high"),
-            ("US", "female", "high"),
-            ("US", "male", "low"),
-            ("US", "female", "low"),
-            ("US", "female", "medium"),
-            ("US", "male", "high"),
-            ("US", "male", "medium"),
-            ("ASIA", "female", "high"),
-            ("ASIA", "female", "low"),
-            ("ASIA", "female", "medium"),
-            ("ASIA", "male", "high"),
-            ("ASIA", "male", "low"),
-            ("ASIA", "male", "medium"),
-        ]
+    expected_index = [
+        ("FR", "male", "low"),
+        ("FR", "female", "high"),
+        ("FR", "male", "medium"),
+        ("FR", "female", "low"),
+        ("FR", "male", "high"),
+        ("FR", "female", "medium"),
+        ("US", "female", "high"),
+        ("US", "male", "low"),
+        ("US", "male", "medium"),
+        ("US", "male", "high"),
+        ("US", "female", "medium"),
+        ("US", "female", "low"),
+        ("ASIA", "male", "low"),
+        ("ASIA", "male", "high"),
+        ("ASIA", "female", "medium"),
+        ("ASIA", "female", "low"),
+        ("ASIA", "female", "high"),
+        ("ASIA", "male", "medium"),
+    ]
 
     assert_categorical_single_grouper(
         education_df=education_df,
@@ -961,37 +869,20 @@ def test_categorical_non_groupers(
     gp = education_df.groupby("country", as_index=as_index, observed=observed)
     result = gp.value_counts(normalize=normalize)
 
-    if Version(np.__version__) < Version("1.25"):
-        # default sorting is unstable; numpy sorting changed
-        expected_index = [
-            ("FR", "male", "low"),
-            ("FR", "female", "high"),
-            ("FR", "male", "medium"),
-            ("FR", "female", "low"),
-            ("FR", "female", "medium"),
-            ("FR", "male", "high"),
-            ("US", "female", "high"),
-            ("US", "male", "low"),
-            ("US", "female", "low"),
-            ("US", "female", "medium"),
-            ("US", "male", "high"),
-            ("US", "male", "medium"),
-        ]
-    else:
-        expected_index = [
-            ("FR", "male", "low"),
-            ("FR", "female", "high"),
-            ("FR", "male", "medium"),
-            ("FR", "male", "high"),
-            ("FR", "female", "low"),
-            ("FR", "female", "medium"),
-            ("US", "female", "high"),
-            ("US", "male", "low"),
-            ("US", "female", "low"),
-            ("US", "female", "medium"),
-            ("US", "male", "high"),
-            ("US", "male", "medium"),
-        ]
+    expected_index = [
+        ("FR", "male", "low"),
+        ("FR", "female", "high"),
+        ("FR", "male", "medium"),
+        ("FR", "female", "low"),
+        ("FR", "female", "medium"),
+        ("FR", "male", "high"),
+        ("US", "female", "high"),
+        ("US", "male", "low"),
+        ("US", "female", "low"),
+        ("US", "female", "medium"),
+        ("US", "male", "high"),
+        ("US", "male", "medium"),
+    ]
     expected_series = Series(
         data=expected_data,
         index=MultiIndex.from_tuples(

From 679dae4dbeef108378158b23f266da646acc26fa Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Wed, 7 Jun 2023 18:19:37 -0700
Subject: [PATCH 5/6] xfail the relevant tests

---
 pandas/tests/frame/methods/test_nlargest.py   | 15 +++-
 .../tests/frame/methods/test_sort_values.py   | 16 +++-
 pandas/tests/groupby/test_value_counts.py     | 89 +++++++++++++++++--
 3 files changed, 111 insertions(+), 9 deletions(-)

diff --git a/pandas/tests/frame/methods/test_nlargest.py b/pandas/tests/frame/methods/test_nlargest.py
index b5c33a41dd780..17dea51263222 100644
--- a/pandas/tests/frame/methods/test_nlargest.py
+++ b/pandas/tests/frame/methods/test_nlargest.py
@@ -9,6 +9,7 @@
 
 import pandas as pd
 import pandas._testing as tm
+from pandas.util.version import Version
 
 
 @pytest.fixture
@@ -155,7 +156,7 @@ def test_nlargest_n_identical_values(self):
         [["a", "b", "c"], ["c", "b", "a"], ["a"], ["b"], ["a", "b"], ["c", "b"]],
     )
     @pytest.mark.parametrize("n", range(1, 6))
-    def test_nlargest_n_duplicate_index(self, df_duplicates, n, order):
+    def test_nlargest_n_duplicate_index(self, df_duplicates, n, order, request):
         # GH#13412
 
         df = df_duplicates
@@ -165,6 +166,18 @@ def test_nlargest_n_duplicate_index(self, df_duplicates, n, order):
 
         result = df.nlargest(n, order)
         expected = df.sort_values(order, ascending=False).head(n)
+        if Version(np.__version__) >= Version("1.25") and (
+            (order == ["a"] and n in (1, 2, 3, 4)) or (order == ["a", "b"]) and n == 5
+        ):
+            request.node.add_marker(
+                pytest.mark.xfail(
+                    reason=(
+                        "pandas default unstable sorting of duplicates "
+                        "issue with numpy>=1.25 with AVX instructions"
+                    ),
+                    strict=False,
+                )
+            )
         tm.assert_frame_equal(result, expected)
 
     def test_nlargest_duplicate_keep_all_ties(self):
diff --git a/pandas/tests/frame/methods/test_sort_values.py b/pandas/tests/frame/methods/test_sort_values.py
index e2877acbdd040..3440c73d19ecf 100644
--- a/pandas/tests/frame/methods/test_sort_values.py
+++ b/pandas/tests/frame/methods/test_sort_values.py
@@ -12,6 +12,7 @@
     date_range,
 )
 import pandas._testing as tm
+from pandas.util.version import Version
 
 
 class TestDataFrameSortValues:
@@ -849,9 +850,22 @@ def ascending(request):
 
 class TestSortValuesLevelAsStr:
     def test_sort_index_level_and_column_label(
-        self, df_none, df_idx, sort_names, ascending
+        self, df_none, df_idx, sort_names, ascending, request
     ):
         # GH#14353
+        if (
+            Version(np.__version__) >= Version("1.25")
+            and request.node.callspec.id == "df_idx0-inner-True"
+        ):
+            request.node.add_marker(
+                pytest.mark.xfail(
+                    reason=(
+                        "pandas default unstable sorting of duplicates "
+                        "issue with numpy>=1.25 with AVX instructions"
+                    ),
+                    strict=False,
+                )
+            )
 
         # Get index levels from df_idx
         levels = df_idx.index.names
diff --git a/pandas/tests/groupby/test_value_counts.py b/pandas/tests/groupby/test_value_counts.py
index 5477ad75a56f7..78c8b6b236b65 100644
--- a/pandas/tests/groupby/test_value_counts.py
+++ b/pandas/tests/groupby/test_value_counts.py
@@ -21,6 +21,7 @@
     to_datetime,
 )
 import pandas._testing as tm
+from pandas.util.version import Version
 
 
 def tests_value_counts_index_names_category_column():
@@ -246,8 +247,18 @@ def test_bad_subset(education_df):
         gp.value_counts(subset=["country"])
 
 
-def test_basic(education_df):
+def test_basic(education_df, request):
     # gh43564
+    if Version(np.__version__) >= Version("1.25"):
+        request.node.add_marker(
+            pytest.mark.xfail(
+                reason=(
+                    "pandas default unstable sorting of duplicates "
+                    "issue with numpy>=1.25 with AVX instructions"
+                ),
+                strict=False,
+            )
+        )
     result = education_df.groupby("country")[["gender", "education"]].value_counts(
         normalize=True
     )
@@ -285,7 +296,7 @@ def _frame_value_counts(df, keys, normalize, sort, ascending):
 @pytest.mark.parametrize("as_index", [True, False])
 @pytest.mark.parametrize("frame", [True, False])
 def test_against_frame_and_seriesgroupby(
-    education_df, groupby, normalize, name, sort, ascending, as_index, frame
+    education_df, groupby, normalize, name, sort, ascending, as_index, frame, request
 ):
     # test all parameters:
     # - Use column, array or function as by= parameter
@@ -295,6 +306,16 @@ def test_against_frame_and_seriesgroupby(
     # - 3-way compare against:
     #   - apply with :meth:`~DataFrame.value_counts`
     #   - `~SeriesGroupBy.value_counts`
+    if Version(np.__version__) >= Version("1.25") and frame and sort and normalize:
+        request.node.add_marker(
+            pytest.mark.xfail(
+                reason=(
+                    "pandas default unstable sorting of duplicates "
+                    "issue with numpy>=1.25 with AVX instructions"
+                ),
+                strict=False,
+            )
+        )
     by = {
         "column": "country",
         "array": education_df["country"].values,
@@ -456,8 +477,18 @@ def nulls_df():
     ],
 )
 def test_dropna_combinations(
-    nulls_df, group_dropna, count_dropna, expected_rows, expected_values
+    nulls_df, group_dropna, count_dropna, expected_rows, expected_values, request
 ):
+    if Version(np.__version__) >= Version("1.25") and not group_dropna:
+        request.node.add_marker(
+            pytest.mark.xfail(
+                reason=(
+                    "pandas default unstable sorting of duplicates "
+                    "issue with numpy>=1.25 with AVX instructions"
+                ),
+                strict=False,
+            )
+        )
     gp = nulls_df.groupby(["A", "B"], dropna=group_dropna)
     result = gp.value_counts(normalize=True, sort=True, dropna=count_dropna)
     columns = DataFrame()
@@ -548,10 +579,20 @@ def test_data_frame_value_counts_dropna(
     ],
 )
 def test_categorical_single_grouper_with_only_observed_categories(
-    education_df, as_index, observed, normalize, name, expected_data
+    education_df, as_index, observed, normalize, name, expected_data, request
 ):
     # Test single categorical grouper with only observed grouping categories
     # when non-groupers are also categorical
+    if Version(np.__version__) >= Version("1.25"):
+        request.node.add_marker(
+            pytest.mark.xfail(
+                reason=(
+                    "pandas default unstable sorting of duplicates "
+                    "issue with numpy>=1.25 with AVX instructions"
+                ),
+                strict=False,
+            )
+        )
 
     gp = education_df.astype("category").groupby(
         "country", as_index=as_index, observed=observed
@@ -647,10 +688,21 @@ def assert_categorical_single_grouper(
     ],
 )
 def test_categorical_single_grouper_observed_true(
-    education_df, as_index, normalize, name, expected_data
+    education_df, as_index, normalize, name, expected_data, request
 ):
     # GH#46357
 
+    if Version(np.__version__) >= Version("1.25"):
+        request.node.add_marker(
+            pytest.mark.xfail(
+                reason=(
+                    "pandas default unstable sorting of duplicates "
+                    "issue with numpy>=1.25 with AVX instructions"
+                ),
+                strict=False,
+            )
+        )
+
     expected_index = [
         ("FR", "male", "low"),
         ("FR", "female", "high"),
@@ -717,10 +769,21 @@ def test_categorical_single_grouper_observed_true(
     ],
 )
 def test_categorical_single_grouper_observed_false(
-    education_df, as_index, normalize, name, expected_data
+    education_df, as_index, normalize, name, expected_data, request
 ):
     # GH#46357
 
+    if Version(np.__version__) >= Version("1.25"):
+        request.node.add_marker(
+            pytest.mark.xfail(
+                reason=(
+                    "pandas default unstable sorting of duplicates "
+                    "issue with numpy>=1.25 with AVX instructions"
+                ),
+                strict=False,
+            )
+        )
+
     expected_index = [
         ("FR", "male", "low"),
         ("FR", "female", "high"),
@@ -858,10 +921,22 @@ def test_categorical_multiple_groupers(
     ],
 )
 def test_categorical_non_groupers(
-    education_df, as_index, observed, normalize, name, expected_data
+    education_df, as_index, observed, normalize, name, expected_data, request
 ):
     # GH#46357 Test non-observed categories are included in the result,
     # regardless of `observed`
+
+    if Version(np.__version__) >= Version("1.25"):
+        request.node.add_marker(
+            pytest.mark.xfail(
+                reason=(
+                    "pandas default unstable sorting of duplicates "
+                    "issue with numpy>=1.25 with AVX instructions"
+                ),
+                strict=False,
+            )
+        )
+
     education_df = education_df.copy()
     education_df["gender"] = education_df["gender"].astype("category")
     education_df["education"] = education_df["education"].astype("category")

From 3ee8d834edff48506d71be1a91029948e592b3e6 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Thu, 8 Jun 2023 08:19:38 -0700
Subject: [PATCH 6/6] Add xfail to test_sort_column_level_and_index_label

---
 pandas/tests/frame/methods/test_sort_values.py | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/pandas/tests/frame/methods/test_sort_values.py b/pandas/tests/frame/methods/test_sort_values.py
index 3440c73d19ecf..4c41632040dbe 100644
--- a/pandas/tests/frame/methods/test_sort_values.py
+++ b/pandas/tests/frame/methods/test_sort_values.py
@@ -881,7 +881,7 @@ def test_sort_index_level_and_column_label(
         tm.assert_frame_equal(result, expected)
 
     def test_sort_column_level_and_index_label(
-        self, df_none, df_idx, sort_names, ascending
+        self, df_none, df_idx, sort_names, ascending, request
     ):
         # GH#14353
 
@@ -900,6 +900,17 @@ def test_sort_column_level_and_index_label(
         # Compute result by transposing and sorting on axis=1.
         result = df_idx.T.sort_values(by=sort_names, ascending=ascending, axis=1)
 
+        if Version(np.__version__) >= Version("1.25"):
+            request.node.add_marker(
+                pytest.mark.xfail(
+                    reason=(
+                        "pandas default unstable sorting of duplicates "
+                        "issue with numpy>=1.25 with AVX instructions"
+                    ),
+                    strict=False,
+                )
+            )
+
         tm.assert_frame_equal(result, expected)
 
     def test_sort_values_validate_ascending_for_value_error(self):