TST/CLN: group tests in frame/test_analytics; prep for #21645

h-vetinari · h-vetinari · commit 1fba88cee17a · 2018-07-13T21:30:27.000+02:00
diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py
@@ -1545,6 +1545,36 @@ def test_isin_empty_datetimelike(self):
     # ----------------------------------------------------------------------
     # Row deduplication
 
+    @pytest.mark.parametrize('subset', ['a', ['a'], ['a', 'B']])
+    def test_duplicated_with_misspelled_column_name(self, subset):
+        # GH 19730: labels in `subset` that are not columns raise KeyError
+        df = pd.DataFrame({'A': [0, 0, 1],
+                           'B': [0, 0, 1],
+                           'C': [0, 0, 1]})
+
+        with pytest.raises(KeyError):
+            df.duplicated(subset)
+
+        with pytest.raises(KeyError):
+            df.drop_duplicates(subset)
+
+    @pytest.mark.slow
+    def test_duplicated_do_not_fail_on_wide_dataframes(self):
+        # gh-21524
+        # Given a wide DataFrame with a lot of columns
+        # with different (important!) values
+        data = {'col_{0:02d}'.format(i): np.random.randint(0, 1000, 30000)
+                for i in range(100)}
+        df = pd.DataFrame(data).T
+        result = df.duplicated()
+
+        # Then duplicated() produces a bool pd.Series as the result
+        # and doesn't fail during calculation.
+        # The actual values don't matter here, though usually
+        # they are all False in this case
+        assert isinstance(result, pd.Series)
+        assert result.dtype == np.bool_
+
     def test_drop_duplicates(self):
         df = DataFrame({'AAA': ['foo', 'bar', 'foo', 'bar',
                                 'foo', 'bar', 'bar', 'foo'],
@@ -1640,36 +1670,6 @@ def test_drop_duplicates(self):
         for keep in ['first', 'last', False]:
             assert df.duplicated(keep=keep).sum() == 0
 
-    @pytest.mark.parametrize('subset', ['a', ['a'], ['a', 'B']])
-    def test_duplicated_with_misspelled_column_name(self, subset):
-        # GH 19730
-        df = pd.DataFrame({'A': [0, 0, 1],
-                           'B': [0, 0, 1],
-                           'C': [0, 0, 1]})
-
-        with pytest.raises(KeyError):
-            df.duplicated(subset)
-
-        with pytest.raises(KeyError):
-            df.drop_duplicates(subset)
-
-    @pytest.mark.slow
-    def test_duplicated_do_not_fail_on_wide_dataframes(self):
-        # gh-21524
-        # Given the wide dataframe with a lot of columns
-        # with different (important!) values
-        data = {'col_{0:02d}'.format(i): np.random.randint(0, 1000, 30000)
-                for i in range(100)}
-        df = pd.DataFrame(data).T
-        result = df.duplicated()
-
-        # Then duplicates produce the bool pd.Series as a result
-        # and don't fail during calculation.
-        # Actual values doesn't matter here, though usually
-        # it's all False in this case
-        assert isinstance(result, pd.Series)
-        assert result.dtype == np.bool
-
     def test_drop_duplicates_with_duplicate_column_names(self):
         # GH17836
         df = DataFrame([