Skip to content

Commit 1fba88c

Browse files
committed
TST/CLN: group tests in frame/test_analytics; prep for #21645
1 parent da6e26d commit 1fba88c

File tree

1 file changed

+30
-30
lines changed

1 file changed

+30
-30
lines changed

pandas/tests/frame/test_analytics.py

+30-30
Original file line numberDiff line numberDiff line change
@@ -1545,6 +1545,36 @@ def test_isin_empty_datetimelike(self):
15451545
# ----------------------------------------------------------------------
15461546
# Row deduplication
15471547

1548+
@pytest.mark.parametrize('subset', ['a', ['a'], ['a', 'B']])
1549+
def test_duplicated_with_misspelled_column_name(self, subset):
1550+
# GH 19730
1551+
df = pd.DataFrame({'A': [0, 0, 1],
1552+
'B': [0, 0, 1],
1553+
'C': [0, 0, 1]})
1554+
1555+
with pytest.raises(KeyError):
1556+
df.duplicated(subset)
1557+
1558+
with pytest.raises(KeyError):
1559+
df.drop_duplicates(subset)
1560+
1561+
@pytest.mark.slow
1562+
def test_duplicated_do_not_fail_on_wide_dataframes(self):
1563+
# gh-21524
1564+
# Given the wide dataframe with a lot of columns
1565+
# with different (important!) values
1566+
data = {'col_{0:02d}'.format(i): np.random.randint(0, 1000, 30000)
1567+
for i in range(100)}
1568+
df = pd.DataFrame(data).T
1569+
result = df.duplicated()
1570+
1571+
# Then duplicates produce the bool pd.Series as a result
1572+
# and don't fail during calculation.
1573+
# Actual values don't matter here, though usually
1574+
# it's all False in this case
1575+
assert isinstance(result, pd.Series)
1576+
assert result.dtype == np.bool
1577+
15481578
def test_drop_duplicates(self):
15491579
df = DataFrame({'AAA': ['foo', 'bar', 'foo', 'bar',
15501580
'foo', 'bar', 'bar', 'foo'],
@@ -1640,36 +1670,6 @@ def test_drop_duplicates(self):
16401670
for keep in ['first', 'last', False]:
16411671
assert df.duplicated(keep=keep).sum() == 0
16421672

1643-
@pytest.mark.parametrize('subset', ['a', ['a'], ['a', 'B']])
1644-
def test_duplicated_with_misspelled_column_name(self, subset):
1645-
# GH 19730
1646-
df = pd.DataFrame({'A': [0, 0, 1],
1647-
'B': [0, 0, 1],
1648-
'C': [0, 0, 1]})
1649-
1650-
with pytest.raises(KeyError):
1651-
df.duplicated(subset)
1652-
1653-
with pytest.raises(KeyError):
1654-
df.drop_duplicates(subset)
1655-
1656-
@pytest.mark.slow
1657-
def test_duplicated_do_not_fail_on_wide_dataframes(self):
1658-
# gh-21524
1659-
# Given the wide dataframe with a lot of columns
1660-
# with different (important!) values
1661-
data = {'col_{0:02d}'.format(i): np.random.randint(0, 1000, 30000)
1662-
for i in range(100)}
1663-
df = pd.DataFrame(data).T
1664-
result = df.duplicated()
1665-
1666-
# Then duplicates produce the bool pd.Series as a result
1667-
# and don't fail during calculation.
1668-
# Actual values don't matter here, though usually
1669-
# it's all False in this case
1670-
assert isinstance(result, pd.Series)
1671-
assert result.dtype == np.bool
1672-
16731673
def test_drop_duplicates_with_duplicate_column_names(self):
16741674
# GH17836
16751675
df = DataFrame([

0 commit comments

Comments
 (0)