diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index cf12759c051fc..9a77e3accb3dc 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -711,7 +711,7 @@ Reshaping - Bug in :func:`get_dummies` with Unicode attributes in Python 2 (:issue:`22084`) - Bug in :meth:`DataFrame.replace` raises ``RecursionError`` when replacing empty lists (:issue:`22083`) - Bug in :meth:`Series.replace` and meth:`DataFrame.replace` when dict is used as the `to_replace` value and one key in the dict is is another key's value, the results were inconsistent between using integer key and using string key (:issue:`20656`) -- +- Bug in :meth:`DataFrame.drop_duplicates` where an empty ``DataFrame`` incorrectly raised an error (:issue:`20516`) Build Changes ^^^^^^^^^^^^^ diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 78ad9728800d6..0e1463693a7fc 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4335,6 +4335,9 @@ def drop_duplicates(self, subset=None, keep='first', inplace=False): ------- deduplicated : DataFrame """ + if self.empty: + return self.copy() + inplace = validate_bool_kwarg(inplace, 'inplace') duplicated = self.duplicated(subset, keep=keep) @@ -4369,6 +4372,9 @@ def duplicated(self, subset=None, keep='first'): from pandas.core.sorting import get_group_index from pandas._libs.hashtable import duplicated_int64, _SIZE_HINT_LIMIT + if self.empty: + return Series() + def f(vals): labels, shape = algorithms.factorize( vals, size_hint=min(len(self), _SIZE_HINT_LIMIT)) diff --git a/pandas/tests/frame/test_duplicates.py b/pandas/tests/frame/test_duplicates.py index 940692ec5b46a..3478d66b919a6 100644 --- a/pandas/tests/frame/test_duplicates.py +++ b/pandas/tests/frame/test_duplicates.py @@ -263,6 +263,23 @@ def test_drop_duplicates_tuple(): tm.assert_frame_equal(result, expected) +@pytest.mark.parametrize('df', [ + DataFrame(), + DataFrame(columns=[]), + DataFrame(columns=['A', 'B', 'C']), + 
DataFrame(index=[]), + DataFrame(index=['A', 'B', 'C']) +]) +def test_drop_duplicates_empty(df): + # GH 20516 + result = df.drop_duplicates() + tm.assert_frame_equal(result, df) + + result = df.copy() + result.drop_duplicates(inplace=True) + tm.assert_frame_equal(result, df) + + def test_drop_duplicates_NA(): # none df = DataFrame({'A': [None, None, 'foo', 'bar',