From b2e39265154f8a5ec14a14f9d9c70d24798c14f7 Mon Sep 17 00:00:00 2001
From: jbrockmendel
Date: Fri, 27 Mar 2020 11:29:15 -0700
Subject: [PATCH 1/2] CLN: avoid using internals methods for DataFrame.drop_duplicates

---
 pandas/core/frame.py | 18 ++++++------------
 1 file changed, 6 insertions(+), 12 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 3fbe171a1dade..05c6d4b88f8db 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -4678,22 +4678,16 @@ def drop_duplicates(
         inplace = validate_bool_kwarg(inplace, "inplace")
         duplicated = self.duplicated(subset, keep=keep)
 
-        if inplace:
-            (inds,) = np.asarray(-duplicated).nonzero()
-            new_data = self._data.take(inds)
+        result = self[-duplicated]
+        if ignore_index:
+            result.index = range(len(result))
 
-            if ignore_index:
-                new_data.axes[1] = ibase.default_index(len(inds))
-            self._update_inplace(new_data)
+        if inplace:
+            self._update_inplace(result._data)
+            return None
         else:
-            result = self[-duplicated]
-
-            if ignore_index:
-                result.index = ibase.default_index(len(result))
             return result
 
-        return None
-
     def duplicated(
         self,
         subset: Optional[Union[Hashable, Sequence[Hashable]]] = None,

From a0cb76b1c6601014f7d0c22cca1c2bb3d6a9c1bb Mon Sep 17 00:00:00 2001
From: jbrockmendel
Date: Sat, 28 Mar 2020 18:16:40 -0700
Subject: [PATCH 2/2] use default_index

---
 pandas/core/frame.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index a6241d591a484..5c04678cd5fa6 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -4680,7 +4680,7 @@ def drop_duplicates(
 
         result = self[-duplicated]
         if ignore_index:
-            result.index = range(len(result))
+            result.index = ibase.default_index(len(result))
 
         if inplace:
             self._update_inplace(result._data)