From 7710240d74259f555dec7448f2d3378a11172a72 Mon Sep 17 00:00:00 2001 From: MainHanzo <33153091+MainHanzo@users.noreply.github.com> Date: Wed, 6 Dec 2023 21:22:56 +0100 Subject: [PATCH 1/6] Update nlargest nsmallest doc --- pandas/core/frame.py | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index e741fa7b37f33..1329cc451eec9 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -7505,7 +7505,7 @@ def nlargest( - ``first`` : prioritize the first occurrence(s) - ``last`` : prioritize the last occurrence(s) - - ``all`` : do not drop any duplicates, even it means + - ``all`` : keep all the ties of the smallest item even it means selecting more than `n` items. Returns @@ -7568,7 +7568,9 @@ def nlargest( Italy 59000000 1937894 IT Brunei 434000 12128 BN - When using ``keep='all'``, all duplicate items are maintained: + When using ``keep='all'``, the number of element kept can go beyond n + if there are duplicates value for the smallest element, all the + ties are kept: >>> df.nlargest(3, 'population', keep='all') population GDP alpha-2 @@ -7578,6 +7580,17 @@ def nlargest( Maldives 434000 4520 MV Brunei 434000 12128 BN + However, ``nlargest`` does not keep n distinct largest elements: + + >>> df.nlargest(5, 'population', keep='all') + population GDP alpha-2 + France 65000000 2583560 FR + Italy 59000000 1937894 IT + Malta 434000 12011 MT + Maldives 434000 4520 MV + Brunei 434000 12128 BN + + To order by the largest values in column "population" and then "GDP", we can specify multiple columns like in the next example. @@ -7614,7 +7627,7 @@ def nsmallest( - ``first`` : take the first occurrence. - ``last`` : take the last occurrence. - - ``all`` : do not drop any duplicates, even it means + - ``all`` : keep all the ties of the largest item even it means selecting more than `n` items. 
Returns @@ -7669,7 +7682,10 @@ def nsmallest( Tuvalu 11300 38 TV Nauru 337000 182 NR - When using ``keep='all'``, all duplicate items are maintained: + When using ``keep='all'``, the number of element kept can go beyond n + if there are duplicates value for the largest element, all the + ties are kept. However, ``nsmallest`` does not keep n distinct + smallest elements: >>> df.nsmallest(3, 'population', keep='all') population GDP alpha-2 From da0026c75b072bc101cb71307244365aec9609e8 Mon Sep 17 00:00:00 2001 From: MainHanzo <33153091+MainHanzo@users.noreply.github.com> Date: Wed, 6 Dec 2023 22:21:09 +0100 Subject: [PATCH 2/6] double line break --- pandas/core/frame.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 1329cc451eec9..9514a6684ab75 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -7590,7 +7590,6 @@ def nlargest( Maldives 434000 4520 MV Brunei 434000 12128 BN - To order by the largest values in column "population" and then "GDP", we can specify multiple columns like in the next example. From 479728f2a855bb21941fc6b0ae25f960d5200928 Mon Sep 17 00:00:00 2001 From: MainHanzo <33153091+MainHanzo@users.noreply.github.com> Date: Thu, 7 Dec 2023 16:28:59 +0100 Subject: [PATCH 3/6] code review --- pandas/core/frame.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 9514a6684ab75..a22142904e73d 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -7505,8 +7505,8 @@ def nlargest( - ``first`` : prioritize the first occurrence(s) - ``last`` : prioritize the last occurrence(s) - - ``all`` : keep all the ties of the smallest item even it means - selecting more than `n` items. + - ``all`` : keep all the ties of the smallest item even if it means + selecting more than ``n`` items. 
Returns ------- @@ -7568,8 +7568,8 @@ def nlargest( Italy 59000000 1937894 IT Brunei 434000 12128 BN - When using ``keep='all'``, the number of element kept can go beyond n - if there are duplicates value for the smallest element, all the + When using ``keep='all'``, the number of element kept can go beyond ``n`` + if there are duplicate values for the smallest element, all the ties are kept: >>> df.nlargest(3, 'population', keep='all') @@ -7580,7 +7580,7 @@ def nlargest( Maldives 434000 4520 MV Brunei 434000 12128 BN - However, ``nlargest`` does not keep n distinct largest elements: + However, ``nlargest`` does not keep ``n`` distinct largest elements: >>> df.nlargest(5, 'population', keep='all') population GDP alpha-2 @@ -7626,8 +7626,8 @@ def nsmallest( - ``first`` : take the first occurrence. - ``last`` : take the last occurrence. - - ``all`` : keep all the ties of the largest item even it means - selecting more than `n` items. + - ``all`` : keep all the ties of the largest item even if it means + selecting more than ``n`` items. Returns ------- @@ -7681,9 +7681,9 @@ def nsmallest( Tuvalu 11300 38 TV Nauru 337000 182 NR - When using ``keep='all'``, the number of element kept can go beyond n - if there are duplicates value for the largest element, all the - ties are kept. However, ``nsmallest`` does not keep n distinct + When using ``keep='all'``, the number of element kept can go beyond ``n`` + if there are duplicate values for the largest element, all the + ties are kept. 
However, ``nsmallest`` does not keep ``n`` distinct smallest elements: >>> df.nsmallest(3, 'population', keep='all') From 56ff72417b4076132e6947c03673c10f964e1a9c Mon Sep 17 00:00:00 2001 From: MainHanzo <33153091+MainHanzo@users.noreply.github.com> Date: Sat, 9 Dec 2023 01:54:54 +0100 Subject: [PATCH 4/6] code review for nsmallest --- pandas/core/frame.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index a22142904e73d..49f57c47d3ecd 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -7683,8 +7683,7 @@ def nsmallest( When using ``keep='all'``, the number of element kept can go beyond ``n`` if there are duplicate values for the largest element, all the - ties are kept. However, ``nsmallest`` does not keep ``n`` distinct - smallest elements: + ties are kept. >>> df.nsmallest(3, 'population', keep='all') population GDP alpha-2 @@ -7692,6 +7691,16 @@ def nsmallest( Anguilla 11300 311 AI Iceland 337000 17036 IS Nauru 337000 182 NR + + However, ``nsmallest`` does not keep ``n`` distinct + smallest elements: + + >>> df.nsmallest(4, 'population', keep='all') + population GDP alpha-2 + Tuvalu 11300 38 TV + Anguilla 11300 311 AI + Iceland 337000 17036 IS + Nauru 337000 182 NR To order by the smallest values in column "population" and then "GDP", we can specify multiple columns like in the next example. 
From 07399030aabe13df1e15e3fe30dfb400e04efafb Mon Sep 17 00:00:00 2001 From: MainHanzo <33153091+MainHanzo@users.noreply.github.com> Date: Mon, 11 Dec 2023 21:42:14 +0100 Subject: [PATCH 5/6] whitespace --- pandas/core/frame.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 49f57c47d3ecd..ce4f10b1bea1b 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -7568,7 +7568,7 @@ def nlargest( Italy 59000000 1937894 IT Brunei 434000 12128 BN - When using ``keep='all'``, the number of element kept can go beyond ``n`` + When using ``keep='all'``, the number of elements kept can go beyond ``n`` if there are duplicate values for the smallest element, all the ties are kept: @@ -7581,7 +7581,7 @@ def nlargest( Brunei 434000 12128 BN However, ``nlargest`` does not keep ``n`` distinct largest elements: - + >>> df.nlargest(5, 'population', keep='all') population GDP alpha-2 France 65000000 2583560 FR @@ -7589,7 +7589,7 @@ def nlargest( Malta 434000 12011 MT Maldives 434000 4520 MV Brunei 434000 12128 BN - + To order by the largest values in column "population" and then "GDP", we can specify multiple columns like in the next example. 
@@ -7691,7 +7691,7 @@ def nsmallest( Anguilla 11300 311 AI Iceland 337000 17036 IS Nauru 337000 182 NR - + However, ``nsmallest`` does not keep ``n`` distinct smallest elements: From a803dbfa4043c3ba2ac75e91a3bbb5718374c296 Mon Sep 17 00:00:00 2001 From: MainHanzo <33153091+MainHanzo@users.noreply.github.com> Date: Mon, 11 Dec 2023 22:18:16 +0100 Subject: [PATCH 6/6] whitespace --- pandas/core/frame.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index ce4f10b1bea1b..8ba9926c054ba 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -7681,9 +7681,9 @@ def nsmallest( Tuvalu 11300 38 TV Nauru 337000 182 NR - When using ``keep='all'``, the number of element kept can go beyond ``n`` + When using ``keep='all'``, the number of elements kept can go beyond ``n`` if there are duplicate values for the largest element, all the - ties are kept. + ties are kept: >>> df.nsmallest(3, 'population', keep='all') population GDP alpha-2