Skip to content

Commit fc06a19

Browse files
authored
bpo-35892: Fix mode() and add multimode() (#12089)
1 parent 3e93643 commit fc06a19

File tree

4 files changed

+97
-48
lines changed

4 files changed

+97
-48
lines changed

Doc/library/statistics.rst

Lines changed: 30 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -37,16 +37,17 @@ Averages and measures of central location
3737
These functions calculate an average or typical value from a population
3838
or sample.
3939

40-
======================= =============================================
40+
======================= ===============================================================
4141
:func:`mean` Arithmetic mean ("average") of data.
4242
:func:`fmean` Fast, floating point arithmetic mean.
4343
:func:`harmonic_mean` Harmonic mean of data.
4444
:func:`median` Median (middle value) of data.
4545
:func:`median_low` Low median of data.
4646
:func:`median_high` High median of data.
4747
:func:`median_grouped` Median, or 50th percentile, of grouped data.
48-
:func:`mode` Mode (most common value) of discrete data.
49-
======================= =============================================
48+
:func:`mode` Single mode (most common value) of discrete or nominal data.
49+
:func:`multimode` List of modes (most common values) of discrete or nominal data.
50+
======================= ===============================================================
5051

5152
Measures of spread
5253
------------------
@@ -287,12 +288,12 @@ However, for reading convenience, most of the examples show sorted sequences.
287288

288289
.. function:: mode(data)
289290

290-
Return the most common data point from discrete or nominal *data*. The mode
291-
(when it exists) is the most typical value, and is a robust measure of
292-
central location.
291+
Return the single most common data point from discrete or nominal *data*.
292+
The mode (when it exists) is the most typical value and serves as a
293+
measure of central location.
293294

294-
If *data* is empty, or if there is not exactly one most common value,
295-
:exc:`StatisticsError` is raised.
295+
If there are multiple modes, returns the first one encountered in the *data*.
296+
If *data* is empty, :exc:`StatisticsError` is raised.
296297

297298
``mode`` assumes discrete data, and returns a single value. This is the
298299
standard treatment of the mode as commonly taught in schools:
@@ -310,6 +311,27 @@ However, for reading convenience, most of the examples show sorted sequences.
310311
>>> mode(["red", "blue", "blue", "red", "green", "red", "red"])
311312
'red'
312313

314+
.. versionchanged:: 3.8
315+
Now handles multimodal datasets by returning the first mode encountered.
316+
Formerly, it raised :exc:`StatisticsError` when more than one mode was
317+
found.
318+
319+
320+
.. function:: multimode(data)
321+
322+
Return a list of the most frequently occurring values in the order they
323+
were first encountered in the *data*. Will return more than one result if
324+
there are multiple modes or an empty list if the *data* is empty:
325+
326+
.. doctest::
327+
328+
>>> multimode('aabbbbccddddeeffffgg')
329+
['b', 'd', 'f']
330+
>>> multimode('')
331+
[]
332+
333+
.. versionadded:: 3.8
334+
313335

314336
.. function:: pstdev(data, mu=None)
315337

Doc/whatsnew/3.8.rst

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -282,6 +282,9 @@ Added :func:`statistics.fmean` as a faster, floating point variant of
282282
:func:`statistics.mean()`. (Contributed by Raymond Hettinger and
283283
Steven D'Aprano in :issue:`35904`.)
284284

285+
Added :func:`statistics.multimode` that returns a list of the most
286+
common values. (Contributed by Raymond Hettinger in :issue:`35892`.)
287+
285288
Added :class:`statistics.NormalDist`, a tool for creating
286289
and manipulating normal distributions of a random variable.
287290
(Contributed by Raymond Hettinger in :issue:`36018`.)
@@ -591,6 +594,11 @@ Changes in the Python API
591594
* The function :func:`platform.popen` has been removed, it was deprecated since
592595
Python 3.3: use :func:`os.popen` instead.
593596

597+
* The :func:`statistics.mode` function no longer raises an exception
598+
when given multimodal data. Instead, it returns the first mode
599+
encountered in the input data. (Contributed by Raymond Hettinger
600+
in :issue:`35892`.)
601+
594602
* The :meth:`~tkinter.ttk.Treeview.selection` method of the
595603
:class:`tkinter.ttk.Treeview` class no longer takes arguments. Using it with
596604
arguments for changing the selection was deprecated in Python 3.6. Use

Lib/statistics.py

Lines changed: 40 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
median_high High median of data.
1818
median_grouped Median, or 50th percentile, of grouped data.
1919
mode Mode (most common value) of data.
20+
multimode List of modes (most common values) of data.
2021
================== =============================================
2122
2223
Calculate the arithmetic mean ("the average") of data:
@@ -79,10 +80,9 @@
7980
__all__ = [ 'StatisticsError', 'NormalDist',
8081
'pstdev', 'pvariance', 'stdev', 'variance',
8182
'median', 'median_low', 'median_high', 'median_grouped',
82-
'mean', 'mode', 'harmonic_mean', 'fmean',
83+
'mean', 'mode', 'multimode', 'harmonic_mean', 'fmean',
8384
]
8485

85-
import collections
8686
import math
8787
import numbers
8888
import random
@@ -92,8 +92,8 @@
9292
from itertools import groupby
9393
from bisect import bisect_left, bisect_right
9494
from math import hypot, sqrt, fabs, exp, erf, tau, log, fsum
95-
96-
95+
from operator import itemgetter
96+
from collections import Counter
9797

9898
# === Exceptions ===
9999

@@ -249,20 +249,6 @@ def _convert(value, T):
249249
raise
250250

251251

252-
def _counts(data):
253-
# Generate a table of sorted (value, frequency) pairs.
254-
table = collections.Counter(iter(data)).most_common()
255-
if not table:
256-
return table
257-
# Extract the values with the highest frequency.
258-
maxfreq = table[0][1]
259-
for i in range(1, len(table)):
260-
if table[i][1] != maxfreq:
261-
table = table[:i]
262-
break
263-
return table
264-
265-
266252
def _find_lteq(a, x):
267253
'Locate the leftmost value exactly equal to x'
268254
i = bisect_left(a, x)
@@ -334,9 +320,9 @@ def count(x):
334320
nonlocal n
335321
n += 1
336322
return x
337-
total = math.fsum(map(count, data))
323+
total = fsum(map(count, data))
338324
else:
339-
total = math.fsum(data)
325+
total = fsum(data)
340326
try:
341327
return total / n
342328
except ZeroDivisionError:
@@ -523,19 +509,38 @@ def mode(data):
523509
>>> mode(["red", "blue", "blue", "red", "green", "red", "red"])
524510
'red'
525511
526-
If there is not exactly one most common value, ``mode`` will raise
527-
StatisticsError.
512+
If there are multiple modes, return the first one encountered.
513+
514+
>>> mode(['red', 'red', 'green', 'blue', 'blue'])
515+
'red'
516+
517+
If *data* is empty, ``mode`` raises StatisticsError.
518+
528519
"""
529-
# Generate a table of sorted (value, frequency) pairs.
530-
table = _counts(data)
531-
if len(table) == 1:
532-
return table[0][0]
533-
elif table:
534-
raise StatisticsError(
535-
'no unique mode; found %d equally common values' % len(table)
536-
)
537-
else:
538-
raise StatisticsError('no mode for empty data')
520+
data = iter(data)
521+
try:
522+
return Counter(data).most_common(1)[0][0]
523+
except IndexError:
524+
raise StatisticsError('no mode for empty data') from None
525+
526+
527+
def multimode(data):
528+
""" Return a list of the most frequently occurring values.
529+
530+
Will return more than one result if there are multiple modes
531+
or an empty list if *data* is empty.
532+
533+
>>> multimode('aabbbbbbbbcc')
534+
['b']
535+
>>> multimode('aabbbbccddddeeffffgg')
536+
['b', 'd', 'f']
537+
>>> multimode('')
538+
[]
539+
540+
"""
541+
counts = Counter(iter(data)).most_common()
542+
maxcount, mode_items = next(groupby(counts, key=itemgetter(1)), (0, []))
543+
return list(map(itemgetter(0), mode_items))
539544

540545

541546
# === Measures of spread ===
@@ -836,6 +841,7 @@ def __repr__(self):
836841
from math import isclose
837842
from operator import add, sub, mul, truediv
838843
from itertools import repeat
844+
import doctest
839845

840846
g1 = NormalDist(10, 20)
841847
g2 = NormalDist(-5, 25)
@@ -893,3 +899,5 @@ def assert_close(G1, G2):
893899
S = NormalDist.from_samples([x - y for x, y in zip(X.samples(n),
894900
Y.samples(n))])
895901
assert_close(X - Y, S)
902+
903+
print(doctest.testmod())

Lib/test/test_statistics.py

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1769,7 +1769,7 @@ def prepare_data(self):
17691769
def test_range_data(self):
17701770
# Override test from UnivariateCommonMixin.
17711771
data = range(20, 50, 3)
1772-
self.assertRaises(statistics.StatisticsError, self.func, data)
1772+
self.assertEqual(self.func(data), 20)
17731773

17741774
def test_nominal_data(self):
17751775
# Test mode with nominal data.
@@ -1790,13 +1790,14 @@ def test_bimodal_data(self):
17901790
# Test mode with bimodal data.
17911791
data = [1, 1, 2, 2, 2, 2, 3, 4, 5, 6, 6, 6, 6, 7, 8, 9, 9]
17921792
assert data.count(2) == data.count(6) == 4
1793-
# Check for an exception.
1794-
self.assertRaises(statistics.StatisticsError, self.func, data)
1793+
# mode() should return 2, the first encountered mode
1794+
self.assertEqual(self.func(data), 2)
17951795

1796-
def test_unique_data_failure(self):
1797-
# Test mode exception when data points are all unique.
1796+
def test_unique_data(self):
1797+
# Test mode when data points are all unique.
17981798
data = list(range(10))
1799-
self.assertRaises(statistics.StatisticsError, self.func, data)
1799+
# mode() should return 0, the first encountered mode
1800+
self.assertEqual(self.func(data), 0)
18001801

18011802
def test_none_data(self):
18021803
# Test that mode raises TypeError if given None as data.
@@ -1809,8 +1810,18 @@ def test_counter_data(self):
18091810
# Test that a Counter is treated like any other iterable.
18101811
data = collections.Counter([1, 1, 1, 2])
18111812
# Since the keys of the counter are treated as data points, not the
1812-
# counts, this should raise.
1813-
self.assertRaises(statistics.StatisticsError, self.func, data)
1813+
# counts, this should return the first mode encountered, 1
1814+
self.assertEqual(self.func(data), 1)
1815+
1816+
1817+
class TestMultiMode(unittest.TestCase):
1818+
1819+
def test_basics(self):
1820+
multimode = statistics.multimode
1821+
self.assertEqual(multimode('aabbbbbbbbcc'), ['b'])
1822+
self.assertEqual(multimode('aabbbbccddddeeffffgg'), ['b', 'd', 'f'])
1823+
self.assertEqual(multimode(''), [])
1824+
18141825

18151826
class TestFMean(unittest.TestCase):
18161827

0 commit comments

Comments
 (0)