From 916ab7e12c5a32cdbc9003a066cc5383c9282564 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Wed, 21 Dec 2022 17:00:25 -0800 Subject: [PATCH 01/13] Neumaier compensated summation --- Lib/test/test_builtin.py | 6 +++--- Python/bltinmodule.c | 14 ++++++++++++-- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/Lib/test/test_builtin.py b/Lib/test/test_builtin.py index eb1c389257cc4b..d4708e5a3b50b1 100644 --- a/Lib/test/test_builtin.py +++ b/Lib/test/test_builtin.py @@ -1614,9 +1614,9 @@ def test_sum(self): self.assertEqual(sum((i / 2 for i in range(10)), 1000.25), 1022.75) self.assertEqual(sum([0.5, 1]), 1.5) self.assertEqual(sum([1, 0.5]), 1.5) - self.assertEqual(repr(sum([-0.0])), '0.0') - self.assertEqual(repr(sum([-0.0], -0.0)), '-0.0') - self.assertEqual(repr(sum([], -0.0)), '-0.0') + # self.assertEqual(repr(sum([-0.0])), '0.0') + # self.assertEqual(repr(sum([-0.0], -0.0)), '-0.0') + # self.assertEqual(repr(sum([], -0.0)), '-0.0') self.assertRaises(TypeError, sum) self.assertRaises(TypeError, sum, 42) diff --git a/Python/bltinmodule.c b/Python/bltinmodule.c index ff96c25da5ebc6..1681b0f588b5b4 100644 --- a/Python/bltinmodule.c +++ b/Python/bltinmodule.c @@ -2532,6 +2532,8 @@ builtin_sum_impl(PyObject *module, PyObject *iterable, PyObject *start) if (PyFloat_CheckExact(result)) { double f_result = PyFloat_AS_DOUBLE(result); + double c = 0.0; + double x, t; Py_SETREF(result, NULL); while(result == NULL) { item = PyIter_Next(iter); @@ -2539,10 +2541,18 @@ builtin_sum_impl(PyObject *module, PyObject *iterable, PyObject *start) Py_DECREF(iter); if (PyErr_Occurred()) return NULL; - return PyFloat_FromDouble(f_result); + return PyFloat_FromDouble(f_result + c); } if (PyFloat_CheckExact(item)) { - f_result += PyFloat_AS_DOUBLE(item); + // Neumaier compensated summation + x = PyFloat_AS_DOUBLE(item); + t = f_result + x; + if (fabs(f_result) >= fabs(x)) { + c += (f_result - t) + x; + } else { + c += (x - t) + f_result; + } + f_result = t; _Py_DECREF_SPECIALIZED(item, _PyFloat_ExactDealloc); continue; } From 787f2e52b94f5501bf933a54f4720ef1c4784b00 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Wed, 21 Dec 2022 17:21:55 -0800 Subject: [PATCH 02/13] Kahan summation --- Lib/test/test_builtin.py | 7 ++++--- Python/bltinmodule.c | 15 ++++++--------- 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/Lib/test/test_builtin.py b/Lib/test/test_builtin.py index d4708e5a3b50b1..23e0567575e116 100644 --- a/Lib/test/test_builtin.py +++ b/Lib/test/test_builtin.py @@ -1614,9 +1614,10 @@ def test_sum(self): self.assertEqual(sum((i / 2 for i in range(10)), 1000.25), 1022.75) self.assertEqual(sum([0.5, 1]), 1.5) self.assertEqual(sum([1, 0.5]), 1.5) - # self.assertEqual(repr(sum([-0.0])), '0.0') - # self.assertEqual(repr(sum([-0.0], -0.0)), '-0.0') - # self.assertEqual(repr(sum([], -0.0)), '-0.0') + self.assertEqual(sum([0.1] * 10), 1.0) # Test accuracy of Kahan summation + self.assertEqual(repr(sum([-0.0])), '0.0') + self.assertEqual(repr(sum([-0.0], -0.0)), '-0.0') + self.assertEqual(repr(sum([], -0.0)), '-0.0') self.assertRaises(TypeError, sum) self.assertRaises(TypeError, sum, 42) diff --git a/Python/bltinmodule.c b/Python/bltinmodule.c index 1681b0f588b5b4..87feed299d8789 100644 --- a/Python/bltinmodule.c +++ b/Python/bltinmodule.c @@ -2533,7 +2533,6 @@ builtin_sum_impl(PyObject *module, PyObject *iterable, PyObject *start) if (PyFloat_CheckExact(result)) { double f_result = PyFloat_AS_DOUBLE(result); double c = 0.0; - double x, t; Py_SETREF(result, NULL); while(result == NULL) { item = PyIter_Next(iter); @@ -2541,17 +2540,15 @@ builtin_sum_impl(PyObject *module, PyObject *iterable, PyObject *start) Py_DECREF(iter); if (PyErr_Occurred()) return NULL; - return PyFloat_FromDouble(f_result + c); + return PyFloat_FromDouble(f_result); } if (PyFloat_CheckExact(item)) { - // Neumaier compensated summation + // Kahan compensated summation + double x, y, t; x = PyFloat_AS_DOUBLE(item); - t = f_result + x; - if (fabs(f_result) >= fabs(x)) { - c += (f_result - t) + x; - } else { - c += (x - t) + f_result; - } + y = x - c; + t = f_result + y; + c = (t - f_result) - y; f_result = t; _Py_DECREF_SPECIALIZED(item, _PyFloat_ExactDealloc); continue; From bc308aabca045f22e2607e2448b2c8608951d989 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Wed, 21 Dec 2022 17:29:54 -0800 Subject: [PATCH 03/13] Revert "Kahan summation" This reverts commit 787f2e52b94f5501bf933a54f4720ef1c4784b00. --- Lib/test/test_builtin.py | 7 +++---- Python/bltinmodule.c | 15 +++++++++------ 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/Lib/test/test_builtin.py b/Lib/test/test_builtin.py index 23e0567575e116..d4708e5a3b50b1 100644 --- a/Lib/test/test_builtin.py +++ b/Lib/test/test_builtin.py @@ -1614,10 +1614,9 @@ def test_sum(self): self.assertEqual(sum((i / 2 for i in range(10)), 1000.25), 1022.75) self.assertEqual(sum([0.5, 1]), 1.5) self.assertEqual(sum([1, 0.5]), 1.5) - self.assertEqual(sum([0.1] * 10), 1.0) # Test accuracy of Kahan summation - self.assertEqual(repr(sum([-0.0])), '0.0') - self.assertEqual(repr(sum([-0.0], -0.0)), '-0.0') - self.assertEqual(repr(sum([], -0.0)), '-0.0') + # self.assertEqual(repr(sum([-0.0])), '0.0') + # self.assertEqual(repr(sum([-0.0], -0.0)), '-0.0') + # self.assertEqual(repr(sum([], -0.0)), '-0.0') self.assertRaises(TypeError, sum) self.assertRaises(TypeError, sum, 42) diff --git a/Python/bltinmodule.c b/Python/bltinmodule.c index 87feed299d8789..1681b0f588b5b4 100644 --- a/Python/bltinmodule.c +++ b/Python/bltinmodule.c @@ -2533,6 +2533,7 @@ builtin_sum_impl(PyObject *module, PyObject *iterable, PyObject *start) if (PyFloat_CheckExact(result)) { double f_result = PyFloat_AS_DOUBLE(result); double c = 0.0; + double x, t; Py_SETREF(result, NULL); while(result == NULL) { item = PyIter_Next(iter); @@ -2540,15 +2541,17 @@ builtin_sum_impl(PyObject *module, PyObject *iterable, PyObject *start) Py_DECREF(iter); if (PyErr_Occurred()) return NULL; - return PyFloat_FromDouble(f_result); + return PyFloat_FromDouble(f_result + c); } if (PyFloat_CheckExact(item)) { - // Kahan compensated summation - double x, y, t; + // Neumaier compensated summation x = PyFloat_AS_DOUBLE(item); - y = x - c; - t = f_result + y; - c = (t - f_result) - y; + t = f_result + x; + if (fabs(f_result) >= fabs(x)) { + c += (f_result - t) + x; + } else { + c += (x - t) + f_result; + } f_result = t; _Py_DECREF_SPECIALIZED(item, _PyFloat_ExactDealloc); continue; From 0c2ba04d23057d4228ba8be9d31b5b884f74d0a1 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Wed, 21 Dec 2022 17:36:56 -0800 Subject: [PATCH 04/13] Restore the bogus sign tests --- Lib/test/test_builtin.py | 6 +++--- Python/bltinmodule.c | 5 ++++- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/Lib/test/test_builtin.py b/Lib/test/test_builtin.py index d4708e5a3b50b1..eb1c389257cc4b 100644 --- a/Lib/test/test_builtin.py +++ b/Lib/test/test_builtin.py @@ -1614,9 +1614,9 @@ def test_sum(self): self.assertEqual(sum((i / 2 for i in range(10)), 1000.25), 1022.75) self.assertEqual(sum([0.5, 1]), 1.5) self.assertEqual(sum([1, 0.5]), 1.5) - # self.assertEqual(repr(sum([-0.0])), '0.0') - # self.assertEqual(repr(sum([-0.0], -0.0)), '-0.0') - # self.assertEqual(repr(sum([], -0.0)), '-0.0') + self.assertEqual(repr(sum([-0.0])), '0.0') + self.assertEqual(repr(sum([-0.0], -0.0)), '-0.0') + self.assertEqual(repr(sum([], -0.0)), '-0.0') self.assertRaises(TypeError, sum) self.assertRaises(TypeError, sum, 42) diff --git a/Python/bltinmodule.c b/Python/bltinmodule.c index 1681b0f588b5b4..f09af877ca584c 100644 --- a/Python/bltinmodule.c +++ b/Python/bltinmodule.c @@ -2541,7 +2541,10 @@ builtin_sum_impl(PyObject *module, PyObject *iterable, PyObject *start) Py_DECREF(iter); if (PyErr_Occurred()) return NULL; - return PyFloat_FromDouble(f_result + c); + if (c) { + f_result += c; + } + return PyFloat_FromDouble(f_result); } if (PyFloat_CheckExact(item)) { // Neumaier compensated summation From 89e46c83b5057ad367aa826c5040d752a3ba41d8 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Wed, 21 Dec 2022 20:40:28 -0800 Subject: [PATCH 05/13] Add tests and comments --- Lib/test/test_builtin.py | 4 ++++ Python/bltinmodule.c | 3 ++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_builtin.py b/Lib/test/test_builtin.py index eb1c389257cc4b..9b6eb3fcc766c3 100644 --- a/Lib/test/test_builtin.py +++ b/Lib/test/test_builtin.py @@ -1618,6 +1618,10 @@ def test_sum(self): self.assertEqual(repr(sum([-0.0], -0.0)), '-0.0') self.assertEqual(repr(sum([], -0.0)), '-0.0') + # Accuracy tests + self.assertEqual(sum([0.1] * 10), 1.0) + self.assertEqual(sum([1.0, 10E100, 1.0, -10E100]), 2.0) + self.assertRaises(TypeError, sum) self.assertRaises(TypeError, sum, 42) self.assertRaises(TypeError, sum, ['a', 'b', 'c']) diff --git a/Python/bltinmodule.c b/Python/bltinmodule.c index f09af877ca584c..ee3eb8ee94a567 100644 --- a/Python/bltinmodule.c +++ b/Python/bltinmodule.c @@ -2547,7 +2547,8 @@ builtin_sum_impl(PyObject *module, PyObject *iterable, PyObject *start) return PyFloat_FromDouble(f_result); } if (PyFloat_CheckExact(item)) { - // Neumaier compensated summation + // Improved Kahan–Babuška algorithm by Arnold Neumaier + // https://www.mat.univie.ac.at/~neum/scan/01.pdf x = PyFloat_AS_DOUBLE(item); t = f_result + x; if (fabs(f_result) >= fabs(x)) { From a9cb7d2ab06374c22de09b70cc4a1317a7b8c24d Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Wed, 21 Dec 2022 21:41:54 -0800 Subject: [PATCH 06/13] Add tests and docs --- Doc/library/functions.rst | 4 ++++ Lib/test/test_builtin.py | 18 ++++++++++++++---- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/Doc/library/functions.rst b/Doc/library/functions.rst index 2110990d188973..817c1f858aae89 100644 --- a/Doc/library/functions.rst +++ b/Doc/library/functions.rst @@ -1733,6 +1733,10 @@ are always available. They are listed here in alphabetical order. .. versionchanged:: 3.8 The *start* parameter can be specified as a keyword argument. + .. versionchanged:: 3.12 Summation of floats switched to an algorithm + that gives higher accuracy on most builds. + + .. class:: super() super(type, object_or_type=None) diff --git a/Lib/test/test_builtin.py b/Lib/test/test_builtin.py index 9b6eb3fcc766c3..644910facf1070 100644 --- a/Lib/test/test_builtin.py +++ b/Lib/test/test_builtin.py @@ -31,6 +31,7 @@ from test.support.os_helper import (EnvironmentVarGuard, TESTFN, unlink) from test.support.script_helper import assert_python_ok from test.support.warnings_helper import check_warnings +from test.support import requires_IEEE_754 from unittest.mock import MagicMock, patch try: import pty, signal @@ -38,6 +39,12 @@ pty = signal = None +# Detect evidence of double-rounding: sum() does not always +# get improved accuracy on machines that suffer from double rounding. +x, y = 1e16, 2.9999 # use temporary values to defeat peephole optimizer +HAVE_DOUBLE_ROUNDING = (x + y == 1e16 + 4) + + class Squares: def __init__(self, max): @@ -1618,10 +1625,6 @@ def test_sum(self): self.assertEqual(repr(sum([-0.0], -0.0)), '-0.0') self.assertEqual(repr(sum([], -0.0)), '-0.0') - # Accuracy tests - self.assertEqual(sum([0.1] * 10), 1.0) - self.assertEqual(sum([1.0, 10E100, 1.0, -10E100]), 2.0) - self.assertRaises(TypeError, sum) self.assertRaises(TypeError, sum, 42) self.assertRaises(TypeError, sum, ['a', 'b', 'c']) @@ -1645,6 +1648,13 @@ def __getitem__(self, index): sum(([x] for x in range(10)), empty) self.assertEqual(empty, []) + @requires_IEEE_754 + @unittest.skipIf(HAVE_DOUBLE_ROUNDING, + "sum accuracy not guaranteed on machines with double rounding") + def test_sum_accuracy(self): + self.assertEqual(sum([0.1] * 10), 1.0) + self.assertEqual(sum([1.0, 10E100, 1.0, -10E100]), 2.0) + def test_type(self): self.assertEqual(type(''), type('123')) self.assertNotEqual(type(''), type(())) From 839cc05a2927256dee8fb1f4d4c92a421d150bfb Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Wed, 21 Dec 2022 22:48:45 -0800 Subject: [PATCH 07/13] Add blurb --- .../2022-12-21-22-48-41.gh-issue-100425.U64yLu.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2022-12-21-22-48-41.gh-issue-100425.U64yLu.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-12-21-22-48-41.gh-issue-100425.U64yLu.rst b/Misc/NEWS.d/next/Core and Builtins/2022-12-21-22-48-41.gh-issue-100425.U64yLu.rst new file mode 100644 index 00000000000000..5559020b11d389 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2022-12-21-22-48-41.gh-issue-100425.U64yLu.rst @@ -0,0 +1 @@ +Improve the accuracy of ``sum()`` with compensated summation. From 66db9377f5c0317149b9c3c1bd31b68d449a92ae Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Wed, 21 Dec 2022 23:13:28 -0800 Subject: [PATCH 08/13] Remove out of date example for fsum() --- Doc/library/math.rst | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/Doc/library/math.rst b/Doc/library/math.rst index 559c6ec5dd9d8a..aeebcaf6ab0864 100644 --- a/Doc/library/math.rst +++ b/Doc/library/math.rst @@ -108,12 +108,7 @@ Number-theoretic and representation functions .. function:: fsum(iterable) Return an accurate floating point sum of values in the iterable. Avoids - loss of precision by tracking multiple intermediate partial sums: - - >>> sum([.1, .1, .1, .1, .1, .1, .1, .1, .1, .1]) - 0.9999999999999999 - >>> fsum([.1, .1, .1, .1, .1, .1, .1, .1, .1, .1]) - 1.0 + loss of precision by tracking multiple intermediate partial sums. The algorithm's accuracy depends on IEEE-754 arithmetic guarantees and the typical case where the rounding mode is half-even. On some non-Windows From af7613ebcf1d118291320f07e9fab8100266c13c Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Thu, 22 Dec 2022 00:32:51 -0800 Subject: [PATCH 09/13] Update floating point tutorial --- Doc/tutorial/floatingpoint.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/tutorial/floatingpoint.rst b/Doc/tutorial/floatingpoint.rst index e1cd7f9ece75d0..cedade6e336608 100644 --- a/Doc/tutorial/floatingpoint.rst +++ b/Doc/tutorial/floatingpoint.rst @@ -192,7 +192,7 @@ added onto a running total. That can make a difference in overall accuracy so that the errors do not accumulate to the point where they affect the final total: - >>> sum([0.1] * 10) == 1.0 + >>> 0.1 + 0.1 + 0.1 + 0.1 + 0.1 + 0.1 + 0.1 + 0.1 + 0.1 + 0.1 == 1.0 False >>> math.fsum([0.1] * 10) == 1.0 True From 4f4b73107da1a1509e5484512461f269ddc6711a Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Thu, 22 Dec 2022 16:31:12 -0800 Subject: [PATCH 10/13] Correct handling of overflowed or infinite sums --- Lib/test/test_builtin.py | 3 +++ Python/bltinmodule.c | 5 ++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_builtin.py b/Lib/test/test_builtin.py index 644910facf1070..5a42bb44e939f1 100644 --- a/Lib/test/test_builtin.py +++ b/Lib/test/test_builtin.py @@ -9,6 +9,7 @@ import gc import io import locale +import math import os import pickle import platform @@ -1624,6 +1625,8 @@ def test_sum(self): self.assertEqual(repr(sum([-0.0])), '0.0') self.assertEqual(repr(sum([-0.0], -0.0)), '-0.0') self.assertEqual(repr(sum([], -0.0)), '-0.0') + self.assertTrue(math.isinf(sum([float("inf"), float("inf")]))) + self.assertTrue(math.isinf(sum([1e308, 1e308]))) self.assertRaises(TypeError, sum) self.assertRaises(TypeError, sum, 42) diff --git a/Python/bltinmodule.c b/Python/bltinmodule.c index ee3eb8ee94a567..02cb255b9a704e 100644 --- a/Python/bltinmodule.c +++ b/Python/bltinmodule.c @@ -2541,7 +2541,10 @@ builtin_sum_impl(PyObject *module, PyObject *iterable, PyObject *start) Py_DECREF(iter); if (PyErr_Occurred()) return NULL; - if (c) { + /* Avoid losing the sign on a negative result, + and don't let adding the compensation convert + an infinite or overflowed sum to a NaN. */ + if (c && Py_IS_FINITE(c)) { f_result += c; } return PyFloat_FromDouble(f_result); From 4da0b8ff2e83ecea46bab85d56850851ea3c7eec Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Fri, 23 Dec 2022 00:36:11 -0800 Subject: [PATCH 11/13] Move local declarations inside the block where they are used. --- Python/bltinmodule.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Python/bltinmodule.c b/Python/bltinmodule.c index 02cb255b9a704e..43e4643c23ebe6 100644 --- a/Python/bltinmodule.c +++ b/Python/bltinmodule.c @@ -2533,7 +2533,6 @@ builtin_sum_impl(PyObject *module, PyObject *iterable, PyObject *start) if (PyFloat_CheckExact(result)) { double f_result = PyFloat_AS_DOUBLE(result); double c = 0.0; - double x, t; Py_SETREF(result, NULL); while(result == NULL) { item = PyIter_Next(iter); @@ -2552,8 +2551,8 @@ builtin_sum_impl(PyObject *module, PyObject *iterable, PyObject *start) if (PyFloat_CheckExact(item)) { // Improved Kahan–Babuška algorithm by Arnold Neumaier // https://www.mat.univie.ac.at/~neum/scan/01.pdf - x = PyFloat_AS_DOUBLE(item); - t = f_result + x; + double x = PyFloat_AS_DOUBLE(item); + double t = f_result + x; if (fabs(f_result) >= fabs(x)) { c += (f_result - t) + x; } else { From 6c7894c82d2b2e562c2f72ef32745533917fbeee Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Fri, 23 Dec 2022 10:18:35 -0800 Subject: [PATCH 12/13] For consistent results, apply compensation on the slow path as well. --- Python/bltinmodule.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Python/bltinmodule.c b/Python/bltinmodule.c index 43e4643c23ebe6..2d4822e6d468aa 100644 --- a/Python/bltinmodule.c +++ b/Python/bltinmodule.c @@ -2572,6 +2572,9 @@ builtin_sum_impl(PyObject *module, PyObject *iterable, PyObject *start) continue; } } + if (c && Py_IS_FINITE(c)) { + f_result += c; + } result = PyFloat_FromDouble(f_result); if (result == NULL) { Py_DECREF(item); From 3cf9536ad7210bf5090b6e8e517130dd4c3d9a1a Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Fri, 23 Dec 2022 10:21:55 -0800 Subject: [PATCH 13/13] Mark as an implementation detail. --- Lib/test/test_builtin.py | 1 + 1 file changed, 1 insertion(+) diff --git a/Lib/test/test_builtin.py b/Lib/test/test_builtin.py index 5a42bb44e939f1..c65600483258a7 100644 --- a/Lib/test/test_builtin.py +++ b/Lib/test/test_builtin.py @@ -1654,6 +1654,7 @@ def __getitem__(self, index): @requires_IEEE_754 @unittest.skipIf(HAVE_DOUBLE_ROUNDING, "sum accuracy not guaranteed on machines with double rounding") + @support.cpython_only # Other implementations may choose a different algorithm def test_sum_accuracy(self): self.assertEqual(sum([0.1] * 10), 1.0) self.assertEqual(sum([1.0, 10E100, 1.0, -10E100]), 2.0)