python · skirpichev · Jan 7, 2024 · Feb 29, 2024 · Apr 2, 2024 · May 29, 2024
diff --git a/Doc/library/functions.rst b/Doc/library/functions.rst
@@ -770,7 +770,8 @@ are always available.  They are listed here in alphabetical order.
       >>> float('-Infinity')
       -inf
 
-   If the argument is a string, it should contain a decimal number, optionally
+   If the argument is a string, it should contain a decimal number
+   or a hexadecimal number, optionally
    preceded by a sign, and optionally embedded in whitespace.  The optional
    sign may be ``'+'`` or ``'-'``; a ``'+'`` sign has no effect on the value
    produced.  The argument may also be a string representing a NaN
@@ -786,13 +787,17 @@ are always available.  They are listed here in alphabetical order.
       digit: <a Unicode decimal digit, i.e. characters in Unicode general category Nd>
       digitpart: `digit` (["_"] `digit`)*
       number: [`digitpart`] "." `digitpart` | `digitpart` ["."]
-      exponent: ("e" | "E") [`sign`] `digitpart`
-      floatnumber: `number` [`exponent`]
+      exponent: ("e" | "E") ["+" | "-"] `digitpart`
+      hexfloatnumber: `~python-grammar:hexinteger` | `~python-grammar:hexfraction` | `~python-grammar:hexfloat`
+      floatnumber: (`number` [`exponent`]) | `hexfloatnumber`
       absfloatvalue: `floatnumber` | `infinity` | `nan`
       floatvalue: [`sign`] `absfloatvalue`
 
    Case is not significant, so, for example, "inf", "Inf", "INFINITY", and
-   "iNfINity" are all acceptable spellings for positive infinity.
+   "iNfINity" are all acceptable spellings for positive infinity.  Note also
+   that the exponent of a hexadecimal floating-point number is written in
+   decimal, and that it gives the power of 2 by which to multiply the
+   coefficient.
 
    Otherwise, if the argument is an integer or a floating-point number, a
    floating-point number with the same value (within Python's floating-point
@@ -818,6 +823,9 @@ are always available.  They are listed here in alphabetical order.
    .. versionchanged:: 3.8
       Falls back to :meth:`~object.__index__` if :meth:`~object.__float__` is not defined.
 
+   .. versionchanged:: next
+      Added support for hexadecimal floating-point numbers.
+
 
 .. index::
    single: __format__

@@ -948,25 +948,36 @@ Floating-point literals
 Floating-point literals are described by the following lexical definitions:
 
 .. productionlist:: python-grammar
-   floatnumber: `pointfloat` | `exponentfloat`
+   floatnumber: `pointfloat` | `exponentfloat` | `hexfloat`
    pointfloat: [`digitpart`] `fraction` | `digitpart` "."
    exponentfloat: (`digitpart` | `pointfloat`) `exponent`
+   hexfloat: ("0x" | "0X") ["_"] (`hexdigitpart` | `hexpointfloat`) [`binexponent`]
    digitpart: `digit` (["_"] `digit`)*
    fraction: "." `digitpart`
    exponent: ("e" | "E") ["+" | "-"] `digitpart`
+   hexpointfloat: [`hexdigitpart`] `hexfraction` | `hexdigitpart` "."
+   hexfraction: "." `hexdigitpart`
+   hexdigitpart: `hexdigit` (["_"] `hexdigit`)*
+   binexponent: ("p" | "P") ["+" | "-"] `digitpart`
 
-Note that the integer and exponent parts are always interpreted using radix 10.
+Note that in decimal literals the integer and exponent parts use radix 10.
 For example, ``077e010`` is legal, and denotes the same number as ``77e10``. The
 allowed range of floating-point literals is implementation-dependent.  As in
 integer literals, underscores are supported for digit grouping.
 
+The exponent of a hexadecimal floating-point literal is written in decimal, and
+it gives the power of 2 by which to multiply the coefficient.
+
 Some examples of floating-point literals::
 
    3.14    10.    .001    1e100    3.14e-10    0e0    3.14_15_93
 
 .. versionchanged:: 3.6
    Underscores are now allowed for grouping purposes in literals.
 
+.. versionchanged:: next
+   Added support for hexadecimal floating-point literals.
+
 
 .. index::
    single: j; in numeric literal

diff --git a/Doc/tutorial/floatingpoint.rst b/Doc/tutorial/floatingpoint.rst
@@ -210,7 +210,7 @@ the float value exactly:
 
 .. doctest::
 
-    >>> x == float.fromhex('0x1.921f9f01b866ep+1')
+    >>> x == 0x1.921f9f01b866ep+1
     True
 
 Since the representation is exact, it is useful for reliably porting values

diff --git a/Include/cpython/pyctype.h b/Include/cpython/pyctype.h
@@ -21,11 +21,17 @@ PyAPI_DATA(const unsigned int) _Py_ctype_table[256];
 #define Py_ISLOWER(c)  (_Py_ctype_table[Py_CHARMASK(c)] & PY_CTF_LOWER)
 #define Py_ISUPPER(c)  (_Py_ctype_table[Py_CHARMASK(c)] & PY_CTF_UPPER)
 #define Py_ISALPHA(c)  (_Py_ctype_table[Py_CHARMASK(c)] & PY_CTF_ALPHA)
-#define Py_ISDIGIT(c)  (_Py_ctype_table[Py_CHARMASK(c)] & PY_CTF_DIGIT)
-#define Py_ISXDIGIT(c) (_Py_ctype_table[Py_CHARMASK(c)] & PY_CTF_XDIGIT)
 #define Py_ISALNUM(c)  (_Py_ctype_table[Py_CHARMASK(c)] & PY_CTF_ALNUM)
 #define Py_ISSPACE(c)  (_Py_ctype_table[Py_CHARMASK(c)] & PY_CTF_SPACE)
 
+static inline int Py_ISDIGIT(char c) {
+    return _Py_ctype_table[Py_CHARMASK(c)] & PY_CTF_DIGIT;
+}
+
+static inline int Py_ISXDIGIT(char c) {
+    return _Py_ctype_table[Py_CHARMASK(c)] & PY_CTF_XDIGIT;
+}
+
 PyAPI_DATA(const unsigned char) _Py_ctype_tolower[256];
 PyAPI_DATA(const unsigned char) _Py_ctype_toupper[256];
 

diff --git a/Include/internal/pycore_floatobject.h b/Include/internal/pycore_floatobject.h
@@ -40,6 +40,7 @@ extern PyObject* _Py_string_to_number_with_underscores(
 
 extern double _Py_parse_inf_or_nan(const char *p, char **endptr);
 
+extern double _Py_dg_strtod_hex(const char *str, char **ptr);
 extern int _Py_convert_int_to_double(PyObject **v, double *dbl);
 
 

diff --git a/Lib/test/support/numbers.py b/Lib/test/support/numbers.py
@@ -24,6 +24,16 @@
     '.1_4j',
     '(1_2.5+3_3j)',
     '(.5_6j)',
+    '0x_.1p1',
+    '0X_.1p1',
+    '0x1_1.p1',
+    '0x_1_1.p1',
+    '0x1.1_1p1',
+    '0x1.p1_1',
+    '0xa.p1',
+    '0x.ap1',
+    '0xa_c.p1',
+    '0x.a_cp1',
 ]
 INVALID_UNDERSCORE_LITERALS = [
     # Trailing underscores:
@@ -35,6 +45,8 @@
     '0xf_',
     '0o5_',
     '0 if 1_Else 1',
+    '0x1p1_',
+    '0x1.1p1_',
     # Underscores in the base selector:
     '0_b0',
     '0_xf',
@@ -52,28 +64,39 @@
     '0o5__77',
     '1e1__0',
     '1e1__0j',
+    '0x1__1.1p1',
     # Underscore right before a dot:
     '1_.4',
     '1_.4j',
+    '0x1_.p1',
+    '0xa_.p1',
     # Underscore right after a dot:
     '1._4',
     '1._4j',
     '._5',
     '._5j',
+    '0x1._p1',
+    '0xa._p1',
     # Underscore right after a sign:
     '1.0e+_1',
     '1.0e+_1j',
     # Underscore right before j:
     '1.4_j',
     '1.4e5_j',
-    # Underscore right before e:
+    # Underscore right before e or p:
     '1_e1',
     '1.4_e1',
     '1.4_e1j',
-    # Underscore right after e:
+    '0x1_p1',
+    '0x1_P1',
+    '0x1.1_p1',
+    '0x1.1_P1',
+    # Underscore right after e or p:
     '1e_1',
     '1.4e_1',
     '1.4e_1j',
+    '0x1p_1',
+    '0x1.1p_1',
     # Complex cases with parens:
     '(1+1.5_j_)',
     '(1+1.5_j)',

diff --git a/Lib/test/test_float.py b/Lib/test/test_float.py
@@ -63,9 +63,9 @@ def test_float(self):
         self.assertEqual(float(3.14), 3.14)
         self.assertEqual(float(314), 314.0)
         self.assertEqual(float("  3.14  "), 3.14)
-        self.assertRaises(ValueError, float, "  0x3.1  ")
-        self.assertRaises(ValueError, float, "  -0x3.p-1  ")
-        self.assertRaises(ValueError, float, "  +0x3.p-1  ")
+        self.assertEqual(float("  0x3.1  "), 3.0625)
+        self.assertEqual(float("  -0x3.p-1  "), -1.5)
+        self.assertEqual(float("  +0x3.p-1  "), 1.5)
         self.assertRaises(ValueError, float, "++3.14")
         self.assertRaises(ValueError, float, "+-3.14")
         self.assertRaises(ValueError, float, "-+3.14")
@@ -95,13 +95,13 @@ def test_noargs(self):
 
     def test_underscores(self):
         for lit in VALID_UNDERSCORE_LITERALS:
-            if not any(ch in lit for ch in 'jJxXoObB'):
+            if not any(ch in lit for ch in 'jJoObB'):
                 self.assertEqual(float(lit), eval(lit))
                 self.assertEqual(float(lit), float(lit.replace('_', '')))
         for lit in INVALID_UNDERSCORE_LITERALS:
             if lit in ('0_7', '09_99'):  # octals are not recognized here
                 continue
-            if not any(ch in lit for ch in 'jJxXoObB'):
+            if not any(ch in lit for ch in 'jJoObB'):
                 self.assertRaises(ValueError, float, lit)
         # Additional test cases; nan and inf are never valid as literals,
         # only in the float() constructor, but we don't allow underscores
@@ -198,9 +198,9 @@ def test_float_with_comma(self):
         self.assertRaises(ValueError, float, "  3,14  ")
         self.assertRaises(ValueError, float, "  +3,14  ")
         self.assertRaises(ValueError, float, "  -3,14  ")
-        self.assertRaises(ValueError, float, "  0x3.1  ")
-        self.assertRaises(ValueError, float, "  -0x3.p-1  ")
-        self.assertRaises(ValueError, float, "  +0x3.p-1  ")
+        self.assertEqual(float("  0x3.1  "), 3.0625)
+        self.assertEqual(float("  -0x3.p-1  "), -1.5)
+        self.assertEqual(float("  +0x3.p-1  "), 1.5)
         self.assertEqual(float("  25.e-1  "), 2.5)
         self.assertAlmostEqual(float("  .25e-1  "), .025)
 
@@ -1557,7 +1557,7 @@ def roundtrip(x):
             except OverflowError:
                 pass
             else:
-                self.identical(x, fromHex(toHex(x)))
+                self.identical(x, roundtrip(x))
 
     def test_subclass(self):
         class F(float):

diff --git a/Lib/test/test_grammar.py b/Lib/test/test_grammar.py
@@ -74,6 +74,17 @@ def test_plain_integers(self):
         else:
             self.fail('Weird maxsize value %r' % maxsize)
 
+    def test_attrs_on_hexintegers(self):
+        good_meth = [m for m in dir(int) if not m.startswith('_')]
+        for m in good_meth:
+            with self.assertWarns(SyntaxWarning):
+                v = eval('0x1.' + m)
+            self.assertEqual(v, eval('(0x1).' + m))
+        self.check_syntax_error('0x1.spam', "invalid hexadecimal literal",
+                                lineno=1, offset=4)
+        self.check_syntax_error('0x1.foo', "invalid hexadecimal literal",
+                                lineno=1, offset=5)
+
     def test_long_integers(self):
         x = 0
         x = 0xffffffffffffffff
@@ -97,6 +108,23 @@ def test_floats(self):
         x = 3.e14
         x = .3e14
         x = 3.1e4
+        x = 0x1.2p1
+        x = 0x1.2p+1
+        x = 0x1.p1
+        x = 0x1.p-1
+        x = 0x1p0
+        x = 0x1ap1
+        x = 0x1P1
+        x = 0x1cp2
+        x = 0x1.p1
+        x = 0x1.P1
+        x = 0x001.1p2
+        x = 0X1p1
+        x = 0x1.1_1p1
+        x = 0x1.1p1_1
+        x = 0x1.
+        x = 0x1.1
+        x = 0x.1
 
     def test_float_exponent_tokenization(self):
         # See issue 21642.
@@ -134,7 +162,14 @@ def test_bad_numerical_literals(self):
               "use an 0o prefix for octal integers")
         check("1.2_", "invalid decimal literal")
         check("1e2_", "invalid decimal literal")
-        check("1e+", "invalid decimal literal")
+        check("1e+", "invalid float literal")
+        check("0x.p", "invalid float literal")
+        check("0x_.p", "invalid float literal")
+        check("0x1.1p", "invalid float literal")
+        check("0x1.1_p", "invalid float literal")
+        check("0x1.1p_", "invalid float literal")
+        check("0xp", "invalid hexadecimal literal")
+        check("0xP", "invalid hexadecimal literal")
 
     def test_end_of_numerical_literals(self):
         def check(test, error=False):

@@ -271,6 +271,16 @@ def test_float(self):
     NAME       'x'           (1, 0) (1, 1)
     OP         '='           (1, 2) (1, 3)
     NUMBER     '3.14e159'    (1, 4) (1, 12)
+    """)
+        self.check_tokenize("x = 0x1p1", """\
+    NAME       'x'           (1, 0) (1, 1)
+    OP         '='           (1, 2) (1, 3)
+    NUMBER     '0x1p1'       (1, 4) (1, 9)
+    """)
+        self.check_tokenize("x = 0x.1p1", """\
+    NAME       'x'           (1, 0) (1, 1)
+    OP         '='           (1, 2) (1, 3)
+    NUMBER     '0x.1p1'      (1, 4) (1, 10)
     """)
 
     def test_underscore_literals(self):

@@ -77,7 +77,10 @@ def maybe(*choices): return group(*choices) + '?'
 Pointfloat = group(r'[0-9](?:_?[0-9])*\.(?:[0-9](?:_?[0-9])*)?',
                    r'\.[0-9](?:_?[0-9])*') + maybe(Exponent)
 Expfloat = r'[0-9](?:_?[0-9])*' + Exponent
-Floatnumber = group(Pointfloat, Expfloat)
+HexExponent = r'[pP][-+]?[0-9](?:_?[0-9])*'
+Hexfloat = group(r'0[xX]_?[0-9a-f](?:_?[0-9a-f])*\.(?:[0-9a-f](?:_?[0-9a-f])*)?',
+                 r'0[xX]_?\.[0-9a-f](?:_?[0-9a-f])*') + HexExponent
+Floatnumber = group(Pointfloat, Expfloat, Hexfloat)
 Imagnumber = group(r'[0-9](?:_?[0-9])*[jJ]', Floatnumber + r'[jJ]')
 Number = group(Imagnumber, Floatnumber, Intnumber)
 

diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-01-28-08-17-08.gh-issue-114667.8w_l9I.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-01-28-08-17-08.gh-issue-114667.8w_l9I.rst
@@ -0,0 +1,3 @@
+Add hexadecimal floating-point literals (IEEE 754-2008 §5.12.3) and support
+construction of floats from hexadecimal strings.  Patch by Sergey B
+Kirpichev.
Original file line number	Diff line number	Diff line change
Expand Up		@@ -40,6 +40,7 @@ extern PyObject* _Py_string_to_number_with_underscores(

		extern double _Py_parse_inf_or_nan(const char *p, char **endptr);

		extern double _Py_dg_strtod_hex(const char *str, char **ptr);
		extern int _Py_convert_int_to_double(PyObject **v, double *dbl);


Expand Down