diff --git a/Doc/library/functions.rst b/Doc/library/functions.rst index 7e367a0f2b6b25..9e80806b200340 100644 --- a/Doc/library/functions.rst +++ b/Doc/library/functions.rst @@ -770,7 +770,8 @@ are always available. They are listed here in alphabetical order. >>> float('-Infinity') -inf - If the argument is a string, it should contain a decimal number, optionally + If the argument is a string, it should contain a decimal number + or a hexadecimal number, optionally preceded by a sign, and optionally embedded in whitespace. The optional sign may be ``'+'`` or ``'-'``; a ``'+'`` sign has no effect on the value produced. The argument may also be a string representing a NaN @@ -786,13 +787,17 @@ are always available. They are listed here in alphabetical order. digit: digitpart: `digit` (["_"] `digit`)* number: [`digitpart`] "." `digitpart` | `digitpart` ["."] - exponent: ("e" | "E") [`sign`] `digitpart` - floatnumber: `number` [`exponent`] + exponent: ("e" | "E") ["+" | "-"] `digitpart` + hexfloatnumber: `~python-grammar:hexinteger` | `~python-grammar:hexfraction` | `~python-grammar:hexfloat` + floatnumber: (`number` [`exponent`]) | `hexfloatnumber` absfloatvalue: `floatnumber` | `infinity` | `nan` floatvalue: [`sign`] `absfloatvalue` Case is not significant, so, for example, "inf", "Inf", "INFINITY", and - "iNfINity" are all acceptable spellings for positive infinity. + "iNfINity" are all acceptable spellings for positive infinity. Note also + that the exponent of a hexadecimal floating point number is written in + decimal, and that it gives the power of 2 by which to multiply the + coefficient. Otherwise, if the argument is an integer or a floating-point number, a floating-point number with the same value (within Python's floating-point @@ -818,6 +823,9 @@ are always available. They are listed here in alphabetical order. .. versionchanged:: 3.8 Falls back to :meth:`~object.__index__` if :meth:`~object.__float__` is not defined. + .. 
versionchanged:: next + Added support for hexadecimal floating-point numbers. + .. index:: single: __format__ diff --git a/Doc/reference/lexical_analysis.rst b/Doc/reference/lexical_analysis.rst index ff801a7d4fc494..653ad12101af13 100644 --- a/Doc/reference/lexical_analysis.rst +++ b/Doc/reference/lexical_analysis.rst @@ -948,18 +948,26 @@ Floating-point literals Floating-point literals are described by the following lexical definitions: .. productionlist:: python-grammar - floatnumber: `pointfloat` | `exponentfloat` + floatnumber: `pointfloat` | `exponentfloat` | `hexfloat` pointfloat: [`digitpart`] `fraction` | `digitpart` "." exponentfloat: (`digitpart` | `pointfloat`) `exponent` + hexfloat: ("0x" | "0X") ["_"] (`hexdigitpart` | `hexpointfloat`) [`binexponent`] digitpart: `digit` (["_"] `digit`)* fraction: "." `digitpart` exponent: ("e" | "E") ["+" | "-"] `digitpart` + hexpointfloat: [`hexdigitpart`] `hexfraction` | `hexdigitpart` "." + hexfraction: "." `hexdigitpart` + hexdigitpart: `hexdigit` (["_"] `hexdigit`)* + binexponent: ("p" | "P") ["+" | "-"] `digitpart` -Note that the integer and exponent parts are always interpreted using radix 10. +Note that the exponent parts are always interpreted using radix 10. For example, ``077e010`` is legal, and denotes the same number as ``77e10``. The allowed range of floating-point literals is implementation-dependent. As in integer literals, underscores are supported for digit grouping. +The exponent of a hexadecimal floating-point literal is written in decimal, and +it gives the power of 2 by which to multiply the coefficient. + Some examples of floating-point literals:: 3.14 10. .001 1e100 3.14e-10 0e0 3.14_15_93 @@ -967,6 +975,9 @@ Some examples of floating-point literals:: .. versionchanged:: 3.6 Underscores are now allowed for grouping purposes in literals. +.. versionchanged:: next + Added support for hexadecimal floating-point literals. + .. 
index:: single: j; in numeric literal diff --git a/Doc/tutorial/floatingpoint.rst b/Doc/tutorial/floatingpoint.rst index dfe2d1d3a8378f..44baeee12d1165 100644 --- a/Doc/tutorial/floatingpoint.rst +++ b/Doc/tutorial/floatingpoint.rst @@ -210,7 +210,7 @@ the float value exactly: .. doctest:: - >>> x == float.fromhex('0x1.921f9f01b866ep+1') + >>> x == 0x1.921f9f01b866ep+1 True Since the representation is exact, it is useful for reliably porting values diff --git a/Include/cpython/pyctype.h b/Include/cpython/pyctype.h index 729d93275e6c53..71c870080fe5ad 100644 --- a/Include/cpython/pyctype.h +++ b/Include/cpython/pyctype.h @@ -21,11 +21,17 @@ PyAPI_DATA(const unsigned int) _Py_ctype_table[256]; #define Py_ISLOWER(c) (_Py_ctype_table[Py_CHARMASK(c)] & PY_CTF_LOWER) #define Py_ISUPPER(c) (_Py_ctype_table[Py_CHARMASK(c)] & PY_CTF_UPPER) #define Py_ISALPHA(c) (_Py_ctype_table[Py_CHARMASK(c)] & PY_CTF_ALPHA) -#define Py_ISDIGIT(c) (_Py_ctype_table[Py_CHARMASK(c)] & PY_CTF_DIGIT) -#define Py_ISXDIGIT(c) (_Py_ctype_table[Py_CHARMASK(c)] & PY_CTF_XDIGIT) #define Py_ISALNUM(c) (_Py_ctype_table[Py_CHARMASK(c)] & PY_CTF_ALNUM) #define Py_ISSPACE(c) (_Py_ctype_table[Py_CHARMASK(c)] & PY_CTF_SPACE) +static inline int Py_ISDIGIT(char c) { + return _Py_ctype_table[Py_CHARMASK(c)] & PY_CTF_DIGIT; +} + +static inline int Py_ISXDIGIT(char c) { + return _Py_ctype_table[Py_CHARMASK(c)] & PY_CTF_XDIGIT; +} + PyAPI_DATA(const unsigned char) _Py_ctype_tolower[256]; PyAPI_DATA(const unsigned char) _Py_ctype_toupper[256]; diff --git a/Include/internal/pycore_floatobject.h b/Include/internal/pycore_floatobject.h index 317f984188bad8..2168353ff1e87f 100644 --- a/Include/internal/pycore_floatobject.h +++ b/Include/internal/pycore_floatobject.h @@ -40,6 +40,7 @@ extern PyObject* _Py_string_to_number_with_underscores( extern double _Py_parse_inf_or_nan(const char *p, char **endptr); +extern double _Py_dg_strtod_hex(const char *str, char **ptr); extern int _Py_convert_int_to_double(PyObject **v, 
double *dbl); diff --git a/Lib/test/support/numbers.py b/Lib/test/support/numbers.py index d5dbb41acebc38..dc266cb35ea190 100644 --- a/Lib/test/support/numbers.py +++ b/Lib/test/support/numbers.py @@ -24,6 +24,16 @@ '.1_4j', '(1_2.5+3_3j)', '(.5_6j)', + '0x_.1p1', + '0X_.1p1', + '0x1_1.p1', + '0x_1_1.p1', + '0x1.1_1p1', + '0x1.p1_1', + '0xa.p1', + '0x.ap1', + '0xa_c.p1', + '0x.a_cp1', ] INVALID_UNDERSCORE_LITERALS = [ # Trailing underscores: @@ -35,6 +45,8 @@ '0xf_', '0o5_', '0 if 1_Else 1', + '0x1p1_', + '0x1.1p1_', # Underscores in the base selector: '0_b0', '0_xf', @@ -52,28 +64,39 @@ '0o5__77', '1e1__0', '1e1__0j', + '0x1__1.1p1', # Underscore right before a dot: '1_.4', '1_.4j', + '0x1_.p1', + '0xa_.p1', # Underscore right after a dot: '1._4', '1._4j', '._5', '._5j', + '0x1._p1', + '0xa._p1', # Underscore right after a sign: '1.0e+_1', '1.0e+_1j', # Underscore right before j: '1.4_j', '1.4e5_j', - # Underscore right before e: + # Underscore right before e or p: '1_e1', '1.4_e1', '1.4_e1j', - # Underscore right after e: + '0x1_p1', + '0x1_P1', + '0x1.1_p1', + '0x1.1_P1', + # Underscore right after e or p: '1e_1', '1.4e_1', '1.4e_1j', + '0x1p_1', + '0x1.1p_1', # Complex cases with parens: '(1+1.5_j_)', '(1+1.5_j)', diff --git a/Lib/test/test_float.py b/Lib/test/test_float.py index 237d7b5d35edd7..eab11ea8c44dbb 100644 --- a/Lib/test/test_float.py +++ b/Lib/test/test_float.py @@ -63,9 +63,9 @@ def test_float(self): self.assertEqual(float(3.14), 3.14) self.assertEqual(float(314), 314.0) self.assertEqual(float(" 3.14 "), 3.14) - self.assertRaises(ValueError, float, " 0x3.1 ") - self.assertRaises(ValueError, float, " -0x3.p-1 ") - self.assertRaises(ValueError, float, " +0x3.p-1 ") + self.assertEqual(float(" 0x3.1 "), 3.0625) + self.assertEqual(float(" -0x3.p-1 "), -1.5) + self.assertEqual(float(" +0x3.p-1 "), 1.5) self.assertRaises(ValueError, float, "++3.14") self.assertRaises(ValueError, float, "+-3.14") self.assertRaises(ValueError, float, "-+3.14") @@ -95,13 
+95,13 @@ def test_noargs(self): def test_underscores(self): for lit in VALID_UNDERSCORE_LITERALS: - if not any(ch in lit for ch in 'jJxXoObB'): + if not any(ch in lit for ch in 'jJoObB'): self.assertEqual(float(lit), eval(lit)) self.assertEqual(float(lit), float(lit.replace('_', ''))) for lit in INVALID_UNDERSCORE_LITERALS: if lit in ('0_7', '09_99'): # octals are not recognized here continue - if not any(ch in lit for ch in 'jJxXoObB'): + if not any(ch in lit for ch in 'jJoObB'): self.assertRaises(ValueError, float, lit) # Additional test cases; nan and inf are never valid as literals, # only in the float() constructor, but we don't allow underscores @@ -198,9 +198,9 @@ def test_float_with_comma(self): self.assertRaises(ValueError, float, " 3,14 ") self.assertRaises(ValueError, float, " +3,14 ") self.assertRaises(ValueError, float, " -3,14 ") - self.assertRaises(ValueError, float, " 0x3.1 ") - self.assertRaises(ValueError, float, " -0x3.p-1 ") - self.assertRaises(ValueError, float, " +0x3.p-1 ") + self.assertEqual(float(" 0x3.1 "), 3.0625) + self.assertEqual(float(" -0x3.p-1 "), -1.5) + self.assertEqual(float(" +0x3.p-1 "), 1.5) self.assertEqual(float(" 25.e-1 "), 2.5) self.assertAlmostEqual(float(" .25e-1 "), .025) @@ -1557,7 +1557,7 @@ def roundtrip(x): except OverflowError: pass else: - self.identical(x, fromHex(toHex(x))) + self.identical(x, roundtrip(x)) def test_subclass(self): class F(float): diff --git a/Lib/test/test_grammar.py b/Lib/test/test_grammar.py index 3ea4e47ca50a16..403c1e46023359 100644 --- a/Lib/test/test_grammar.py +++ b/Lib/test/test_grammar.py @@ -74,6 +74,17 @@ def test_plain_integers(self): else: self.fail('Weird maxsize value %r' % maxsize) + def test_attrs_on_hexintegers(self): + good_meth = [m for m in dir(int) if not m.startswith('_')] + for m in good_meth: + with self.assertWarns(SyntaxWarning): + v = eval('0x1.' + m) + self.assertEqual(v, eval('(0x1).' 
+ m)) + self.check_syntax_error('0x1.spam', "invalid hexadecimal literal", + lineno=1, offset=4) + self.check_syntax_error('0x1.foo', "invalid hexadecimal literal", + lineno=1, offset=5) + def test_long_integers(self): x = 0 x = 0xffffffffffffffff @@ -97,6 +108,23 @@ def test_floats(self): x = 3.e14 x = .3e14 x = 3.1e4 + x = 0x1.2p1 + x = 0x1.2p+1 + x = 0x1.p1 + x = 0x1.p-1 + x = 0x1p0 + x = 0x1ap1 + x = 0x1P1 + x = 0x1cp2 + x = 0x1.p1 + x = 0x1.P1 + x = 0x001.1p2 + x = 0X1p1 + x = 0x1.1_1p1 + x = 0x1.1p1_1 + x = 0x1. + x = 0x1.1 + x = 0x.1 def test_float_exponent_tokenization(self): # See issue 21642. @@ -134,7 +162,14 @@ def test_bad_numerical_literals(self): "use an 0o prefix for octal integers") check("1.2_", "invalid decimal literal") check("1e2_", "invalid decimal literal") - check("1e+", "invalid decimal literal") + check("1e+", "invalid float literal") + check("0x.p", "invalid float literal") + check("0x_.p", "invalid float literal") + check("0x1.1p", "invalid float literal") + check("0x1.1_p", "invalid float literal") + check("0x1.1p_", "invalid float literal") + check("0xp", "invalid hexadecimal literal") + check("0xP", "invalid hexadecimal literal") def test_end_of_numerical_literals(self): def check(test, error=False): diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py index 4e05a38394cdc1..f9174dc5190f1d 100644 --- a/Lib/test/test_tokenize.py +++ b/Lib/test/test_tokenize.py @@ -271,6 +271,16 @@ def test_float(self): NAME 'x' (1, 0) (1, 1) OP '=' (1, 2) (1, 3) NUMBER '3.14e159' (1, 4) (1, 12) + """) + self.check_tokenize("x = 0x1p1", """\ + NAME 'x' (1, 0) (1, 1) + OP '=' (1, 2) (1, 3) + NUMBER '0x1p1' (1, 4) (1, 9) + """) + self.check_tokenize("x = 0x.1p1", """\ + NAME 'x' (1, 0) (1, 1) + OP '=' (1, 2) (1, 3) + NUMBER '0x.1p1' (1, 4) (1, 10) """) def test_underscore_literals(self): diff --git a/Lib/tokenize.py b/Lib/tokenize.py index 7afacff7381f1c..135c486b228f03 100644 --- a/Lib/tokenize.py +++ b/Lib/tokenize.py @@ -77,7 +77,10 @@ def 
maybe(*choices): return group(*choices) + '?' Pointfloat = group(r'[0-9](?:_?[0-9])*\.(?:[0-9](?:_?[0-9])*)?', r'\.[0-9](?:_?[0-9])*') + maybe(Exponent) Expfloat = r'[0-9](?:_?[0-9])*' + Exponent -Floatnumber = group(Pointfloat, Expfloat) +HexExponent = r'[pP][-+]?[0-9](?:_?[0-9])*' +Hexfloat = group(r'0[xX]_?[0-9a-f](?:_?[0-9a-f])*\.(?:[0-9a-f](?:_?[0-9a-f])*)?', + r'0[xX]_?\.[0-9a-f](?:_?[0-9a-f])*') + HexExponent +Floatnumber = group(Pointfloat, Expfloat, Hexfloat) Imagnumber = group(r'[0-9](?:_?[0-9])*[jJ]', Floatnumber + r'[jJ]') Number = group(Imagnumber, Floatnumber, Intnumber) diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-01-28-08-17-08.gh-issue-114667.8w_l9I.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-01-28-08-17-08.gh-issue-114667.8w_l9I.rst new file mode 100644 index 00000000000000..c01c0cde3e1892 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-01-28-08-17-08.gh-issue-114667.8w_l9I.rst @@ -0,0 +1,3 @@ +Add hexadecimal floating point literals (IEEE 754-2008 §5.12.3) and support +construction of floats from hexadecimal strings. Patch by Sergey B +Kirpichev. 
diff --git a/Objects/floatobject.c b/Objects/floatobject.c index 87a00bf1a458ea..e38752c2dc74ce 100644 --- a/Objects/floatobject.c +++ b/Objects/floatobject.c @@ -1247,11 +1247,64 @@ float_fromhex_impl(PyTypeObject *type, PyObject *string) /*[clinic end generated code: output=c54b4923552e5af5 input=0407bebd354bca89]*/ { PyObject *result; + Py_ssize_t length; + const char *s, *end, *last; + double x; + + s = PyUnicode_AsUTF8AndSize(string, &length); + if (s == NULL) { + return NULL; + } + last = s + length; + + while (Py_ISSPACE(*s)) { + s++; + } + while (s < last - 1 && Py_ISSPACE(last[-1])) { + last--; + } + + errno = 0; + x = _Py_dg_strtod_hex(s, (char **)&end); + + if (errno == ERANGE) { + PyErr_SetString(PyExc_OverflowError, + "hexadecimal value too large to represent as a float"); + return NULL; + } + + if (end != last) { + if (end != s && (*end && !Py_ISSPACE(*end))) { + PyErr_SetString(PyExc_ValueError, + "hexadecimal string too long to convert"); + return NULL; + } + /* Nothing parsed, maybe inf/nan? */ + x = _Py_parse_inf_or_nan(s, (char **)&end); + } + if (end != last || end == s) { + PyErr_SetString(PyExc_ValueError, + "invalid hexadecimal floating-point string"); + return NULL; + } + + result = PyFloat_FromDouble(x); + if (type != &PyFloat_Type && result != NULL) { + Py_SETREF(result, PyObject_CallOneArg((PyObject *)type, result)); + } + return result; +} + +double +_Py_dg_strtod_hex(const char *s00, char **se) +{ double x; long exp, top_exp, lsb, key_digit; - const char *s, *coeff_start, *s_store, *coeff_end, *exp_start, *s_end; + const char *coeff_start, *s_store, *coeff_end, *exp_start, *s = s00; int half_eps, digit, round_up, negate=0; - Py_ssize_t length, ndigits, fdigits, i; + Py_ssize_t ndigits, fdigits, i; + + *se = (char *)s00; /* * For the sake of simplicity and correctness, we impose an artificial @@ -1298,11 +1351,6 @@ float_fromhex_impl(PyTypeObject *type, PyObject *string) * exp+4*ndigits and exp-4*ndigits are within the range of a long. 
*/ - s = PyUnicode_AsUTF8AndSize(string, &length); - if (s == NULL) - return NULL; - s_end = s + length; - /******************** * Parse the string * ********************/ @@ -1311,13 +1359,6 @@ float_fromhex_impl(PyTypeObject *type, PyObject *string) while (Py_ISSPACE(*s)) s++; - /* infinities and nans */ - x = _Py_parse_inf_or_nan(s, (char **)&coeff_end); - if (coeff_end != s) { - s = coeff_end; - goto finished; - } - /* optional sign */ if (*s == '-') { s++; @@ -1356,8 +1397,10 @@ float_fromhex_impl(PyTypeObject *type, PyObject *string) if (ndigits == 0) goto parse_error; if (ndigits > Py_MIN(DBL_MIN_EXP - DBL_MANT_DIG - LONG_MIN/2, - LONG_MAX/2 + 1 - DBL_MAX_EXP)/4) + LONG_MAX/2 + 1 - DBL_MAX_EXP)/4) { + *se = (char*)coeff_end; goto insane_length_error; + } /* [p ] */ if (*s == 'p' || *s == 'P') { @@ -1456,31 +1499,20 @@ float_fromhex_impl(PyTypeObject *type, PyObject *string) x = ldexp(x, (int)(exp+4*key_digit)); finished: - /* optional trailing whitespace leading to the end of the string */ - while (Py_ISSPACE(*s)) - s++; - if (s != s_end) + if (*s && !Py_ISSPACE(*s)) goto parse_error; - result = PyFloat_FromDouble(negate ? -x : x); - if (type != &PyFloat_Type && result != NULL) { - Py_SETREF(result, PyObject_CallOneArg((PyObject *)type, result)); - } - return result; + *se = (char *)s; + errno = 0; + return negate ? 
-x : x; overflow_error: - PyErr_SetString(PyExc_OverflowError, - "hexadecimal value too large to represent as a float"); - return NULL; + errno = ERANGE; + return HUGE_VAL; parse_error: - PyErr_SetString(PyExc_ValueError, - "invalid hexadecimal floating-point string"); - return NULL; - insane_length_error: - PyErr_SetString(PyExc_ValueError, - "hexadecimal string too long to convert"); - return NULL; + errno = 0; + return 0.0; } /*[clinic input] diff --git a/Parser/lexer/lexer.c b/Parser/lexer/lexer.c index 45604b197f43f2..cc773da9567827 100644 --- a/Parser/lexer/lexer.c +++ b/Parser/lexer/lexer.c @@ -359,21 +359,23 @@ verify_identifier(struct tok_state *tok) } static int -tok_decimal_tail(struct tok_state *tok) +tok_digits_tail(struct tok_state *tok, int base) { int c; + int (*_isdigit)(char) = base == 16 ? &Py_ISXDIGIT : &Py_ISDIGIT; while (1) { do { c = tok_nextc(tok); - } while (Py_ISDIGIT(c)); + } while ((*_isdigit)(c)); if (c != '_') { break; } c = tok_nextc(tok); - if (!Py_ISDIGIT(c)) { + if (!(*_isdigit)(c)) { tok_backup(tok, c); - _PyTokenizer_syntaxerror(tok, "invalid decimal literal"); + _PyTokenizer_syntaxerror(tok, "invalid %s literal", + base == 16 ? "hexadecimal" : "decimal"); return 0; } } @@ -749,20 +751,66 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t /* Hex, octal or binary -- maybe. 
*/ c = tok_nextc(tok); if (c == 'x' || c == 'X') { - /* Hex */ + /* Hex integer/float */ c = tok_nextc(tok); - do { - if (c == '_') { - c = tok_nextc(tok); - } + if (c == '_') { + c = tok_nextc(tok); + } + if (c == '.') { + c = tok_nextc(tok); if (!Py_ISXDIGIT(c)) { tok_backup(tok, c); - return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid hexadecimal literal")); + return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid float literal")); } - do { - c = tok_nextc(tok); - } while (Py_ISXDIGIT(c)); - } while (c == '_'); + goto hexfraction; + } + else if (!Py_ISXDIGIT(c)) { + tok_backup(tok, c); + return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid hexadecimal literal")); + } + c = tok_digits_tail(tok, 16); + if (c == 0) { + return MAKE_TOKEN(ERRORTOKEN); + } + if (c == '.') { + c = tok_nextc(tok); + hexfraction: + /* Allow attribute access on hexadecimal integer literals for + * existing public attributes on int's, e.g. 0x1.bit_length(). */ + if ((c == 'a' && lookahead(tok, "s_integer_ratio")) || + (c == 't' && lookahead(tok, "o_bytes")) || + (c == 'b' && (lookahead(tok, "it_count") || + lookahead(tok, "it_length"))) || + (c == 'c' && lookahead(tok, "onjugate")) || + (c == 'd' && lookahead(tok, "enominator")) || + (c == 'f' && lookahead(tok, "rom_bytes")) || + (c == 'i' && (lookahead(tok, "mag") || + lookahead(tok, "s_integer"))) || + (c == 'n' && lookahead(tok, "umerator")) || + (c == 'r' && lookahead(tok, "eal"))) + { + if (_PyTokenizer_parser_warn(tok, PyExc_SyntaxWarning, + "invalid float literal")) + { + return 0; + } + tok_backup(tok, c); + c = '.'; + goto hexint; + } + if (Py_ISXDIGIT(c)) { + c = tok_digits_tail(tok, 16); + if (c == 0) { + tok->done = E_OK; + _PyTokenizer_syntaxerror(tok, "invalid float literal"); + return MAKE_TOKEN(ERRORTOKEN); + } + } + } + if (c == 'p' || c == 'P') { + goto exponent; + } + hexint: if (!verify_end_of_number(tok, c, "hexadecimal")) { return MAKE_TOKEN(ERRORTOKEN); } @@ -843,7 +891,7 @@ tok_get_normal_mode(struct 
tok_state *tok, tokenizer_mode* current_tok, struct t char* zeros_end = tok->cur; if (Py_ISDIGIT(c)) { nonzero = 1; - c = tok_decimal_tail(tok); + c = tok_digits_tail(tok, 10); if (c == 0) { return MAKE_TOKEN(ERRORTOKEN); } @@ -875,7 +923,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t } else { /* Decimal */ - c = tok_decimal_tail(tok); + c = tok_digits_tail(tok, 10); if (c == 0) { return MAKE_TOKEN(ERRORTOKEN); } @@ -886,7 +934,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t fraction: /* Fraction */ if (Py_ISDIGIT(c)) { - c = tok_decimal_tail(tok); + c = tok_digits_tail(tok, 10); if (c == 0) { return MAKE_TOKEN(ERRORTOKEN); } @@ -902,11 +950,11 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t c = tok_nextc(tok); if (!Py_ISDIGIT(c)) { tok_backup(tok, c); - return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid decimal literal")); + return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "invalid float literal")); } } else if (!Py_ISDIGIT(c)) { tok_backup(tok, c); - if (!verify_end_of_number(tok, e, "decimal")) { + if (!verify_end_of_number(tok, e, "float")) { return MAKE_TOKEN(ERRORTOKEN); } tok_backup(tok, e); @@ -914,7 +962,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t p_end = tok->cur; return MAKE_TOKEN(NUMBER); } - c = tok_decimal_tail(tok); + c = tok_digits_tail(tok, 10); if (c == 0) { return MAKE_TOKEN(ERRORTOKEN); } diff --git a/Python/dtoa.c b/Python/dtoa.c index 3de150351a4ef8..9bf01562982b98 100644 --- a/Python/dtoa.c +++ b/Python/dtoa.c @@ -118,6 +118,7 @@ /* Linking of Python's #defines to Gay's #defines starts here. 
*/ #include "Python.h" +#include "pycore_floatobject.h" // _Py_dg_strtod_hex() #include "pycore_dtoa.h" // _PY_SHORT_FLOAT_REPR #include "pycore_interp_structs.h"// struct Bigint #include "pycore_pystate.h" // _PyInterpreterState_GET() @@ -1412,6 +1413,9 @@ _Py_dg_strtod(const char *s00, char **se) c = *++s; } + if (*s == '0' && (*(s + 1) == 'x' || *(s + 1) == 'X')) + return _Py_dg_strtod_hex(s, se) * (sign ? -1: 1); + /* Skip leading zeros: lz is true iff there were leading zeros. */ s1 = s; while (c == '0') diff --git a/Python/pystrtod.c b/Python/pystrtod.c index 7b74f613ed563b..57206bb7fe73d6 100644 --- a/Python/pystrtod.c +++ b/Python/pystrtod.c @@ -169,13 +169,8 @@ _PyOS_ascii_strtod(const char *nptr, char **endptr) p++; } - /* Some platform strtods accept hex floats; Python shouldn't (at the - moment), so we check explicitly for strings starting with '0x'. */ - if (*p == '0' && (*(p+1) == 'x' || *(p+1) == 'X')) - goto invalid_string; - /* Check that what's left begins with a digit or decimal point */ - if (!Py_ISDIGIT(*p) && *p != '.') + if (!Py_ISXDIGIT(*p) && *p != '.') goto invalid_string; digits_pos = p; @@ -186,7 +181,7 @@ _PyOS_ascii_strtod(const char *nptr, char **endptr) swapped for the current locale's decimal point before we call strtod. On the other hand, if we find the current locale's decimal point then the input is invalid. 
*/ - while (Py_ISDIGIT(*p)) + while (Py_ISXDIGIT(*p)) p++; if (*p == '.') @@ -194,10 +189,10 @@ _PyOS_ascii_strtod(const char *nptr, char **endptr) decimal_point_pos = p++; /* locate end of number */ - while (Py_ISDIGIT(*p)) + while (Py_ISXDIGIT(*p)) p++; - if (*p == 'e' || *p == 'E') + if (*p == 'e' || *p == 'E' || *p == 'p' || *p == 'P') p++; if (*p == '+' || *p == '-') p++; @@ -350,6 +345,7 @@ _Py_string_to_number_with_underscores( const char *p, *last; char *dup, *end; PyObject *result; + int (*_isdigit)(char) = &Py_ISDIGIT; assert(s[orig_len] == '\0'); @@ -364,21 +360,40 @@ _Py_string_to_number_with_underscores( end = dup; prev = '\0'; last = s + orig_len; - for (p = s; *p; p++) { + p = s; + /* Has hexadecimal prefix? */ + if (*p == '0' && (*(p+1) == 'x' || *(p+1) == 'X')) { + _isdigit = &Py_ISXDIGIT; + /* Accept prefix. */ + *end++ = *p; + p++; + *end++ = *p; + p++; + /* Underscore allowed right after the prefix and before '.' */ + if (*p == '_') { + p++; + if (*p == '.') { + *end++ = *p; + p++; + } + } + } + while (*p) { if (*p == '_') { /* Underscores are only allowed after digits. */ - if (!(prev >= '0' && prev <= '9')) { + if (!(*_isdigit)(prev)) { goto error; } } else { *end++ = *p; /* Underscores are only allowed before digits. */ - if (prev == '_' && !(*p >= '0' && *p <= '9')) { + if (prev == '_' && !(*_isdigit)(*p)) { goto error; } } prev = *p; + p++; } /* Underscores are not allowed at the end. */ if (prev == '_') {