Skip to content

Commit a0c603c

Browse files
author
Ma Lin
authored
bpo-38252: Use 8-byte step to detect ASCII sequence in 64-bit Windows build (GH-16334)
1 parent 3635388 commit a0c603c

File tree

5 files changed

+53
-52
lines changed

5 files changed

+53
-52
lines changed
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Use 8-byte step to detect ASCII sequence in 64-bit Windows build.

Objects/bytes_methods.c

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -100,35 +100,35 @@ Return True if B is empty or all characters in B are ASCII,\n\
100100
False otherwise.");
101101

102102
// Optimization is copied from ascii_decode in unicodeobject.c
103-
/* Mask to quickly check whether a C 'long' contains a
103+
/* Mask to quickly check whether a C 'size_t' contains a
104104
non-ASCII, UTF8-encoded char. */
105-
#if (SIZEOF_LONG == 8)
106-
# define ASCII_CHAR_MASK 0x8080808080808080UL
107-
#elif (SIZEOF_LONG == 4)
108-
# define ASCII_CHAR_MASK 0x80808080UL
105+
#if (SIZEOF_SIZE_T == 8)
106+
# define ASCII_CHAR_MASK 0x8080808080808080ULL
107+
#elif (SIZEOF_SIZE_T == 4)
108+
# define ASCII_CHAR_MASK 0x80808080U
109109
#else
110-
# error C 'long' size should be either 4 or 8!
110+
# error C 'size_t' size should be either 4 or 8!
111111
#endif
112112

113113
PyObject*
114114
_Py_bytes_isascii(const char *cptr, Py_ssize_t len)
115115
{
116116
const char *p = cptr;
117117
const char *end = p + len;
118-
const char *aligned_end = (const char *) _Py_ALIGN_DOWN(end, SIZEOF_LONG);
118+
const char *aligned_end = (const char *) _Py_ALIGN_DOWN(end, SIZEOF_SIZE_T);
119119

120120
while (p < end) {
121121
/* Fast path; see STRINGLIB(utf8_decode) in stringlib/codecs.h
122122
for an explanation. */
123-
if (_Py_IS_ALIGNED(p, SIZEOF_LONG)) {
123+
if (_Py_IS_ALIGNED(p, SIZEOF_SIZE_T)) {
124124
/* Help register allocation */
125125
const char *_p = p;
126126
while (_p < aligned_end) {
127-
unsigned long value = *(const unsigned long *) _p;
127+
size_t value = *(const size_t *) _p;
128128
if (value & ASCII_CHAR_MASK) {
129129
Py_RETURN_FALSE;
130130
}
131-
_p += SIZEOF_LONG;
131+
_p += SIZEOF_SIZE_T;
132132
}
133133
p = _p;
134134
if (_p == end)

Objects/stringlib/codecs.h

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,14 @@
66

77
#include "pycore_bitutils.h" // _Py_bswap32()
88

9-
/* Mask to quickly check whether a C 'long' contains a
9+
/* Mask to quickly check whether a C 'size_t' contains a
1010
non-ASCII, UTF8-encoded char. */
11-
#if (SIZEOF_LONG == 8)
12-
# define ASCII_CHAR_MASK 0x8080808080808080UL
13-
#elif (SIZEOF_LONG == 4)
14-
# define ASCII_CHAR_MASK 0x80808080UL
11+
#if (SIZEOF_SIZE_T == 8)
12+
# define ASCII_CHAR_MASK 0x8080808080808080ULL
13+
#elif (SIZEOF_SIZE_T == 4)
14+
# define ASCII_CHAR_MASK 0x80808080U
1515
#else
16-
# error C 'long' size should be either 4 or 8!
16+
# error C 'size_t' size should be either 4 or 8!
1717
#endif
1818

1919
/* 10xxxxxx */
@@ -26,7 +26,7 @@ STRINGLIB(utf8_decode)(const char **inptr, const char *end,
2626
{
2727
Py_UCS4 ch;
2828
const char *s = *inptr;
29-
const char *aligned_end = (const char *) _Py_ALIGN_DOWN(end, SIZEOF_LONG);
29+
const char *aligned_end = (const char *) _Py_ALIGN_DOWN(end, SIZEOF_SIZE_T);
3030
STRINGLIB_CHAR *p = dest + *outpos;
3131

3232
while (s < end) {
@@ -36,34 +36,34 @@ STRINGLIB(utf8_decode)(const char **inptr, const char *end,
3636
/* Fast path for runs of ASCII characters. Given that common UTF-8
3737
input will consist of an overwhelming majority of ASCII
3838
characters, we try to optimize for this case by checking
39-
as many characters as a C 'long' can contain.
39+
as many characters as a C 'size_t' can contain.
4040
First, check if we can do an aligned read, as most CPUs have
4141
a penalty for unaligned reads.
4242
*/
43-
if (_Py_IS_ALIGNED(s, SIZEOF_LONG)) {
43+
if (_Py_IS_ALIGNED(s, SIZEOF_SIZE_T)) {
4444
/* Help register allocation */
4545
const char *_s = s;
4646
STRINGLIB_CHAR *_p = p;
4747
while (_s < aligned_end) {
48-
/* Read a whole long at a time (either 4 or 8 bytes),
48+
/* Read a whole size_t at a time (either 4 or 8 bytes),
4949
and do a fast unrolled copy if it only contains ASCII
5050
characters. */
51-
unsigned long value = *(const unsigned long *) _s;
51+
size_t value = *(const size_t *) _s;
5252
if (value & ASCII_CHAR_MASK)
5353
break;
5454
#if PY_LITTLE_ENDIAN
5555
_p[0] = (STRINGLIB_CHAR)(value & 0xFFu);
5656
_p[1] = (STRINGLIB_CHAR)((value >> 8) & 0xFFu);
5757
_p[2] = (STRINGLIB_CHAR)((value >> 16) & 0xFFu);
5858
_p[3] = (STRINGLIB_CHAR)((value >> 24) & 0xFFu);
59-
# if SIZEOF_LONG == 8
59+
# if SIZEOF_SIZE_T == 8
6060
_p[4] = (STRINGLIB_CHAR)((value >> 32) & 0xFFu);
6161
_p[5] = (STRINGLIB_CHAR)((value >> 40) & 0xFFu);
6262
_p[6] = (STRINGLIB_CHAR)((value >> 48) & 0xFFu);
6363
_p[7] = (STRINGLIB_CHAR)((value >> 56) & 0xFFu);
6464
# endif
6565
#else
66-
# if SIZEOF_LONG == 8
66+
# if SIZEOF_SIZE_T == 8
6767
_p[0] = (STRINGLIB_CHAR)((value >> 56) & 0xFFu);
6868
_p[1] = (STRINGLIB_CHAR)((value >> 48) & 0xFFu);
6969
_p[2] = (STRINGLIB_CHAR)((value >> 40) & 0xFFu);
@@ -79,8 +79,8 @@ STRINGLIB(utf8_decode)(const char **inptr, const char *end,
7979
_p[3] = (STRINGLIB_CHAR)(value & 0xFFu);
8080
# endif
8181
#endif
82-
_s += SIZEOF_LONG;
83-
_p += SIZEOF_LONG;
82+
_s += SIZEOF_SIZE_T;
83+
_p += SIZEOF_SIZE_T;
8484
}
8585
s = _s;
8686
p = _p;

Objects/stringlib/find_max_char.h

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,14 @@
44
# error "find_max_char.h is specific to Unicode"
55
#endif
66

7-
/* Mask to quickly check whether a C 'long' contains a
7+
/* Mask to quickly check whether a C 'size_t' contains a
88
non-ASCII, UTF8-encoded char. */
9-
#if (SIZEOF_LONG == 8)
10-
# define UCS1_ASCII_CHAR_MASK 0x8080808080808080UL
11-
#elif (SIZEOF_LONG == 4)
12-
# define UCS1_ASCII_CHAR_MASK 0x80808080UL
9+
#if (SIZEOF_SIZE_T == 8)
10+
# define UCS1_ASCII_CHAR_MASK 0x8080808080808080ULL
11+
#elif (SIZEOF_SIZE_T == 4)
12+
# define UCS1_ASCII_CHAR_MASK 0x80808080U
1313
#else
14-
# error C 'long' size should be either 4 or 8!
14+
# error C 'size_t' size should be either 4 or 8!
1515
#endif
1616

1717
#if STRINGLIB_SIZEOF_CHAR == 1
@@ -21,17 +21,17 @@ STRINGLIB(find_max_char)(const STRINGLIB_CHAR *begin, const STRINGLIB_CHAR *end)
2121
{
2222
const unsigned char *p = (const unsigned char *) begin;
2323
const unsigned char *aligned_end =
24-
(const unsigned char *) _Py_ALIGN_DOWN(end, SIZEOF_LONG);
24+
(const unsigned char *) _Py_ALIGN_DOWN(end, SIZEOF_SIZE_T);
2525

2626
while (p < end) {
27-
if (_Py_IS_ALIGNED(p, SIZEOF_LONG)) {
27+
if (_Py_IS_ALIGNED(p, SIZEOF_SIZE_T)) {
2828
/* Help register allocation */
2929
const unsigned char *_p = p;
3030
while (_p < aligned_end) {
31-
unsigned long value = *(const unsigned long *) _p;
31+
size_t value = *(const size_t *) _p;
3232
if (value & UCS1_ASCII_CHAR_MASK)
3333
return 255;
34-
_p += SIZEOF_LONG;
34+
_p += SIZEOF_SIZE_T;
3535
}
3636
p = _p;
3737
if (p == end)

Objects/unicodeobject.c

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -5025,21 +5025,21 @@ PyUnicode_DecodeUTF8(const char *s,
50255025
#include "stringlib/codecs.h"
50265026
#include "stringlib/undef.h"
50275027

5028-
/* Mask to quickly check whether a C 'long' contains a
5028+
/* Mask to quickly check whether a C 'size_t' contains a
50295029
non-ASCII, UTF8-encoded char. */
5030-
#if (SIZEOF_LONG == 8)
5031-
# define ASCII_CHAR_MASK 0x8080808080808080UL
5032-
#elif (SIZEOF_LONG == 4)
5033-
# define ASCII_CHAR_MASK 0x80808080UL
5030+
#if (SIZEOF_SIZE_T == 8)
5031+
# define ASCII_CHAR_MASK 0x8080808080808080ULL
5032+
#elif (SIZEOF_SIZE_T == 4)
5033+
# define ASCII_CHAR_MASK 0x80808080U
50345034
#else
5035-
# error C 'long' size should be either 4 or 8!
5035+
# error C 'size_t' size should be either 4 or 8!
50365036
#endif
50375037

50385038
static Py_ssize_t
50395039
ascii_decode(const char *start, const char *end, Py_UCS1 *dest)
50405040
{
50415041
const char *p = start;
5042-
const char *aligned_end = (const char *) _Py_ALIGN_DOWN(end, SIZEOF_LONG);
5042+
const char *aligned_end = (const char *) _Py_ALIGN_DOWN(end, SIZEOF_SIZE_T);
50435043

50445044
/*
50455045
* Issue #17237: m68k is a bit different from most architectures in
@@ -5049,21 +5049,21 @@ ascii_decode(const char *start, const char *end, Py_UCS1 *dest)
50495049
* version" will even speed up m68k.
50505050
*/
50515051
#if !defined(__m68k__)
5052-
#if SIZEOF_LONG <= SIZEOF_VOID_P
5053-
assert(_Py_IS_ALIGNED(dest, SIZEOF_LONG));
5054-
if (_Py_IS_ALIGNED(p, SIZEOF_LONG)) {
5052+
#if SIZEOF_SIZE_T <= SIZEOF_VOID_P
5053+
assert(_Py_IS_ALIGNED(dest, SIZEOF_SIZE_T));
5054+
if (_Py_IS_ALIGNED(p, SIZEOF_SIZE_T)) {
50555055
/* Fast path; see STRINGLIB(utf8_decode) for
50565056
an explanation. */
50575057
/* Help register allocation */
50585058
const char *_p = p;
50595059
Py_UCS1 * q = dest;
50605060
while (_p < aligned_end) {
5061-
unsigned long value = *(const unsigned long *) _p;
5061+
size_t value = *(const size_t *) _p;
50625062
if (value & ASCII_CHAR_MASK)
50635063
break;
5064-
*((unsigned long *)q) = value;
5065-
_p += SIZEOF_LONG;
5066-
q += SIZEOF_LONG;
5064+
*((size_t *)q) = value;
5065+
_p += SIZEOF_SIZE_T;
5066+
q += SIZEOF_SIZE_T;
50675067
}
50685068
p = _p;
50695069
while (p < end) {
@@ -5078,14 +5078,14 @@ ascii_decode(const char *start, const char *end, Py_UCS1 *dest)
50785078
while (p < end) {
50795079
/* Fast path; see STRINGLIB(utf8_decode) in stringlib/codecs.h
50805080
for an explanation. */
5081-
if (_Py_IS_ALIGNED(p, SIZEOF_LONG)) {
5081+
if (_Py_IS_ALIGNED(p, SIZEOF_SIZE_T)) {
50825082
/* Help register allocation */
50835083
const char *_p = p;
50845084
while (_p < aligned_end) {
5085-
unsigned long value = *(const unsigned long *) _p;
5085+
size_t value = *(const size_t *) _p;
50865086
if (value & ASCII_CHAR_MASK)
50875087
break;
5088-
_p += SIZEOF_LONG;
5088+
_p += SIZEOF_SIZE_T;
50895089
}
50905090
p = _p;
50915091
if (_p == end)

0 commit comments

Comments
 (0)