Skip to content

Commit 7ff501b

Browse files
committed
10. Limit the maximum number of groups to 1,073,741,823
1,073,741,823 groups should be enough for most users. This change reduces sizeof(match_context) again:
- On 32-bit platforms: 32 bytes, no change.
- On 64-bit platforms: 64 bytes -> 56 bytes.
sre uses a stack and the match_context struct to simulate recursive calls, so a smaller struct brings:
- deeper recursion
- lower memory consumption
- fewer memory reallocations
Here is a test: if the stack size (state->data_stack_base) is limited to 1 GiB, the maximum usable value of n is:
re.match(r'(ab)*', n * 'ab') # needs to save MARKs
72 bytes: n = 11,184,809
64 bytes: n = 12,201,610
56 bytes: n = 13,421,771
re.match(r'(?:ab)*', n * 'ab') # no need to save MARKs
72 bytes: n = 13,421,770
64 bytes: n = 14,913,079
56 bytes: n = 16,777,214
1 parent 32b9797 commit 7ff501b

File tree

2 files changed

+12
-9
lines changed

2 files changed

+12
-9
lines changed

Modules/sre.h

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,14 @@
1616
/* size of a code word (must be unsigned short or larger, and
1717
large enough to hold a UCS4 character) */
1818
#define SRE_CODE Py_UCS4
19+
20+
/* SRE_MAXGROUPS is 1,073,741,823 */
21+
/* Upper bound on the number of capture groups: half of INT_MAX.
   The replacement list is parenthesized so the macro keeps its value when
   expanded next to operators of equal or higher precedence
   (e.g. `x % SRE_MAXGROUPS`). */
#define SRE_MAXGROUPS (INT_MAX / 2)
22+
1923
#if SIZEOF_SIZE_T > 4
2024
# define SRE_MAXREPEAT (~(SRE_CODE)0)
21-
# define SRE_MAXGROUPS ((~(SRE_CODE)0) / 2)
2225
#else
2326
# define SRE_MAXREPEAT ((SRE_CODE)PY_SSIZE_T_MAX)
24-
# define SRE_MAXGROUPS ((SRE_CODE)PY_SSIZE_T_MAX / SIZEOF_SIZE_T / 2)
2527
#endif
2628

2729
typedef struct {
@@ -71,18 +73,18 @@ typedef struct {
7173
Py_ssize_t pos, endpos;
7274
int isbytes;
7375
int charsize; /* character size */
76+
/* current repeat context */
77+
SRE_REPEAT *repeat;
7478
/* registers */
75-
Py_ssize_t lastindex;
76-
Py_ssize_t lastmark;
79+
int32_t lastmark;
80+
int32_t lastindex;
7781
void** mark;
7882
int match_all;
7983
int must_advance;
8084
/* dynamically allocated stuff */
8185
char* data_stack;
8286
size_t data_stack_size;
8387
size_t data_stack_base;
84-
/* current repeat context */
85-
SRE_REPEAT *repeat;
8688
} SRE_STATE;
8789

8890
typedef struct {

Modules/sre_lib.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -532,8 +532,8 @@ typedef struct {
532532
SRE_CHAR* ptr;
533533
SRE_CODE* pattern;
534534
Py_ssize_t count;
535-
Py_ssize_t lastmark;
536-
Py_ssize_t lastindex;
535+
int32_t lastmark;
536+
int32_t lastindex;
537537
union {
538538
SRE_CODE chr;
539539
SRE_REPEAT* rep;
@@ -550,7 +550,8 @@ SRE(match)(SRE_STATE* state, SRE_CODE* pattern, int toplevel)
550550
{
551551
SRE_CHAR* end = (SRE_CHAR *)state->end;
552552
Py_ssize_t alloc_pos, ctx_pos = -1;
553-
Py_ssize_t i, ret = 0;
553+
Py_ssize_t ret = 0;
554+
int32_t i;
554555
unsigned int sigcount=0;
555556
int8_t jump;
556557

0 commit comments

Comments
 (0)