Skip to content

Commit 089ad25

Browse files
committed
gh16947: avoid mutating the regexp program only when within a GOSUB
Commits 3bc2a78 and bdb91f3 used the existence of a frame to decide when it was unsafe to mutate the regexp program, due to having recursed for a GOSUB. However, the frame recursion mechanism is also used for SUSPEND. Refine the check further so that mutation is avoided only when within a GOSUB, by saving a new boolean in the frame structure and using that to derive a "mutate_ok" flag.
1 parent 3b53f4e commit 089ad25

File tree

2 files changed

+20
-10
lines changed

2 files changed

+20
-10
lines changed

regcomp.c

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,7 @@ typedef struct scan_frame {
163163
regnode *next_regnode; /* next node to process when last is reached */
164164
U32 prev_recursed_depth;
165165
I32 stopparen; /* what stopparen do we use */
166+
bool in_gosub; /* this or an outer frame is for GOSUB */
166167

167168
struct scan_frame *this_prev_frame; /* this previous frame */
168169
struct scan_frame *prev_frame; /* previous frame */
@@ -4606,6 +4607,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
46064607
node length to get a real minimum (because
46074608
the folded version may be shorter) */
46084609
bool unfolded_multi_char = FALSE;
4610+
bool mutate_ok = (frame && frame->in_gosub) ? 0 : 1;
46094611
/* Peephole optimizer: */
46104612
DEBUG_STUDYDATA("Peep", data, depth, is_inf);
46114613
DEBUG_PEEP("Peep", scan, depth, flags);
@@ -4619,7 +4621,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
46194621
if (PL_regkind[OP(scan)] == EXACT
46204622
&& OP(scan) != LEXACT
46214623
&& OP(scan) != LEXACT_REQ8
4622-
&& !frame
4624+
&& mutate_ok
46234625
) {
46244626
join_exact(pRExC_state, scan, &min_subtract, &unfolded_multi_char,
46254627
0, NULL, depth + 1);
@@ -4810,7 +4812,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
48104812

48114813
if (PERL_ENABLE_TRIE_OPTIMISATION
48124814
&& OP(startbranch) == BRANCH
4813-
&& !frame
4815+
&& mutate_ok
48144816
) {
48154817
/* demq.
48164818

@@ -5264,6 +5266,9 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
52645266
newframe->stopparen = stopparen;
52655267
newframe->prev_recursed_depth = recursed_depth;
52665268
newframe->this_prev_frame= frame;
5269+
newframe->in_gosub = (
5270+
(frame && frame->in_gosub) || OP(scan) == GOSUB
5271+
);
52675272

52685273
DEBUG_STUDYDATA("frame-new", data, depth, is_inf);
52695274
DEBUG_PEEP("fnew", scan, depth, flags);
@@ -5349,7 +5354,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
53495354
&& ( OP(scan) == EXACTFAA
53505355
|| ( OP(scan) == EXACTFU
53515356
&& ! HAS_NONLATIN1_SIMPLE_FOLD_CLOSURE(*s)))
5352-
&& !frame
5357+
&& mutate_ok
53535358
) {
53545359
U8 mask = ~ ('A' ^ 'a'); /* These differ in just one bit */
53555360

@@ -5443,7 +5448,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
54435448

54445449
/* This temporary node can now be turned into EXACTFU, and
54455450
* must, as regexec.c doesn't handle it */
5446-
if (OP(next) == EXACTFU_S_EDGE && !frame) {
5451+
if (OP(next) == EXACTFU_S_EDGE && mutate_ok) {
54475452
OP(next) = EXACTFU;
54485453
}
54495454

@@ -5452,7 +5457,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
54525457
&& ( OP(next) == EXACTFAA
54535458
|| ( OP(next) == EXACTFU
54545459
&& ! HAS_NONLATIN1_SIMPLE_FOLD_CLOSURE(* STRING(next))))
5455-
&& !frame
5460+
&& mutate_ok
54565461
) {
54575462
/* These differ in just one bit */
54585463
U8 mask = ~ ('A' ^ 'a');
@@ -5601,7 +5606,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
56015606
&& data->flags & SF_IN_PAR
56025607
&& !(data->flags & SF_HAS_EVAL)
56035608
&& !deltanext && minnext == 1
5604-
&& !frame
5609+
&& mutate_ok
56055610
) {
56065611
/* Try to optimize to CURLYN. */
56075612
regnode *nxt = NEXTOPER(oscan) + EXTRA_STEP_2ARGS;
@@ -5655,7 +5660,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
56555660
/* Nor characters whose fold at run-time may be
56565661
* multi-character */
56575662
&& ! (RExC_seen & REG_UNFOLDED_MULTI_SEEN)
5658-
&& !frame
5663+
&& mutate_ok
56595664
) {
56605665
/* XXXX How to optimize if data == 0? */
56615666
/* Optimize to a simpler form. */

t/re/pat.t

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ BEGIN {
2424

2525
skip_all_without_unicode_tables();
2626

27-
plan tests => 1018; # Update this when adding/deleting tests.
27+
plan tests => 1019; # Update this when adding/deleting tests.
2828

2929
run_tests() unless caller;
3030

@@ -2252,11 +2252,16 @@ SKIP:
22522252
ok($result, "regexp correctly matched");
22532253
}
22542254

2255-
# gh16947: test regexp corruption
2255+
# gh16947: test regexp corruption (GOSUB)
22562256
{
22572257
fresh_perl_is(q{
22582258
'xy' =~ /x(?0)|x(?|y|y)/ && print 'ok'
2259-
}, 'ok', {}, 'gh16947: test regexp corruption');
2259+
}, 'ok', {}, 'gh16947: test regexp corruption (GOSUB)');
2260+
}
2261+
# gh16947: test fix doesn't break SUSPEND
2262+
{
2263+
fresh_perl_is(q{ 'sx' =~ m{ss++}i; print 'ok' },
2264+
'ok', {}, "gh16947: test fix doesn't break SUSPEND");
22602265
}
22612266

22622267
} # End of sub run_tests

0 commit comments

Comments
 (0)