Skip to content

Commit 499333d

Browse files
committed
PATCH: [perl #122671] Many warnings in regcomp.c can occur twice
This solves the problem by moving the warnings to be output only in pass2 of compilation. The problem arises because almost all of pass1 can be repeated under certain circumstances described in the ticket and the added comments of this patch.
1 parent 3da4c24 commit 499333d

File tree

4 files changed

+126
-80
lines changed

4 files changed

+126
-80
lines changed

pod/perldelta.pod

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -437,6 +437,12 @@ Stub declarations like C<sub f;> and C<sub f ();> no longer wipe out
437437
constants of the same name declared by C<use constant>. This bug was
438438
introduced in perl 5.10.0.
439439

440+
=item *
441+
442+
Under some conditions a warning raised in compilation of regular
443+
expression patterns could be displayed multiple times. This is now
444+
fixed.
445+
440446
=back
441447

442448
=head1 Known Problems

regcomp.c

Lines changed: 41 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -570,80 +570,85 @@ static const scan_data_t zero_scan_data =
570570
REPORT_LOCATION_ARGS(offset)); \
571571
} STMT_END
572572

573+
/* These have asserts in them because of [perl #122671] Many warnings in
574+
* regcomp.c can occur twice. If they get output in pass1 and later in that
575+
* pass, the pattern has to be converted to UTF-8 and the pass restarted, they
576+
* would get output again. So they should be output in pass2, and these
577+
* asserts make sure new warnings follow that paradigm. */
573578

574579
/* m is not necessarily a "literal string", in this macro */
575580
#define reg_warn_non_literal_string(loc, m) STMT_START { \
576581
const IV offset = loc - RExC_precomp; \
577-
Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s" REPORT_LOCATION, \
582+
__ASSERT_(PASS2) Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s" REPORT_LOCATION, \
578583
m, REPORT_LOCATION_ARGS(offset)); \
579584
} STMT_END
580585

581586
#define ckWARNreg(loc,m) STMT_START { \
582587
const IV offset = loc - RExC_precomp; \
583-
Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), m REPORT_LOCATION, \
588+
__ASSERT_(PASS2) Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), m REPORT_LOCATION, \
584589
REPORT_LOCATION_ARGS(offset)); \
585590
} STMT_END
586591

587592
#define vWARN_dep(loc, m) STMT_START { \
588593
const IV offset = loc - RExC_precomp; \
589-
Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), m REPORT_LOCATION, \
594+
__ASSERT_(PASS2) Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), m REPORT_LOCATION, \
590595
REPORT_LOCATION_ARGS(offset)); \
591596
} STMT_END
592597

593598
#define ckWARNdep(loc,m) STMT_START { \
594599
const IV offset = loc - RExC_precomp; \
595-
Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED), \
600+
__ASSERT_(PASS2) Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED), \
596601
m REPORT_LOCATION, \
597602
REPORT_LOCATION_ARGS(offset)); \
598603
} STMT_END
599604

600605
#define ckWARNregdep(loc,m) STMT_START { \
601606
const IV offset = loc - RExC_precomp; \
602-
Perl_ck_warner_d(aTHX_ packWARN2(WARN_DEPRECATED, WARN_REGEXP), \
607+
__ASSERT_(PASS2) Perl_ck_warner_d(aTHX_ packWARN2(WARN_DEPRECATED, WARN_REGEXP), \
603608
m REPORT_LOCATION, \
604609
REPORT_LOCATION_ARGS(offset)); \
605610
} STMT_END
606611

607612
#define ckWARN2reg_d(loc,m, a1) STMT_START { \
608613
const IV offset = loc - RExC_precomp; \
609-
Perl_ck_warner_d(aTHX_ packWARN(WARN_REGEXP), \
614+
__ASSERT_(PASS2) Perl_ck_warner_d(aTHX_ packWARN(WARN_REGEXP), \
610615
m REPORT_LOCATION, \
611616
a1, REPORT_LOCATION_ARGS(offset)); \
612617
} STMT_END
613618

614619
#define ckWARN2reg(loc, m, a1) STMT_START { \
615620
const IV offset = loc - RExC_precomp; \
616-
Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), m REPORT_LOCATION, \
621+
__ASSERT_(PASS2) Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), m REPORT_LOCATION, \
617622
a1, REPORT_LOCATION_ARGS(offset)); \
618623
} STMT_END
619624

620625
#define vWARN3(loc, m, a1, a2) STMT_START { \
621626
const IV offset = loc - RExC_precomp; \
622-
Perl_warner(aTHX_ packWARN(WARN_REGEXP), m REPORT_LOCATION, \
627+
__ASSERT_(PASS2) Perl_warner(aTHX_ packWARN(WARN_REGEXP), m REPORT_LOCATION, \
623628
a1, a2, REPORT_LOCATION_ARGS(offset)); \
624629
} STMT_END
625630

626631
#define ckWARN3reg(loc, m, a1, a2) STMT_START { \
627632
const IV offset = loc - RExC_precomp; \
628-
Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), m REPORT_LOCATION, \
633+
__ASSERT_(PASS2) Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), m REPORT_LOCATION, \
629634
a1, a2, REPORT_LOCATION_ARGS(offset)); \
630635
} STMT_END
631636

632637
#define vWARN4(loc, m, a1, a2, a3) STMT_START { \
633638
const IV offset = loc - RExC_precomp; \
634-
Perl_warner(aTHX_ packWARN(WARN_REGEXP), m REPORT_LOCATION, \
639+
__ASSERT_(PASS2) Perl_warner(aTHX_ packWARN(WARN_REGEXP), m REPORT_LOCATION, \
635640
a1, a2, a3, REPORT_LOCATION_ARGS(offset)); \
636641
} STMT_END
637642

638643
#define ckWARN4reg(loc, m, a1, a2, a3) STMT_START { \
639644
const IV offset = loc - RExC_precomp; \
640-
Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), m REPORT_LOCATION, \
645+
__ASSERT_(PASS2) Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), m REPORT_LOCATION, \
641646
a1, a2, a3, REPORT_LOCATION_ARGS(offset)); \
642647
} STMT_END
643648

644649
#define vWARN5(loc, m, a1, a2, a3, a4) STMT_START { \
645650
const IV offset = loc - RExC_precomp; \
646-
Perl_warner(aTHX_ packWARN(WARN_REGEXP), m REPORT_LOCATION, \
651+
__ASSERT_(PASS2) Perl_warner(aTHX_ packWARN(WARN_REGEXP), m REPORT_LOCATION, \
647652
a1, a2, a3, a4, REPORT_LOCATION_ARGS(offset)); \
648653
} STMT_END
649654

@@ -9380,7 +9385,7 @@ S_parse_lparen_question_flags(pTHX_ RExC_state_t *pRExC_state)
93809385
/*NOTREACHED*/
93819386
case ONCE_PAT_MOD: /* 'o' */
93829387
case GLOBAL_PAT_MOD: /* 'g' */
9383-
if (SIZE_ONLY && ckWARN(WARN_REGEXP)) {
9388+
if (PASS2 && ckWARN(WARN_REGEXP)) {
93849389
const I32 wflagbit = *RExC_parse == 'o'
93859390
? WASTED_O
93869391
: WASTED_G;
@@ -9400,7 +9405,7 @@ S_parse_lparen_question_flags(pTHX_ RExC_state_t *pRExC_state)
94009405
break;
94019406

94029407
case CONTINUE_PAT_MOD: /* 'c' */
9403-
if (SIZE_ONLY && ckWARN(WARN_REGEXP)) {
9408+
if (PASS2 && ckWARN(WARN_REGEXP)) {
94049409
if (! (wastedflags & WASTED_C) ) {
94059410
wastedflags |= WASTED_GC;
94069411
/* diag_listed_as: Useless (?-%s) - don't use /%s modifier in regex; marked by <-- HERE in m/%s/ */
@@ -9415,7 +9420,7 @@ S_parse_lparen_question_flags(pTHX_ RExC_state_t *pRExC_state)
94159420
break;
94169421
case KEEPCOPY_PAT_MOD: /* 'p' */
94179422
if (flagsp == &negflags) {
9418-
if (SIZE_ONLY)
9423+
if (PASS2)
94199424
ckWARNreg(RExC_parse + 1,"Useless use of (?-p)");
94209425
} else {
94219426
*flagsp |= RXf_PMf_KEEPCOPY;
@@ -10547,7 +10552,6 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
1054710552
if (max < min) { /* If can't match, warn and optimize to fail
1054810553
unconditionally */
1054910554
if (SIZE_ONLY) {
10550-
ckWARNreg(RExC_parse, "Quantifier {n,m} with n > m can't match");
1055110555

1055210556
/* We can't back off the size because we have to reserve
1055310557
* enough space for all the things we are about to throw
@@ -10556,6 +10560,7 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
1055610560
RExC_size = PREVOPER(RExC_size) - regarglen[(U8)OPFAIL];
1055710561
}
1055810562
else {
10563+
ckWARNreg(RExC_parse, "Quantifier {n,m} with n > m can't match");
1055910564
RExC_emit = orig_emit;
1056010565
}
1056110566
ret = reg_node(pRExC_state, OPFAIL);
@@ -10565,7 +10570,7 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
1056510570
&& RExC_parse < RExC_end
1056610571
&& (*RExC_parse == '?' || *RExC_parse == '+'))
1056710572
{
10568-
if (SIZE_ONLY) {
10573+
if (PASS2) {
1056910574
ckWARN2reg(RExC_parse + 1,
1057010575
"Useless use of greediness modifier '%c'",
1057110576
*RExC_parse);
@@ -10837,7 +10842,7 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state, regnode** node_p,
1083710842
*node_p = reg_node(pRExC_state,NOTHING);
1083810843
}
1083910844
else if (in_char_class) {
10840-
if (SIZE_ONLY && in_char_class) {
10845+
if (PASS2 && in_char_class) {
1084110846
if (strict) {
1084210847
RExC_parse++; /* Position after the "}" */
1084310848
vFAIL("Zero length \\N{}");
@@ -11430,7 +11435,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
1143011435
ret = reg_node(pRExC_state, CANY);
1143111436
RExC_seen |= REG_CANY_SEEN;
1143211437
*flagp |= HASWIDTH|SIMPLE;
11433-
if (SIZE_ONLY) {
11438+
if (PASS2) {
1143411439
ckWARNdep(RExC_parse+1, "\\C is deprecated");
1143511440
}
1143611441
goto finish_meta_pat;
@@ -11930,7 +11935,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
1193011935
bool valid = grok_bslash_o(&p,
1193111936
&result,
1193211937
&error_msg,
11933-
TRUE, /* out warnings */
11938+
PASS2, /* out warnings */
1193411939
FALSE, /* not strict */
1193511940
TRUE, /* Output warnings
1193611941
for non-
@@ -11959,7 +11964,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
1195911964
bool valid = grok_bslash_x(&p,
1196011965
&result,
1196111966
&error_msg,
11962-
TRUE, /* out warnings */
11967+
PASS2, /* out warnings */
1196311968
FALSE, /* not strict */
1196411969
TRUE, /* Output warnings
1196511970
for non-
@@ -11982,7 +11987,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
1198211987
}
1198311988
case 'c':
1198411989
p++;
11985-
ender = grok_bslash_c(*p++, SIZE_ONLY);
11990+
ender = grok_bslash_c(*p++, PASS2);
1198611991
break;
1198711992
case '8': case '9': /* must be a backreference */
1198811993
--p;
@@ -12021,7 +12026,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
1202112026
REQUIRE_UTF8;
1202212027
}
1202312028
p += numlen;
12024-
if (SIZE_ONLY /* like \08, \178 */
12029+
if (PASS2 /* like \08, \178 */
1202512030
&& numlen < 3
1202612031
&& p < RExC_end
1202712032
&& isDIGIT(*p) && ckWARN(WARN_REGEXP))
@@ -12038,7 +12043,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
1203812043
if (! RExC_override_recoding) {
1203912044
SV* enc = PL_encoding;
1204012045
ender = reg_recode((const char)(U8)ender, &enc);
12041-
if (!enc && SIZE_ONLY)
12046+
if (!enc && PASS2)
1204212047
ckWARNreg(p, "Invalid escape in the specified encoding");
1204312048
REQUIRE_UTF8;
1204412049
}
@@ -12772,16 +12777,17 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
1277212777
* upon an unescaped ']' that isn't one ending a regclass. To do both
1277312778
* these things, we need to realize that something preceded by a backslash
1277412779
* is escaped, so we have to keep track of backslashes */
12775-
if (SIZE_ONLY) {
12776-
UV depth = 0; /* how many nested (?[...]) constructs */
12777-
12780+
if (PASS2) {
1277812781
Perl_ck_warner_d(aTHX_
1277912782
packWARN(WARN_EXPERIMENTAL__REGEX_SETS),
1278012783
"The regex_sets feature is experimental" REPORT_LOCATION,
1278112784
UTF8fARG(UTF, (RExC_parse - RExC_precomp), RExC_precomp),
1278212785
UTF8fARG(UTF,
1278312786
RExC_end - RExC_start - (RExC_parse - RExC_precomp),
1278412787
RExC_precomp + (RExC_parse - RExC_precomp)));
12788+
}
12789+
else {
12790+
UV depth = 0; /* how many nested (?[...]) constructs */
1278512791

1278612792
while (RExC_parse < RExC_end) {
1278712793
SV* current = NULL;
@@ -13282,7 +13288,9 @@ S_add_above_Latin1_folds(pTHX_ RExC_state_t *pRExC_state, const U8 cp, SV** invl
1328213288
default:
1328313289
/* Use deprecated warning to increase the chances of this being
1328413290
* output */
13285-
ckWARN2reg_d(RExC_parse, "Perl folding rules are not up-to-date for 0x%02X; please use the perlbug utility to report;", cp);
13291+
if (PASS2) {
13292+
ckWARN2reg_d(RExC_parse, "Perl folding rules are not up-to-date for 0x%02X; please use the perlbug utility to report;", cp);
13293+
}
1328613294
break;
1328713295
}
1328813296
}
@@ -13750,8 +13758,8 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
1375013758
bool valid = grok_bslash_o(&RExC_parse,
1375113759
&value,
1375213760
&error_msg,
13753-
SIZE_ONLY, /* warnings in pass
13754-
1 only */
13761+
PASS2, /* warnings only in
13762+
pass 2 */
1375513763
strict,
1375613764
silence_non_portable,
1375713765
UTF);
@@ -13770,7 +13778,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
1377013778
bool valid = grok_bslash_x(&RExC_parse,
1377113779
&value,
1377213780
&error_msg,
13773-
TRUE, /* Output warnings */
13781+
PASS2, /* Output warnings */
1377413782
strict,
1377513783
silence_non_portable,
1377613784
UTF);
@@ -13782,7 +13790,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
1378213790
goto recode_encoding;
1378313791
break;
1378413792
case 'c':
13785-
value = grok_bslash_c(*RExC_parse++, SIZE_ONLY);
13793+
value = grok_bslash_c(*RExC_parse++, PASS2);
1378613794
break;
1378713795
case '0': case '1': case '2': case '3': case '4':
1378813796
case '5': case '6': case '7':
@@ -13822,7 +13830,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
1382213830
if (strict) {
1382313831
vFAIL("Invalid escape in the specified encoding");
1382413832
}
13825-
else if (SIZE_ONLY) {
13833+
else if (PASS2) {
1382613834
ckWARNreg(RExC_parse,
1382713835
"Invalid escape in the specified encoding");
1382813836
}

t/lib/warnings/regcomp

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,32 @@
11
regcomp.c These tests have been moved to t/re/reg_mesg.t
2-
except for those that explicitly test line numbers.
2+
except for those that explicitly test line numbers
3+
and those that don't have a <-- HERE in them.
34

45
__END__
56
use warnings 'regexp';
67
$r=qr/(??{ q"\\b+" })/;
78
"a" =~ /a$r/; # warning should come from this line
89
EXPECT
910
\b+ matches null string many times in regex; marked by <-- HERE in m/\b+ <-- HERE / at - line 3.
11+
########
12+
# regcomp.c
13+
use warnings 'digit' ;
14+
my $a = qr/\o{1238456}\x{100}/;
15+
my $a = qr/[\o{6548321}]\x{100}/;
16+
no warnings 'digit' ;
17+
my $a = qr/\o{1238456}\x{100}/;
18+
my $a = qr/[\o{6548321}]\x{100}/;
19+
EXPECT
20+
Non-octal character '8'. Resolved as "\o{123}" at - line 3.
21+
Non-octal character '8'. Resolved as "\o{654}" at - line 4.
22+
########
23+
# regcomp.c.c
24+
use warnings;
25+
$a = qr/\c,/;
26+
$a = qr/[\c,]/;
27+
no warnings 'syntax';
28+
$a = qr/\c,/;
29+
$a = qr/[\c,]/;
30+
EXPECT
31+
"\c," is more clearly written simply as "l" at - line 3.
32+
"\c," is more clearly written simply as "l" at - line 4.

0 commit comments

Comments
 (0)