Skip to content

Commit 22b433e

Browse files
committed
PATCH [perl #123562] Regexp-matching "hangs"
The regex engine got into an infinite loop because of the malformation. It was trying to back up over a sequence of UTF-8 continuation bytes, and the byte just before such a sequence should be a start byte; if it is not, the string is malformed. I added a test that croaks when that isn't the case, so that the engine doesn't loop infinitely. I did the same in the similar areas of regexec.c. Comments added to the code long ago suggested checking for malformations in the vicinity of the new tests, but that was never done. These new tests should be good enough to prevent looping, anyway.
1 parent 545badf commit 22b433e

File tree

2 files changed

+29
-1
lines changed

2 files changed

+29
-1
lines changed

regexec.c

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8761,6 +8761,10 @@ S_reghop3(U8 *s, SSize_t off, const U8* lim)
87618761
if (UTF8_IS_CONTINUED(*s)) {
87628762
while (s > lim && UTF8_IS_CONTINUATION(*s))
87638763
s--;
8764+
if (! UTF8_IS_START(*s)) {
8765+
dTHX;
8766+
Perl_croak(aTHX_ "Malformed UTF-8 character (fatal)");
8767+
}
87648768
}
87658769
/* XXX could check well-formedness here */
87668770
}
@@ -8785,6 +8789,10 @@ S_reghop4(U8 *s, SSize_t off, const U8* llim, const U8* rlim)
87858789
if (UTF8_IS_CONTINUED(*s)) {
87868790
while (s > llim && UTF8_IS_CONTINUATION(*s))
87878791
s--;
8792+
if (! UTF8_IS_START(*s)) {
8793+
dTHX;
8794+
Perl_croak(aTHX_ "Malformed UTF-8 character (fatal)");
8795+
}
87888796
}
87898797
/* XXX could check well-formedness here */
87908798
}
@@ -8814,6 +8822,10 @@ S_reghopmaybe3(U8* s, SSize_t off, const U8* lim)
88148822
if (UTF8_IS_CONTINUED(*s)) {
88158823
while (s > lim && UTF8_IS_CONTINUATION(*s))
88168824
s--;
8825+
if (! UTF8_IS_START(*s)) {
8826+
dTHX;
8827+
Perl_croak(aTHX_ "Malformed UTF-8 character (fatal)");
8828+
}
88178829
}
88188830
/* XXX could check well-formedness here */
88198831
}

t/re/pat.t

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ BEGIN {
2323
skip_all_without_unicode_tables();
2424
}
2525

26-
plan tests => 774; # Update this when adding/deleting tests.
26+
plan tests => 775; # Update this when adding/deleting tests.
2727

2828
run_tests() unless caller;
2929

@@ -1675,6 +1675,22 @@ EOP
16751675
"test that we handle things like m/\\888888888/ without infinite loops" );
16761676
}
16771677

1678+
{ # Test that we handle some malformed UTF-8 without looping [perl
1679+
# #123562]
1680+
1681+
my $code='
1682+
BEGIN{require q(test.pl);}
1683+
use Encode qw(_utf8_on);
1684+
my $malformed = "a\x80\n";
1685+
_utf8_on($malformed);
1686+
watchdog(3);
1687+
$malformed =~ /(\n\r|\r)$/;
1688+
print q(No infinite loop here!);
1689+
';
1690+
fresh_perl_like($code, qr/Malformed UTF-8 character/, {},
1691+
"test that we handle some UTF-8 malformations without looping" );
1692+
}
1693+
16781694
{
16791695
# [perl #123843] hits SEGV trying to compile this pattern
16801696
my $match;

0 commit comments

Comments
 (0)