Skip to content

Commit 5c1648b

Browse files
author
Father Chrysostomos
committed
Fix infinite loop with $tied =~ s/non-utf8/utf8/
Commit 3e462cd fixed bug #41530 (s/non-utf8/utf8/ was not working properly at all) by upgrading the target and redoing the substitution if the replacement was utf8 and the target was not. Commit c95ca9b fixed one problem with it calling get-magic too many times, by checking whether the upgrade caused a string realloca- tion and only then redoing the substitution. But it only fixed it when magic returns a pure ASCII string. Redoing the substitution meant going back to where the target was initially stringified and starting again. That meant calling get- magic again. So in those cases where magic returned something other than a UTF8 or pure ASCII string the substitution restarted and magic would be trig- gered again, possibly resulting in infinite loops (because it would have to be upgraded again, resulting a reallocation, and a restart). This happens with: • Latin-1 strings • Copy-on-write non-UTF8 strings • References that stringify without UTF8 c95ca9b also added SvPVX without checking first that it is SvPVX- able, so a typeglob causes an assertion failure. It turned out that there were also two other places in pp_subst that were calling FETCH a second time (the tests I added for the looping/ assertion bugs found this), so I changed them, too.
1 parent 0efd047 commit 5c1648b

File tree

2 files changed

+38
-5
lines changed

2 files changed

+38
-5
lines changed

pp_hot.c

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2101,8 +2101,8 @@ PP(pp_subst)
21012101
Perl_croak_no_modify(aTHX);
21022102
PUTBACK;
21032103

2104-
setup_match:
21052104
s = SvPV_mutable(TARG, len);
2105+
setup_match:
21062106
if (!SvPOKp(TARG) || SvTYPE(TARG) == SVt_PVGV || SvVOK(TARG))
21072107
force_on_match = 1;
21082108

@@ -2179,13 +2179,15 @@ PP(pp_subst)
21792179
* http://www.nntp.perl.org/group/perl.perl5.porters/2010/04/msg158809.html
21802180
*/
21812181
if (DO_UTF8(dstr) && ! DO_UTF8(TARG)) {
2182-
char * const orig_pvx = SvPVX(TARG);
2182+
char * const orig_pvx = SvPOKp(TARG) ? SvPVX(TARG) : NULL;
21832183
const STRLEN new_len = sv_utf8_upgrade_nomg(TARG);
21842184

21852185
/* If the lengths are the same, the pattern contains only
21862186
* invariants, can keep going; otherwise, various internal markers
21872187
* could be off, so redo */
21882188
if (new_len != len || orig_pvx != SvPVX(TARG)) {
2189+
/* Do this here, to avoid multiple FETCHes. */
2190+
s = SvPV_nomg(TARG, len);
21892191
goto setup_match;
21902192
}
21912193
}
@@ -2231,7 +2233,7 @@ PP(pp_subst)
22312233
#endif
22322234
if (force_on_match) {
22332235
force_on_match = 0;
2234-
s = SvPV_force(TARG, len);
2236+
s = SvPV_force_nomg(TARG, len);
22352237
goto force_it;
22362238
}
22372239
d = s;
@@ -2315,7 +2317,7 @@ PP(pp_subst)
23152317
cases where it would be viable to drop into the copy code. */
23162318
TARG = sv_2mortal(newSVsv(TARG));
23172319
}
2318-
s = SvPV_force(TARG, len);
2320+
s = SvPV_force_nomg(TARG, len);
23192321
goto force_it;
23202322
}
23212323
#ifdef PERL_OLD_COPY_ON_WRITE

t/re/subst.t

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ BEGIN {
77
}
88

99
require './test.pl';
10-
plan( tests => 190 );
10+
plan( tests => 200 );
1111

1212
$_ = 'david';
1313
$a = s/david/rules/r;
@@ -746,6 +746,8 @@ fresh_perl_is( '$_="abcef"; s/bc|(.)\G(.)/$1 ? "[$1-$2]" : "XX"/ge; print' => 'a
746746
# when substituted with a UTF8 replacement string, due to
747747
# magic getting called multiple times, and pointers now pointing
748748
# to stale/freed strings
749+
# The original fix for this caused infinite loops for non- or cow-
750+
# strings, so we test those, too.
749751
package FOO;
750752
my $fc;
751753
sub TIESCALAR { bless [ "abcdefgh" ] }
@@ -757,6 +759,35 @@ fresh_perl_is( '$_="abcef"; s/bc|(.)\G(.)/$1 ? "[$1-$2]" : "XX"/ge; print' => 'a
757759
$s =~ s/..../\x{101}/;
758760
::is($fc, 1, "tied UTF8 stuff FETCH count");
759761
::is("$s", "\x{101}efgh", "tied UTF8 stuff");
762+
763+
::watchdog(300);
764+
$fc = 0;
765+
$s = *foo;
766+
$s =~ s/..../\x{101}/;
767+
::is($fc, 1, '$tied_glob =~ s/non-utf8/utf8/ fetch count');
768+
::is("$s", "\x{101}::foo", '$tied_glob =~ s/non-utf8/utf8/ result');
769+
$fc = 0;
770+
$s = *foo;
771+
$s =~ s/(....)/\x{101}/g;
772+
::is($fc, 1, '$tied_glob =~ s/(non-utf8)/utf8/g fetch count');
773+
::is("$s", "\x{101}\x{101}o",
774+
'$tied_glob =~ s/(non-utf8)/utf8/g result');
775+
$fc = 0;
776+
$s = "\xff\xff\xff\xff\xff";
777+
$s =~ s/..../\x{101}/;
778+
::is($fc, 1, '$tied_latin1 =~ s/non-utf8/utf8/ fetch count');
779+
::is("$s", "\x{101}\xff", '$tied_latin1 =~ s/non-utf8/utf8/ result');
780+
$fc = 0;
781+
{ package package_name; tied($s)->[0] = __PACKAGE__ };
782+
$s =~ s/..../\x{101}/;
783+
::is($fc, 1, '$tied_cow =~ s/non-utf8/utf8/ fetch count');
784+
::is("$s", "\x{101}age_name", '$tied_cow =~ s/non-utf8/utf8/ result');
785+
$fc = 0;
786+
$s = \1;
787+
$s =~ s/..../\x{101}/;
788+
::is($fc, 1, '$tied_ref =~ s/non-utf8/utf8/ fetch count');
789+
::like("$s", qr/^\x{101}AR\(0x.*\)\z/,
790+
'$tied_ref =~ s/non-utf8/utf8/ result');
760791
}
761792

762793
# RT #97954

0 commit comments

Comments
 (0)