Skip to content

Commit e4be969

Browse files
committed
join: save the delimiter string before anything magical happens to it
This code had a few problems: - changes to the content of delim from set or overload magic could result in the separator between elements changing during the process of the join. - changes to the content of delim which allocated a new PVX resulted in access to freed memory - changes to the flags of delim, the UTF-8 flag in particular, could result in an invalid joined string, either mojibake or an invalidly encoded upgraded string To avoid that, we copy the separator, either into a local buffer if it's large enough, or an allocated buffer, and save the flag we use, to prevent changes to the delim SV from changing or invalidating the delimpv value. Fixes #21458 and some similar problems.
1 parent 6a3cf23 commit e4be969

File tree

2 files changed

+27
-7
lines changed

2 files changed

+27
-7
lines changed

doop.c

+27-3
Original file line numberDiff line numberDiff line change
@@ -633,6 +633,13 @@ Perl_do_trans(pTHX_ SV *sv)
633633
}
634634
}
635635

636+
#ifdef DEBUGGING
637+
/* make it small to exercise the logic */
638+
# define JOIN_DELIM_BUFSIZE 2
639+
#else
640+
# define JOIN_DELIM_BUFSIZE 40
641+
#endif
642+
636643
/*
637644
=for apidoc_section $string
638645
=for apidoc do_join
@@ -662,10 +669,27 @@ Perl_do_join(pTHX_ SV *sv, SV *delim, SV **mark, SV **sp)
662669
SSize_t items = sp - mark;
663670
STRLEN len;
664671
STRLEN delimlen;
665-
const char * const delims = SvPV_const(delim, delimlen);
672+
const char * delimpv = SvPV_const(delim, delimlen);
673+
char delim_buf[JOIN_DELIM_BUFSIZE];
674+
bool delim_do_utf8 = DO_UTF8(delim);
666675

667676
PERL_ARGS_ASSERT_DO_JOIN;
668677

678+
if (items >= 2) {
679+
/* Make a copy of the delim, since G or A magic may modify the delim SV.
680+
Use a local buffer if possible to avoid the cost of allocation and
681+
clean up.
682+
*/
683+
if (delimlen <= JOIN_DELIM_BUFSIZE) {
684+
Copy(delimpv, delim_buf, delimlen, char);
685+
delimpv = delim_buf;
686+
}
687+
else {
688+
delimpv = savepvn(delimpv, delimlen);
689+
SAVEFREEPV(delimpv);
690+
}
691+
}
692+
669693
mark++;
670694
len = (items > 0 ? (delimlen * (items - 1) ) : 0);
671695
SvUPGRADE(sv, SVt_PV);
@@ -699,11 +723,11 @@ Perl_do_join(pTHX_ SV *sv, SV *delim, SV **mark, SV **sp)
699723
}
700724

701725
if (delimlen) {
702-
const U32 delimflag = DO_UTF8(delim) ? SV_CATUTF8 : SV_CATBYTES;
726+
const U32 delimflag = delim_do_utf8 ? SV_CATUTF8 : SV_CATBYTES;
703727
for (; items > 0; items--,mark++) {
704728
STRLEN len;
705729
const char *s;
706-
sv_catpvn_flags(sv,delims,delimlen,delimflag);
730+
sv_catpvn_flags(sv, delimpv, delimlen, delimflag);
707731
s = SvPV_const(*mark,len);
708732
sv_catpvn_flags(sv,s,len,
709733
DO_UTF8(*mark) ? SV_CATUTF8 : SV_CATBYTES);

t/op/join.t

-4
Original file line numberDiff line numberDiff line change
@@ -173,15 +173,12 @@ isnt $_[1], $_[0],
173173
is( $SM::fetched, 0, 'FETCH not called' );
174174

175175
tie $t, "SM";
176-
{ local $TODO = "separator keeps being FETCHed";
177176
is( join( $t, "a", $t, "b", $t, "c" ),
178177
'a474b4104c', 'tied separator also in the join arguments' );
179-
}
180178
is( $SM::fetched, 3, 'FETCH called 1 + 2 times' );
181179
}
182180
{
183181
# see GH #21484
184-
local $TODO = "changes to delim have an effect";
185182
my $expect = "a\x{100}x\x{100}b\n";
186183
utf8::encode($expect);
187184
fresh_perl_is(<<'CODE', $expect, {}, "modifications delim from magic should be ignored");
@@ -199,7 +196,6 @@ CODE
199196
{
200197
# see GH #21484
201198
my $expect = "x\x{100}a\n";
202-
local $TODO = "modifications to delim PVX caused UB";
203199
utf8::encode($expect); # fresh_perl() does bytes
204200
fresh_perl_is(<<'CODE', $expect, {}, "modifications to delim PVX shouldn't crash");
205201
my $n = 1;

0 commit comments

Comments
 (0)