From 5e2b25980249c05bc1c22225e3a359a3a3a54350 Mon Sep 17 00:00:00 2001 From: Richard Leach Date: Thu, 15 May 2025 16:36:56 +0000 Subject: [PATCH 1/2] SvPV_shrink_to_cur: don't be unrealistic, do include space for COW The `SvPV_shrink_to_cur` macro shrinks an allocation to `SvCUR(sv) + 1`, which does not include an additional byte for Copy-On-Write (COW). GH#22116 - a902d92a78262a0fd111789742f9c9e2a7fa2f42 - short-circuited constant folding on CONST OPs, as this should be unnecessary. However, Dave Mitchell noticed that it had the inadvertent effect of disabling COW on SVs holding UTF8 string literals (e.g. `"\x{100}abcd"`). When a CONST OP is created, `Perl_ck_svconst` should mark its SV as being COW-able. But SVs built via `S_scan_const`, when that has called `SvPV_shrink_to_cur`, have resulting `SvLEN(sv)` values that fail the `SvCANCOW(sv)` test. Previously, constant folding had the effect of copying the literal into a buffer large enough for COW. This commit modifies `SvPV_shrink_to_cur` to: allocate an additional byte to allow for subsequent COWing directly. The macro has also been modified such that: * No reallocation will be attempted if the saving is unrealistically small, or otherwise no saving is likely to be achievable. * The intended saving is compared with SvLEN(sv), which enables checks at call sites to be simplified. --- sv.h | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/sv.h b/sv.h index 449091e85375..05a83638cacc 100644 --- a/sv.h +++ b/sv.h @@ -1590,9 +1590,19 @@ L> before calling this. =cut */ -#define SvPV_shrink_to_cur(sv) STMT_START { \ - const STRLEN _lEnGtH = SvCUR(sv) + 1; \ - SvPV_renew(sv, _lEnGtH); \ +/* Notes: Ensure the buffer is big enough to be COWed in the future, so + + 1 for the trailing null byte + 1 for the COW count. + * Don't try to shrink if the saving is too small to be realistic (given + * the realities of allocators) or noticeable. + * Don't try to shrink smaller than PERL_STRLEN_NEW_MIN, as that is + * unlikely to result in a smaller allocation. +*/ + +#define SvPV_shrink_to_cur(sv) STMT_START { \ + const STRLEN _lEnGtH = SvCUR(sv) + 2; \ + if ((SvLEN(sv) > _lEnGtH + PTRSIZE) && \ + ( _lEnGtH >= PERL_STRLEN_NEW_MIN) ) \ + SvPV_renew(sv, _lEnGtH); \ } STMT_END /* From 5234f32919e65ab3b447ca4e000df7a58e9ddee9 Mon Sep 17 00:00:00 2001 From: Richard Leach Date: Wed, 21 May 2025 21:30:09 +0000 Subject: [PATCH 2/2] Simplify SvPV_shrink_to_cur() usage in toke.c The previous commit modified the definition of `SvPV_shrink_to_cur` so that it does not attempt to reallocate to make unrealistic savings. That made some of the condition checks in toke.c redundant, so this commit removes those. --- toke.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/toke.c b/toke.c index 5759c7890d32..671c4af209a2 100644 --- a/toke.c +++ b/toke.c @@ -4422,9 +4422,7 @@ S_scan_const(pTHX_ char *start) } /* shrink the sv if we allocated more than we used */ - if (SvCUR(sv) + 5 < SvLEN(sv)) { - SvPV_shrink_to_cur(sv); - } + SvPV_shrink_to_cur(sv); /* return the substring (via pl_yylval) only if we parsed anything */ if (s > start) { @@ -11348,9 +11346,7 @@ S_scan_heredoc(pTHX_ char *s) SvREFCNT_dec_NN(newstr); } - if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) { - SvPV_shrink_to_cur(tmpstr); - } + SvPV_shrink_to_cur(tmpstr); if (!IN_BYTES) { if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr))) @@ -11877,10 +11873,7 @@ Perl_scan_str(pTHX_ char *start, int keep_bracketed_quoted, int keep_delims, int PL_parser->herelines = herelines; /* if we allocated too much space, give some back */ - if (SvCUR(sv) + 5 < SvLEN(sv)) { - SvLEN_set(sv, SvCUR(sv) + 1); - SvPV_shrink_to_cur(sv); - } + SvPV_shrink_to_cur(sv); /* decide whether this is the first or second quoted string we've read for this op