Skip to content

Commit f746176

Browse files
Zeframrgs
Zefram
authored andcommitted
Bareword sub lookups
Attached is a patch that changes how the tokeniser looks up subroutines, when they're referenced by a bareword, for prototype and const-sub purposes. Formerly, it has looked up bareword subs directly in the package, which is contrary to the way the generated op tree looks up the sub, via an rv2cv op. The patch makes the tokeniser generate the rv2cv op earlier, and dig around in that. The motivation for this is to allow modules to hook the rv2cv op creation, to affect the name->subroutine lookup process. Currently, such hooking affects op execution as intended, but everything goes wrong with a bareword ref where the tokeniser looks at some unrelated CV, or a blank space, in the package. With the patch in place, an rv2cv hook correctly affects the tokeniser and therefore the prototype-based aspects of parsing. The patch also changes ck_subr (which applies the argument context and checking parts of prototype behaviour) to handle subs referenced by an RV const op inside the rv2cv, where formerly it would only handle a gv op inside the rv2cv. This is to support the most likely kind of modified rv2cv op. The attached patch is the resulting revised version of the bareword sub patch. It incorporates the original patch (allowing rv2cv op hookers to control prototype processing), the GV-downgrading addition, and a mention in perldelta.
1 parent 6bd7445 commit f746176

File tree

8 files changed

+141
-44
lines changed

8 files changed

+141
-44
lines changed

embed.fnc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -365,6 +365,7 @@ Ap |void |gv_fullname4 |NN SV* sv|NN const GV* gv|NULLOK const char* prefix|bool
365365
pMox |GP * |newGP |NN GV *const gv
366366
Ap |void |gv_init |NN GV* gv|NULLOK HV* stash|NN const char* name|STRLEN len|int multi
367367
Ap |void |gv_name_set |NN GV* gv|NN const char *name|U32 len|U32 flags
368+
Apd |void |gv_try_downgrade|NN GV* gv
368369
Apd |HV* |gv_stashpv |NN const char* name|I32 flags
369370
Apd |HV* |gv_stashpvn |NN const char* name|U32 namelen|I32 flags
370371
Apd |HV* |gv_stashsv |NN SV* sv|I32 flags

embed.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -291,6 +291,7 @@
291291
#define gv_fullname4 Perl_gv_fullname4
292292
#define gv_init Perl_gv_init
293293
#define gv_name_set Perl_gv_name_set
294+
#define gv_try_downgrade Perl_gv_try_downgrade
294295
#define gv_stashpv Perl_gv_stashpv
295296
#define gv_stashpvn Perl_gv_stashpvn
296297
#define gv_stashsv Perl_gv_stashsv
@@ -2654,6 +2655,7 @@
26542655
#endif
26552656
#define gv_init(a,b,c,d,e) Perl_gv_init(aTHX_ a,b,c,d,e)
26562657
#define gv_name_set(a,b,c,d) Perl_gv_name_set(aTHX_ a,b,c,d)
2658+
#define gv_try_downgrade(a) Perl_gv_try_downgrade(aTHX_ a)
26572659
#define gv_stashpv(a,b) Perl_gv_stashpv(aTHX_ a,b)
26582660
#define gv_stashpvn(a,b,c) Perl_gv_stashpvn(aTHX_ a,b,c)
26592661
#define gv_stashsv(a,b) Perl_gv_stashsv(aTHX_ a,b)

global.sym

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,7 @@ Perl_gv_fullname3
148148
Perl_gv_fullname4
149149
Perl_gv_init
150150
Perl_gv_name_set
151+
Perl_gv_try_downgrade
151152
Perl_gv_stashpv
152153
Perl_gv_stashpvn
153154
Perl_gv_stashsv

gv.c

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2371,6 +2371,53 @@ Perl_gv_name_set(pTHX_ GV *gv, const char *name, U32 len, U32 flags)
23712371
GvNAME_HEK(gv) = share_hek(name, len, hash);
23722372
}
23732373

2374+
/*
2375+
=for apidoc gv_try_downgrade
2376+
2377+
If C<gv> is a typeglob containing only a constant sub, and is only
2378+
referenced from its package, and both the typeglob and the sub are
2379+
sufficiently ordinary, replace the typeglob (in the package) with a
2380+
placeholder that more compactly represents the same thing. This is meant
2381+
to be used when a placeholder has been upgraded, most likely because
2382+
something wanted to look at a proper code object, and it has turned out
2383+
to be a constant sub to which a proper reference is no longer required.
2384+
2385+
=cut
2386+
*/
2387+
2388+
void
2389+
Perl_gv_try_downgrade(pTHX_ GV *gv)
2390+
{
2391+
HV *stash;
2392+
CV *cv;
2393+
HEK *namehek;
2394+
SV **gvp;
2395+
PERL_ARGS_ASSERT_GV_TRY_DOWNGRADE;
2396+
if (SvREFCNT(gv) == 1 && SvTYPE(gv) == SVt_PVGV && !SvFAKE(gv) &&
2397+
!SvOBJECT(gv) && !SvMAGICAL(gv) && !SvREADONLY(gv) &&
2398+
isGV_with_GP(gv) && GvGP(gv) &&
2399+
GvMULTI(gv) && !GvINTRO(gv) && GvREFCNT(gv) == 1 &&
2400+
!GvSV(gv) && !GvAV(gv) && !GvHV(gv) && !GvIOp(gv) && !GvFORM(gv) &&
2401+
GvEGV(gv) == gv && (stash = GvSTASH(gv)) && (cv = GvCV(gv)) &&
2402+
!SvOBJECT(cv) && !SvMAGICAL(cv) && !SvREADONLY(cv) &&
2403+
CvSTASH(cv) == stash && CvGV(cv) == gv &&
2404+
CvCONST(cv) && !CvMETHOD(cv) && !CvLVALUE(cv) && !CvUNIQUE(cv) &&
2405+
!CvNODEBUG(cv) && !CvCLONE(cv) && !CvCLONED(cv) && !CvANON(cv) &&
2406+
(namehek = GvNAME_HEK(gv)) &&
2407+
(gvp = hv_fetch(stash, HEK_KEY(namehek),
2408+
HEK_LEN(namehek)*(HEK_UTF8(namehek) ? -1 : 1), 0)) &&
2409+
*gvp == (SV*)gv) {
2410+
SV *value = SvREFCNT_inc(CvXSUBANY(cv).any_ptr);
2411+
SvREFCNT(gv) = 0;
2412+
sv_clear((SV*)gv);
2413+
SvREFCNT(gv) = 1;
2414+
SvFLAGS(gv) = SVt_IV|SVf_ROK;
2415+
SvANY(gv) = (XPVGV*)((char*)&(gv->sv_u.svu_iv) -
2416+
STRUCT_OFFSET(XPVIV, xiv_iv));
2417+
SvRV_set(gv, value);
2418+
}
2419+
}
2420+
23742421
/*
23752422
* Local variables:
23762423
* c-indentation-style: bsd

op.c

Lines changed: 34 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -570,6 +570,13 @@ Perl_op_clear(pTHX_ OP *o)
570570
case OP_AELEMFAST:
571571
if (! (o->op_type == OP_AELEMFAST && o->op_flags & OPf_SPECIAL)) {
572572
/* not an OP_PADAV replacement */
573+
GV *gv = (o->op_type == OP_GV || o->op_type == OP_GVSV)
574+
#ifdef USE_ITHREADS
575+
&& PL_curpad
576+
#endif
577+
? cGVOPo_gv : NULL;
578+
if (gv)
579+
SvREFCNT_inc(gv);
573580
#ifdef USE_ITHREADS
574581
if (cPADOPo->op_padix > 0) {
575582
/* No GvIN_PAD_off(cGVOPo_gv) here, because other references
@@ -581,6 +588,12 @@ Perl_op_clear(pTHX_ OP *o)
581588
SvREFCNT_dec(cSVOPo->op_sv);
582589
cSVOPo->op_sv = NULL;
583590
#endif
591+
if (gv) {
592+
int try_downgrade = SvREFCNT(gv) == 2;
593+
SvREFCNT_dec(gv);
594+
if (try_downgrade)
595+
gv_try_downgrade(gv);
596+
}
584597
}
585598
break;
586599
case OP_METHOD_NAMED:
@@ -7945,22 +7958,29 @@ Perl_ck_subr(pTHX_ OP *o)
79457958
o->op_private |= OPpENTERSUB_HASTARG;
79467959
for (cvop = o2; cvop->op_sibling; cvop = cvop->op_sibling) ;
79477960
if (cvop->op_type == OP_RV2CV) {
7948-
SVOP* tmpop;
79497961
o->op_private |= (cvop->op_private & OPpENTERSUB_AMPER);
79507962
op_null(cvop); /* disable rv2cv */
7951-
tmpop = (SVOP*)((UNOP*)cvop)->op_first;
7952-
if (tmpop->op_type == OP_GV && !(o->op_private & OPpENTERSUB_AMPER)) {
7953-
GV *gv = cGVOPx_gv(tmpop);
7954-
cv = GvCVu(gv);
7955-
if (!cv)
7956-
tmpop->op_private |= OPpEARLY_CV;
7957-
else {
7958-
if (SvPOK(cv)) {
7959-
STRLEN len;
7960-
namegv = CvANON(cv) ? gv : CvGV(cv);
7961-
proto = SvPV(MUTABLE_SV(cv), len);
7962-
proto_end = proto + len;
7963-
}
7963+
if (!(o->op_private & OPpENTERSUB_AMPER)) {
7964+
SVOP *tmpop = (SVOP*)((UNOP*)cvop)->op_first;
7965+
GV *gv = NULL;
7966+
switch (tmpop->op_type) {
7967+
case OP_GV: {
7968+
gv = cGVOPx_gv(tmpop);
7969+
cv = GvCVu(gv);
7970+
if (!cv)
7971+
tmpop->op_private |= OPpEARLY_CV;
7972+
} break;
7973+
case OP_CONST: {
7974+
SV *sv = cSVOPx_sv(tmpop);
7975+
if (SvROK(sv) && SvTYPE(SvRV(sv)) == SVt_PVCV)
7976+
cv = (CV*)SvRV(sv);
7977+
} break;
7978+
}
7979+
if (cv && SvPOK(cv)) {
7980+
STRLEN len;
7981+
namegv = gv && CvANON(cv) ? gv : CvGV(cv);
7982+
proto = SvPV(MUTABLE_SV(cv), len);
7983+
proto_end = proto + len;
79647984
}
79657985
}
79667986
}

pod/perl5112delta.pod

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,16 @@ implements reverse Polish notation arithmetic via pluggable keywords.
7474
This module is mainly used for test purposes, and is not normally
7575
installed, but also serves as an example of how to use the new mechanism.
7676

77+
=head2 Overridable function lookup
78+
79+
Where an extension module hooks the creation of rv2cv ops, to modify
80+
the subroutine lookup process, this now works correctly for bareword
81+
subroutine calls. This means that prototypes on subroutines referenced
82+
this way will be processed correctly. (Previously bareword subroutine
83+
names were initially looked up, for parsing purposes, by an unhookable
84+
mechanism, so extensions could only properly influence subroutine names
85+
that appeared with an C<&> sigil.)
86+
7787
=head1 New Platforms
7888

7989
XXX List any platforms that this version of perl compiles on, that previous

proto.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -943,6 +943,11 @@ PERL_CALLCONV void Perl_gv_name_set(pTHX_ GV* gv, const char *name, U32 len, U32
943943
#define PERL_ARGS_ASSERT_GV_NAME_SET \
944944
assert(gv); assert(name)
945945

946+
PERL_CALLCONV void Perl_gv_try_downgrade(pTHX_ GV* gv)
947+
__attribute__nonnull__(pTHX_1);
948+
#define PERL_ARGS_ASSERT_GV_TRY_DOWNGRADE \
949+
assert(gv)
950+
946951
PERL_CALLCONV HV* Perl_gv_stashpv(pTHX_ const char* name, I32 flags)
947952
__attribute__nonnull__(pTHX_1);
948953
#define PERL_ARGS_ASSERT_GV_STASHPV \

toke.c

Lines changed: 41 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -5361,6 +5361,7 @@ Perl_yylex(pTHX)
53615361
SV *sv;
53625362
int pkgname = 0;
53635363
const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
5364+
OP *rv2cv_op;
53645365
CV *cv;
53655366
#ifdef PERL_MAD
53665367
SV *nextPL_nextwhite = 0;
@@ -5454,19 +5455,29 @@ Perl_yylex(pTHX)
54545455
if (len)
54555456
goto safe_bareword;
54565457

5457-
/* Do the explicit type check so that we don't need to force
5458-
the initialisation of the symbol table to have a real GV.
5459-
Beware - gv may not really be a PVGV, cv may not really be
5460-
a PVCV, (because of the space optimisations that gv_init
5461-
understands) But they're true if for this symbol there is
5462-
respectively a typeglob and a subroutine.
5463-
*/
5464-
cv = gv ? ((SvTYPE(gv) == SVt_PVGV)
5465-
/* Real typeglob, so get the real subroutine: */
5466-
? GvCVu(gv)
5467-
/* A proxy for a subroutine in this package? */
5468-
: SvOK(gv) ? MUTABLE_CV(gv) : NULL)
5469-
: NULL;
5458+
cv = NULL;
5459+
{
5460+
OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc(sv));
5461+
const_op->op_private = OPpCONST_BARE;
5462+
rv2cv_op = newCVREF(0, const_op);
5463+
}
5464+
if (rv2cv_op->op_type == OP_RV2CV &&
5465+
(rv2cv_op->op_flags & OPf_KIDS)) {
5466+
OP *rv_op = cUNOPx(rv2cv_op)->op_first;
5467+
switch (rv_op->op_type) {
5468+
case OP_CONST: {
5469+
SV *sv = cSVOPx_sv(rv_op);
5470+
if (SvROK(sv) && SvTYPE(SvRV(sv)) == SVt_PVCV)
5471+
cv = (CV*)SvRV(sv);
5472+
} break;
5473+
case OP_GV: {
5474+
GV *gv = cGVOPx_gv(rv_op);
5475+
CV *maybe_cv = GvCVu(gv);
5476+
if (maybe_cv && SvTYPE((SV*)maybe_cv) == SVt_PVCV)
5477+
cv = maybe_cv;
5478+
} break;
5479+
}
5480+
}
54705481

54715482
/* See if it's the indirect object for a list operator. */
54725483

@@ -5489,16 +5500,18 @@ Perl_yylex(pTHX)
54895500
/* Two barewords in a row may indicate method call. */
54905501

54915502
if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
5492-
(tmp = intuit_method(s, gv, cv)))
5503+
(tmp = intuit_method(s, gv, cv))) {
5504+
op_free(rv2cv_op);
54935505
return REPORT(tmp);
5506+
}
54945507

54955508
/* If not a declared subroutine, it's an indirect object. */
54965509
/* (But it's an indir obj regardless for sort.) */
54975510
/* Also, if "_" follows a filetest operator, it's a bareword */
54985511

54995512
if (
55005513
( !immediate_paren && (PL_last_lop_op == OP_SORT ||
5501-
((!gv || !cv) &&
5514+
(!cv &&
55025515
(PL_last_lop_op != OP_MAPSTART &&
55035516
PL_last_lop_op != OP_GREPSTART))))
55045517
|| (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
@@ -5521,6 +5534,7 @@ Perl_yylex(pTHX)
55215534

55225535
/* Is this a word before a => operator? */
55235536
if (*s == '=' && s[1] == '>' && !pkgname) {
5537+
op_free(rv2cv_op);
55245538
CLINE;
55255539
sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
55265540
if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
@@ -5535,7 +5549,7 @@ Perl_yylex(pTHX)
55355549
d = s + 1;
55365550
while (SPACE_OR_TAB(*d))
55375551
d++;
5538-
if (*d == ')' && (sv = gv_const_sv(gv))) {
5552+
if (*d == ')' && (sv = cv_const_sv(cv))) {
55395553
s = d + 1;
55405554
goto its_constant;
55415555
}
@@ -5556,14 +5570,16 @@ Perl_yylex(pTHX)
55565570
PL_thistoken = newSVpvs("");
55575571
}
55585572
#endif
5573+
op_free(rv2cv_op);
55595574
force_next(WORD);
55605575
pl_yylval.ival = 0;
55615576
TOKEN('&');
55625577
}
55635578

55645579
/* If followed by var or block, call it a method (unless sub) */
55655580

5566-
if ((*s == '$' || *s == '{') && (!gv || !cv)) {
5581+
if ((*s == '$' || *s == '{') && !cv) {
5582+
op_free(rv2cv_op);
55675583
PL_last_lop = PL_oldbufptr;
55685584
PL_last_lop_op = OP_METHOD;
55695585
PREBLOCK(METHOD);
@@ -5573,8 +5589,10 @@ Perl_yylex(pTHX)
55735589

55745590
if (!orig_keyword
55755591
&& (isIDFIRST_lazy_if(s,UTF) || *s == '$')
5576-
&& (tmp = intuit_method(s, gv, cv)))
5592+
&& (tmp = intuit_method(s, gv, cv))) {
5593+
op_free(rv2cv_op);
55775594
return REPORT(tmp);
5595+
}
55785596

55795597
/* Not a method, so call it a subroutine (if defined) */
55805598

@@ -5584,25 +5602,17 @@ Perl_yylex(pTHX)
55845602
"Ambiguous use of -%s resolved as -&%s()",
55855603
PL_tokenbuf, PL_tokenbuf);
55865604
/* Check for a constant sub */
5587-
if ((sv = gv_const_sv(gv))) {
5605+
if ((sv = cv_const_sv(cv))) {
55885606
its_constant:
5607+
op_free(rv2cv_op);
55895608
SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
55905609
((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
55915610
pl_yylval.opval->op_private = 0;
55925611
TOKEN(WORD);
55935612
}
55945613

5595-
/* Resolve to GV now. */
5596-
if (SvTYPE(gv) != SVt_PVGV) {
5597-
gv = gv_fetchpv(PL_tokenbuf, 0, SVt_PVCV);
5598-
assert (SvTYPE(gv) == SVt_PVGV);
5599-
/* cv must have been some sort of placeholder, so
5600-
now needs replacing with a real code reference. */
5601-
cv = GvCV(gv);
5602-
}
5603-
56045614
op_free(pl_yylval.opval);
5605-
pl_yylval.opval = newCVREF(0, newGVOP(OP_GV, 0, gv));
5615+
pl_yylval.opval = rv2cv_op;
56065616
pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
56075617
PL_last_lop = PL_oldbufptr;
56085618
PL_last_lop_op = OP_ENTERSUB;
@@ -5670,7 +5680,7 @@ Perl_yylex(pTHX)
56705680
if (probable_sub) {
56715681
gv = gv_fetchpv(PL_tokenbuf, GV_ADD, SVt_PVCV);
56725682
op_free(pl_yylval.opval);
5673-
pl_yylval.opval = newCVREF(0, newGVOP(OP_GV, 0, gv));
5683+
pl_yylval.opval = rv2cv_op;
56745684
pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
56755685
PL_last_lop = PL_oldbufptr;
56765686
PL_last_lop_op = OP_ENTERSUB;
@@ -5722,6 +5732,7 @@ Perl_yylex(pTHX)
57225732
}
57235733
}
57245734
}
5735+
op_free(rv2cv_op);
57255736

57265737
safe_bareword:
57275738
if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) {

0 commit comments

Comments
 (0)