Skip to content

Commit

Permalink
remove pointless SVPV mortal alloc in S_strip_spaces()/prototype parser
Browse files Browse the repository at this point in the history
Valid, parseable, and sane prototypes, are tiny in char len and often fit
on 1 hand, most extreme cases in production would be 80 chars, because
terminal length. Original commit referred to mitigating sloppy XS code
with random white space. Majority of XS code will have perfect clean
prototype strings.

So not SV Alloc, PV Alloc, MEXTENDPUSH, memcpy(), and alot more
free()s in scope _dec(), for clean strings. Even for dirty but parsable
prototypes, they will be tiny in bytes. Therefore use a tiny stack buffer
for dirty semi-hot path to remove overhead. Fuzzing, junk, abuse, can
OOM die in newSV()/malloc() if needed, same as in prior version of the
code.

Use newSV(len) and POK_off, SV head is private to us, and a waste to
bookkeep SVPV details. SAVEFREEPV() was not used, because previous code
did mortal, and not SAVEFREEPV(), so keep using mortal. This can be
changed if someone has rational to do it.

64 bytes was picked since its power of 2, and below 80, but not 16 or 32
bytes which is too small and C compiler would probably be leaving padding
at the end of the array anyways. Going to 80 or 100, was discarded, to
give breathing room on C stack to C compilers so on x86/x64 C compilers
can emit U8 arithmitic with U8 constants to access various C autos, and
the CC doesn't downgrade to U32 constants to access various local C auto.
Worst case production Perl code for prototype length, is a URL argument
template system on CPAN, and even urls, are unlikely to be longer than a
desktop's URL bar or 80 char terminals. So 64 byte was picked.

The struct provides type matching and length safety between
S_strip_spaces() and its callers.
  • Loading branch information
bulk88 committed Oct 18, 2024
1 parent 82c4939 commit 737f8d5
Show file tree
Hide file tree
Showing 3 changed files with 62 additions and 18 deletions.
66 changes: 52 additions & 14 deletions inline.h
Original file line number Diff line number Diff line change
Expand Up @@ -260,27 +260,65 @@ Perl_CvDEPTH(const CV * const sv)
was added by toke.c, but is generally not the case if it was added elsewhere.
Since we can't enforce the spacelessness at assignment time, this routine
provides a temporary copy at parse time with spaces removed.
I<orig> is the start of the original buffer, I<len> is the length of the
prototype and will be updated when this returns.
prototype and will be updated or stay the same when this returns.
I<small_buf> is an optional small C stack char array buffer
of type I<STRIP_WS_BUF_T>, Caller must use I<STRIP_WS_PICK_BUF_TYPE(&buf,len)>
to pick if I<small_buf> will be used or not.
If I<small_buf> is not used a temporary self-freeing new buffer
(mortal but subject to change), will be malloc-ed and returned. The return
pointer can be the original I<orig> ptr, or I<small_buf> ptr, or a temporary
already mortalized I<malloc> memory. It is supposed to be opaque how the retval
I<char *> was allocated. It will not leak, and you can't take ownership of it.
It will free itself somehow, in the near future when you leave the current C
function scope.
*/

#ifdef PERL_CORE
# if defined(PERL_IN_OP_C) || defined(PERL_IN_TOKE_C)

typedef struct {
char small_buf [64];
} STRIP_WS_BUF_T;

#define STRIP_WS_PICK_BUF_TYPE(_buf, _len) \
(((_len) < (sizeof(STRIP_WS_BUF_T)-2)) ? (_buf) : NULL)
#define strip_spaces(_smlb, _op, _plen) \
S_strip_spaces(aTHX_ (_smlb), (_op), (_plen))

PERL_STATIC_INLINE char *
S_strip_spaces(pTHX_ const char * orig, STRLEN * const len)
S_strip_spaces(pTHX_ STRIP_WS_BUF_T * small_buf, const char * orig, STRLEN * const ptolen)
{
SV * tmpsv;
char * tmps;
tmpsv = newSVpvn_flags(orig, *len, SVs_TEMP);
tmps = SvPVX(tmpsv);
while ((*len)--) {
if (!isSPACE(*orig))
*tmps++ = *orig;
orig++;
}
*tmps = '\0';
*len = tmps - SvPVX(tmpsv);
return SvPVX(tmpsv);
STRLEN len = *ptolen;
const char * orig_p = orig;
const char * orig_end = orig + len;

while (orig_p < orig_end) {
if(isSPACE(*orig_p)) { /* no memchr()! think memspn() */
char * tmps;
char * tmps_p;
if(small_buf)
tmps = (char *)&small_buf->small_buf[0];
else
tmps = SvPVX(sv_2mortal(newSV(len+1)));
tmps_p = tmps+(orig_p-orig);
Move(orig, tmps, (Size_t)(orig_p-orig), char);
while (orig_p < orig_end) {
if(!isSPACE(*orig_p))
*tmps_p++ = *orig_p;
orig_p++;
}
*tmps_p = '\0';
*ptolen = tmps_p - tmps;
return tmps; /* return cleaned string */
}
orig_p++;
} /* ptolen was untouched */
return (char *)orig; /* original string unmodified */
}
# endif
#endif

/* ------------------------------- iperlsys.h ------------------------------- */
Expand Down
10 changes: 7 additions & 3 deletions op.c
Original file line number Diff line number Diff line change
Expand Up @@ -9977,6 +9977,8 @@ void
Perl_cv_ckproto_len_flags(pTHX_ const CV *cv, const GV *gv, const char *p,
const STRLEN len, const U32 flags)
{
STRIP_WS_BUF_T pbuf;
STRIP_WS_BUF_T cbuf;
SV *name = NULL, *msg;
const char * cvp = SvROK(cv)
? SvTYPE(SvRV_const(cv)) == SVt_PVCV
Expand All @@ -9994,8 +9996,8 @@ Perl_cv_ckproto_len_flags(pTHX_ const CV *cv, const GV *gv, const char *p,
return;

if (p && cvp) {
p = S_strip_spaces(aTHX_ p, &plen);
cvp = S_strip_spaces(aTHX_ cvp, &clen);
p = strip_spaces(STRIP_WS_PICK_BUF_TYPE(&pbuf, plen), p, &plen);
cvp = strip_spaces(STRIP_WS_PICK_BUF_TYPE(&cbuf, clen), cvp, &clen);
if ((flags & SVf_UTF8) == SvUTF8(cv)) {
if (plen == clen && memEQ(cvp, p, plen))
return;
Expand Down Expand Up @@ -14362,6 +14364,7 @@ OP *
Perl_ck_entersub_args_proto(pTHX_ OP *entersubop, GV *namegv, SV *protosv)
{
STRLEN proto_len;
STRIP_WS_BUF_T protocleanbuf;
const char *proto, *proto_end;
OP *aop, *prev, *cvop, *parent;
int optional = 0;
Expand All @@ -14375,7 +14378,8 @@ Perl_ck_entersub_args_proto(pTHX_ OP *entersubop, GV *namegv, SV *protosv)
if (SvTYPE(protosv) == SVt_PVCV)
proto = CvPROTO(protosv), proto_len = CvPROTOLEN(protosv);
else proto = SvPV(protosv, proto_len);
proto = S_strip_spaces(aTHX_ proto, &proto_len);
proto = strip_spaces( STRIP_WS_PICK_BUF_TYPE(&protocleanbuf, proto_len),
proto, &proto_len);
proto_end = proto + proto_len;
parent = entersubop;
aop = cUNOPx(entersubop)->op_first;
Expand Down
4 changes: 3 additions & 1 deletion toke.c
Original file line number Diff line number Diff line change
Expand Up @@ -6281,11 +6281,13 @@ yyl_colon(pTHX_ char *s)
static int
yyl_subproto(pTHX_ char *s, CV *cv)
{
STRIP_WS_BUF_T protocleanbuf;
STRLEN protolen = CvPROTOLEN(cv);
const char *proto = CvPROTO(cv);
bool optional;

proto = S_strip_spaces(aTHX_ proto, &protolen);
proto = strip_spaces( STRIP_WS_PICK_BUF_TYPE(&protocleanbuf, protolen),
proto, &protolen);
if (!protolen)
TERM(FUNC0SUB);
if ((optional = *proto == ';')) {
Expand Down

0 comments on commit 737f8d5

Please sign in to comment.