Skip to content

Commit

Permalink
success!!!, new rule, "struct body_details bodies_by_type[]" only in …
Browse files Browse the repository at this point in the history
…sv.c

-svfix.pl is quick throwaway garbage done in 20 mins and probably doesnt
 regen sv_inline.h properly and copy pasting/replace regexps were
 was used anyway to fix up the code, it can be rewritten correctly tho
 and put in as a official regen.pl script

how to finish this fix, options

set and unset sv_type as a CPP macro then

-#include a header 17 times that contains ONLY only Perl_newSV_type() and
 its mortal() sister creating 17*2 static inline fns (basically
 what I did here), code is stepable, extra ms'es of I/O build times
 perf degrade debate may or may not come up, I hope the CC has a sane
 in memory cache for .h files and doesn't go back to the kernel

or put the entire Perl_newSV_type() fnc in a #define
"#define PETS blah(foo(myarg)) +  \
  cat(dog(fur)) + \
  laser(ball(toy))
"
 then execute that macro 17 times

or write 17 Perl_newSV_type() copies into sv_inline.h with
/regen.pl infrastructure (fastest for build speed core and
build speed CPAN and code is c dbg stepable) OR is a dedicated
"sv_newg.h" for regen.pl needed? does the master
Perl_newSV_type() template live in a .pl or a .h? i dont have an
opinion

or against concept of sv_inline.h just have 5-10 hand written
versions sv type specific of Perl_newSV_type(), its a cheap gimick fix
to keep all 17 types together mashed with if/else/switch in 1 func and
expecting bug free perfection from LTO engines of various C compilers,
and expecting perfection from an single vendor LTO engine is very
against the spirit of portable code

-todo ideas, turn those super long #define ==?:==?:==?: into
char array/struct initializers, stored in macros, one faux-string per
each column of struct body_details, use that macro as c auto stk rw
array initializer, then do the

U32 len [3] = "\x01\x02\x03"[sv_type];

or

U8 sizes [3] = {1,2,3};
U32 len = sizes[sv_type];

which in perl core would look

U32 arena_size = SVDB_AR_SZ_DECL;
U32 len = arena_size[sv_type];

maybe VC will optimize those since no global memory is used. Only
Perl_newSV_typeX() needs this.

in this commit static inline Perl_newSV_typeX(pTHX_ const svtype type)
which is the ONLY Perl_newSV_type*() variant that take an arbitrary
svtype arg, this is the fallback for gv_pvn_add_by() since I couldn't
"const" that call to newSV_type() cuz gv_pvn_add_by() is only place in the
whole core that takes a random SV type number.

Internals of Perl_newSV_typeX() are trashy, here is an example, MSVC
DID not turn this into a jump table but instead 17 test/cond_jump ops.

      v5 = (char *)S_new_body(v2);
      v6 = 40i64;
      if ( v2 == 15 )
        v6 = 136i64;
      if ( v2 == 14 )
        v6 = 104i64;
      if ( v2 == 13 )
        v6 = 104i64;
      if ( v2 == 12 )
        v6 = 32i64;
      if ( v2 == 11 )
        v6 = 40i64;
      if ( v2 == 10 )
        v6 = 80i64;
      if ( v2 == 9 )
        v6 = 48i64;
      if ( v2 == 8 )
        v6 = 224i64;
      if ( v2 == 7 )
        v6 = 48i64;
      if ( v2 == 6 )
        v6 = 32i64;
      if ( v2 == 5 )
        v6 = 24i64;
      if ( v2 == 4 )
        v6 = 40i64;
      if ( v2 == 3 )
        v6 = 16i64;
      if ( v2 == 2 )
        v6 = 0i64;
      if ( v2 == 1 )
        v6 = 0i64;
      memset(v5, 0, v6 & -(signed __int64)(v2 != 0));

Solution is move Perl_newSV_typeX() to sv.c, and let it be struct
body_details driven. Cuz it only purpose is when newSV_type() absolutly
CAN NOT be constant folded (random number input). it only has 1 caller
in core.

S_new_body() properly const folded away in 99% of cases except for TWO
callers Perl_newSV_typeX() and Perl_make_trie(). Perl_make_trie() failure
to inline is bizzare, since Perl_make_trie() internally does

"v9 = S_new_body(SVt_PVAV);"

and DID inline Perl_newSV_typeSVt_PVAV() !!! and therefore
Perl_make_trie() has the AV field initing/nulling code.

Here is the "optimized" contents of S_new_body(), its junk
performance/design wise (but runtime correct/no bugs)

void **__fastcall S_new_body(svtype sv_type)
{
  svtype v1; // er9
  __int64 v2; // rbx
  void **result; // rax
  signed int v4; // ecx
  signed __int64 v5; // rax

  v1 = sv_type;
  v2 = sv_type;
  result = (void **)PL_body_roots[sv_type];
  if ( !result )
  {
    v4 = 4080;
    if ( v1 == 15 )
      v4 = 3264;
    if ( v1 == 14 )
      v4 = 2080;
    if ( v1 == 13 )
      v4 = 4056;
    if ( v1 == 12 )
      v4 = 4064;
    if ( v1 == 11 )
      v4 = 4080;
    if ( v1 == 10 )
      v4 = 4080;
    if ( v1 == 9 )
      v4 = 4080;
    if ( v1 == 8 )
      v4 = 4032;
    if ( v1 == 7 )
      v4 = 4080;
    if ( v1 == 6 )
      v4 = 3296;
    if ( v1 == 5 )
      v4 = 3424;
    if ( v1 == 4 )
      v4 = 3120;
    if ( v1 == 3 )
      v4 = 3536;
    if ( v1 == 2 )
      v4 = 0;
    if ( v1 == 1 )
      v4 = 0;
    v5 = 40i64;
    if ( v1 == 15 )
      v5 = 136i64;
    if ( v1 == 14 )
      v5 = 104i64;
    if ( v1 == 13 )
      v5 = 104i64;
    if ( v1 == 12 )
      v5 = 32i64;
    if ( v1 == 11 )
      v5 = 40i64;
    if ( v1 == 10 )
      v5 = 80i64;
    if ( v1 == 9 )
      v5 = 48i64;
    if ( v1 == 8 )
      v5 = 224i64;
    if ( v1 == 7 )
      v5 = 48i64;
    if ( v1 == 6 )
      v5 = 32i64;
    if ( v1 == 5 )
      v5 = 24i64;
    if ( v1 == 4 )
      v5 = 40i64;
    if ( v1 == 3 )
      v5 = 16i64;
    if ( v1 == 2 )
      v5 = 0i64;
    if ( v1 == 1 )
      v5 = 0i64;
    result = (void **)Perl_more_bodies(v1, v5 & -(signed __int64)(v1 != 0), v4 & (unsigned int)-(v1 != 0));
  }
  PL_body_roots[v2] = *result;
  return result;
}
------------
disassembly view of S_new_body()
------------
cmp     r9d, 0Fh
lea     edi, [rbp+28h]
mov     r8d, 0FF0h
lea     r11d, [rbp+20h]
mov     edx, 0CC0h
lea     r10d, [rbp+30h]
mov     ecx, r8d
mov     eax, r9d
cmovz   ecx, edx
cmp     r9d, 0Eh
mov     edx, 820h
cmovz   ecx, edx
cmp     r9d, 0Dh
lea     edx, [r8-18h]
cmovz   ecx, edx
cmp     r9d, 0Ch
lea     edx, [r8-10h]
cmovz   ecx, edx
cmp     r9d, 0Bh
lea     edx, [r8-30h]
cmovz   ecx, r8d
cmp     r9d, 0Ah
cmovz   ecx, r8d
cmp     r9d, 9
cmovz   ecx, r8d
cmp     r9d, 8
cmovz   ecx, edx
cmp     r9d, 7
mov     edx, 0CE0h
cmovz   ecx, r8d
cmp     r9d, 6
cmovz   ecx, edx
cmp     r9d, 5
mov     edx, 0D60h
cmovz   ecx, edx
cmp     r9d, 4
mov     edx, 0C30h
cmovz   ecx, edx
cmp     r9d, 3
mov     edx, 0DD0h
cmovz   ecx, edx
cmp     r9d, 2
lea     edx, [rdi+60h]
cmovz   ecx, ebp
cmp     r9d, 1
cmovz   ecx, ebp
neg     eax
sbb     eax, eax
and     eax, ecx
mov     ecx, r9d
mov     r8d, eax
cmp     r9d, 0Fh
mov     eax, edi
cmovz   eax, edx
cmp     r9d, 0Eh
lea     edx, [rbp+68h]
cmovz   eax, edx
cmp     r9d, 0Dh
cmovz   eax, edx
cmp     r9d, 0Ch
lea     edx, [rbp+50h]
cmovz   eax, r11d
cmp     r9d, 0Bh
cmovz   eax, edi
cmp     r9d, 0Ah
cmovz   eax, edx
cmp     r9d, 9
mov     edx, 0E0h
cmovz   eax, r10d
cmp     r9d, 8
cmovz   eax, edx
cmp     r9d, 7
lea     edx, [rbp+18h]
cmovz   eax, r10d
cmp     r9d, 6
cmovz   eax, r11d
cmp     r9d, 5
cmovz   eax, edx
cmp     r9d, 4
lea     edx, [rbp+10h]
cmovz   eax, edi
cmp     r9d, 3
cmovz   eax, edx
cmp     r9d, 2
cmovz   eax, ebp
cmp     r9d, 1
cmovz   eax, ebp
neg     ecx
mov     ecx, r9d
sbb     rdx, rdx
and     rdx, rax
call    Perl_more_bodies
----------------------

17 test ops and 17 conditional_move_constant_8_bits ops

solution, turn S_new_body() back into a macro so no CC ever tries to
ref-inline it. It was a macro before sv_inline.h branch was merged

TODO add XSApitest.xs that worlds longest macros are identical to the
master correct copy (struct body_details).

byte size drops from before these 3 commits to this "success commit"

mp.exe
0x1241AC-0x1224EC=7360
0x19D3D8-0x19B8E8=6896

p541.dll
0x154886-0x1532A6=5600
0x1AA19E-0x1A862E=7024


BEFORE

Dump of file ..\miniperl.exe
SECTION HEADER #1
   .text name
  1241AC virtual size
SECTION HEADER #2
  .rdata name
  19D3D8 virtual size

Dump of file ..\perl541.dll
SECTION HEADER #1
   .text name
  154886 virtual size
SECTION HEADER #2
  .rdata name
  1AA19E virtual size



AFTER

Dump of file ..\perl541.dll
SECTION HEADER #1
   .text name
  1532A6 virtual size
SECTION HEADER #2
  .rdata name
  1A862E virtual size


Dump of file ..\miniperl.exe
SECTION HEADER #1
   .text name
  1224EC virtual size
SECTION HEADER #2
  .rdata name
  19B8E8 virtual size
  • Loading branch information
bulk88 committed Oct 14, 2024
1 parent a98b094 commit f5bfe6a
Showing 1 changed file with 38 additions and 112 deletions.
150 changes: 38 additions & 112 deletions sv_inline.h
Original file line number Diff line number Diff line change
Expand Up @@ -579,111 +579,16 @@ PERL_STATIC_INLINE SV *
Perl_newSV_typeX(pTHX_ const svtype type)
{

static const struct body_details bodies_by_type_STAT[] = {
/* HEs use this offset for their arena. */
{ 0, 0, 0, SVt_NULL, FALSE, NONV, NOARENA, 0 },

/* IVs are in the head, so the allocation size is 0. */
{ 0,
sizeof(IV), /* This is used to copy out the IV body. */
STRUCT_OFFSET(XPVIV, xiv_iv), SVt_IV, FALSE, NONV,
NOARENA /* IVS don't need an arena */, 0
},

#if NVSIZE <= IVSIZE
{ 0, sizeof(NV),
STRUCT_OFFSET(XPVNV, xnv_u),
SVt_NV, FALSE, HADNV, NOARENA, 0 },
#else
{ sizeof(NV), sizeof(NV),
STRUCT_OFFSET(XPVNV, xnv_u),
SVt_NV, FALSE, HADNV, HASARENA, FIT_ARENA(0, sizeof(NV)) },
#endif

{ sizeof(XPV) - STRUCT_OFFSET(XPV, xpv_cur),
copy_length(XPV, xpv_len) - STRUCT_OFFSET(XPV, xpv_cur),
+ STRUCT_OFFSET(XPV, xpv_cur),
SVt_PV, FALSE, NONV, HASARENA,
FIT_ARENA(0, sizeof(XPV) - STRUCT_OFFSET(XPV, xpv_cur)) },

{ sizeof(XINVLIST) - STRUCT_OFFSET(XPV, xpv_cur),
copy_length(XINVLIST, is_offset) - STRUCT_OFFSET(XPV, xpv_cur),
+ STRUCT_OFFSET(XPV, xpv_cur),
SVt_INVLIST, TRUE, NONV, HASARENA,
FIT_ARENA(0, sizeof(XINVLIST) - STRUCT_OFFSET(XPV, xpv_cur)) },

{ sizeof(XPVIV) - STRUCT_OFFSET(XPV, xpv_cur),
copy_length(XPVIV, xiv_u) - STRUCT_OFFSET(XPV, xpv_cur),
+ STRUCT_OFFSET(XPV, xpv_cur),
SVt_PVIV, FALSE, NONV, HASARENA,
FIT_ARENA(0, sizeof(XPVIV) - STRUCT_OFFSET(XPV, xpv_cur)) },

{ sizeof(XPVNV) - STRUCT_OFFSET(XPV, xpv_cur),
copy_length(XPVNV, xnv_u) - STRUCT_OFFSET(XPV, xpv_cur),
+ STRUCT_OFFSET(XPV, xpv_cur),
SVt_PVNV, FALSE, HADNV, HASARENA,
FIT_ARENA(0, sizeof(XPVNV) - STRUCT_OFFSET(XPV, xpv_cur)) },

{ sizeof(XPVMG), copy_length(XPVMG, xnv_u), 0, SVt_PVMG, FALSE, HADNV,
HASARENA, FIT_ARENA(0, sizeof(XPVMG)) },

{ sizeof(ALIGNED_TYPE_NAME(regexp)),
sizeof(regexp),
0,
SVt_REGEXP, TRUE, NONV, HASARENA,
FIT_ARENA(0, sizeof(ALIGNED_TYPE_NAME(regexp)))
},

{ sizeof(ALIGNED_TYPE_NAME(XPVGV)), sizeof(XPVGV), 0, SVt_PVGV, TRUE, HADNV,
HASARENA, FIT_ARENA(0, sizeof(ALIGNED_TYPE_NAME(XPVGV))) },

{ sizeof(ALIGNED_TYPE_NAME(XPVLV)), sizeof(XPVLV), 0, SVt_PVLV, TRUE, HADNV,
HASARENA, FIT_ARENA(0, sizeof(ALIGNED_TYPE_NAME(XPVLV))) },

{ sizeof(ALIGNED_TYPE_NAME(XPVAV)),
copy_length(XPVAV, xav_alloc),
0,
SVt_PVAV, TRUE, NONV, HASARENA,
FIT_ARENA(0, sizeof(ALIGNED_TYPE_NAME(XPVAV))) },

{ sizeof(ALIGNED_TYPE_NAME(XPVHV)),
copy_length(XPVHV, xhv_max),
0,
SVt_PVHV, TRUE, NONV, HASARENA,
FIT_ARENA(0, sizeof(ALIGNED_TYPE_NAME(XPVHV))) },

{ sizeof(ALIGNED_TYPE_NAME(XPVCV)),
sizeof(XPVCV),
0,
SVt_PVCV, TRUE, NONV, HASARENA,
FIT_ARENA(0, sizeof(ALIGNED_TYPE_NAME(XPVCV))) },

{ sizeof(ALIGNED_TYPE_NAME(XPVFM)),
sizeof(XPVFM),
0,
SVt_PVFM, TRUE, NONV, NOARENA,
FIT_ARENA(20, sizeof(ALIGNED_TYPE_NAME(XPVFM))) },

{ sizeof(ALIGNED_TYPE_NAME(XPVIO)),
sizeof(XPVIO),
0,
SVt_PVIO, TRUE, NONV, HASARENA,
FIT_ARENA(24, sizeof(ALIGNED_TYPE_NAME(XPVIO))) },

{ sizeof(ALIGNED_TYPE_NAME(XPVOBJ)),
copy_length(XPVOBJ, xobject_fields),
0,
SVt_PVOBJ, TRUE, NONV, HASARENA,
FIT_ARENA(0, sizeof(ALIGNED_TYPE_NAME(XPVOBJ))) },
};

SV *sv;
void* new_body;
#ifdef WANT_SV_BODY_DETAILS
const struct body_details *type_details;
#endif

new_SV(sv);

bodies_by_type_STAT[type];
#ifdef WANT_SV_BODY_DETAILS
type_details = bodies_by_type + type;
#endif

SvFLAGS(sv) &= ~SVTYPEMASK;
SvFLAGS(sv) |= type;
Expand All @@ -706,20 +611,29 @@ static const struct body_details bodies_by_type_STAT[] = {
case SVt_PVHV:
case SVt_PVAV:
case SVt_PVOBJ:
assert(bodies_by_type_STAT[type].body_size);
#ifdef WANT_SV_BODY_DETAILS
assert(type_details->body_size);
#endif

#ifndef PURIFY
assert(bodies_by_type_STAT[type].arena);
assert(bodies_by_type_STAT[type].arena_size);
#ifdef WANT_SV_BODY_DETAILS
assert(type_details->arena);
assert(type_details->arena_size);
#endif
/* This points to the start of the allocated area. */
new_body = S_new_body(aTHX_ type);
/* xpvav and xpvhv have no offset, so no need to adjust new_body */
assert(!(bodies_by_type_STAT[type].offset));
#ifdef WANT_SV_BODY_DETAILS
assert(type_details->offset);
#endif
#else
/* We always allocated the full length item with PURIFY. To do this
we fake things so that arena is false for all 16 types.. */

new_body = new_NOARENAZ(&(bodies_by_type_STAT[type]));
#ifdef WANT_SV_BODY_DETAILS
new_body = new_NOARENAZ(type_details);
#else
new_body = new_NOARENAZ(type);
#endif
#endif
SvANY(sv) = new_body;

Expand Down Expand Up @@ -779,20 +693,32 @@ static const struct body_details bodies_by_type_STAT[] = {
* Obviously this all only holds as long as it's a true reflection of
* the bodies_by_type lookup table. */
#ifndef PURIFY
ASSUME(bodies_by_type_STAT[type].arena);
#ifdef WANT_SV_BODY_DETAILS
ASSUME(type_details->arena);
#endif
#endif
/* FALLTHROUGH */
case SVt_PVFM:

assert(bodies_by_type_STAT[type].body_size);
#ifdef WANT_SV_BODY_DETAILS
assert(type_details->body_size);
#endif
/* We always allocated the full length item with PURIFY. To do this
we fake things so that arena is false for all 16 types.. */
#ifndef PURIFY
if(bodies_by_type_STAT[type].arena) {
#ifdef WANT_SV_BODY_DETAILS
if(type_details->arena) {
#else
if(SVDB_arena(type)) {
#endif
/* This points to the start of the allocated area. */
new_body = S_new_body(aTHX_ type);
Zero(new_body, bodies_by_type_STAT[type].body_size, char);
new_body = ((char *)new_body) - bodies_by_type_STAT[type].offset;
#ifdef WANT_SV_BODY_DETAILS
Zero(new_body, type_details->body_size, char);
new_body = ((char *)new_body) - type_details->offset;
#else
Zero(new_body, SVDB_body_size(type), char);
new_body = ((char *)new_body) - SVDB_offset(type);
#endif
} else
#endif
{
Expand Down

0 comments on commit f5bfe6a

Please sign in to comment.