Skip to content

Commit

Permalink
make objectid effects total (#52119)
Browse files Browse the repository at this point in the history
Avoids the need to rehash most dictionaries on reload.

System image data size increases from about 109MB to 112MB, because each serialized
mutable object now stores its object id next to its header, and there are about 130k Arrays,
75k CodeInstances, 30k Methods, 80k TypeMapEntries, 75k MethodInstances, 36k Core.Bindings, and other mutable objects.
  • Loading branch information
vtjnash authored Nov 15, 2023
1 parent cd98632 commit a26e23a
Show file tree
Hide file tree
Showing 9 changed files with 67 additions and 63 deletions.
14 changes: 3 additions & 11 deletions base/reflection.jl
Original file line number Diff line number Diff line change
Expand Up @@ -720,18 +720,10 @@ If `x === y` then `objectid(x) == objectid(y)`, and usually when `x !== y`, `obj
See also [`hash`](@ref), [`IdDict`](@ref).
"""
function objectid(x)
# objectid is foldable iff it isn't a pointer.
if isidentityfree(typeof(x))
return _foldable_objectid(x)
end
return _objectid(x)
end
function _foldable_objectid(@nospecialize(x))
@_foldable_meta
_objectid(x)
function objectid(@nospecialize(x))
@_total_meta
return ccall(:jl_object_id, UInt, (Any,), x)
end
_objectid(@nospecialize(x)) = ccall(:jl_object_id, UInt, (Any,), x)

"""
isdispatchtuple(T)
Expand Down
59 changes: 34 additions & 25 deletions src/builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -344,6 +344,9 @@ static uintptr_t type_object_id_(jl_value_t *v, jl_varidx_t *env) JL_NOTSAFEPOIN
i++;
pe = pe->prev;
}
uintptr_t bits = jl_astaggedvalue(v)->header;
if (bits & GC_IN_IMAGE)
return ((uintptr_t*)v)[-2];
return inthash((uintptr_t)v);
}
if (tv == jl_uniontype_type) {
Expand Down Expand Up @@ -432,50 +435,56 @@ static uintptr_t immut_id_(jl_datatype_t *dt, jl_value_t *v, uintptr_t h) JL_NOT
return h;
}

static uintptr_t NOINLINE jl_object_id__cold(jl_datatype_t *dt, jl_value_t *v) JL_NOTSAFEPOINT
static uintptr_t NOINLINE jl_object_id__cold(uintptr_t tv, jl_value_t *v) JL_NOTSAFEPOINT
{
if (dt == jl_simplevector_type)
return hash_svec((jl_svec_t*)v);
if (dt == jl_datatype_type) {
jl_datatype_t *dtv = (jl_datatype_t*)v;
uintptr_t h = ~dtv->name->hash;
return bitmix(h, hash_svec(dtv->parameters));
}
if (dt == jl_string_type) {
jl_datatype_t *dt = (jl_datatype_t*)jl_to_typeof(tv);
if (dt->name->mutabl) {
if (dt == jl_string_type) {
#ifdef _P64
return memhash_seed(jl_string_data(v), jl_string_len(v), 0xedc3b677);
return memhash_seed(jl_string_data(v), jl_string_len(v), 0xedc3b677);
#else
return memhash32_seed(jl_string_data(v), jl_string_len(v), 0xedc3b677);
return memhash32_seed(jl_string_data(v), jl_string_len(v), 0xedc3b677);
#endif
}
if (dt == jl_module_type) {
jl_module_t *m = (jl_module_t*)v;
return m->hash;
}
if (dt->name->mutabl)
}
if (dt == jl_simplevector_type)
return hash_svec((jl_svec_t*)v);
if (dt == jl_datatype_type) {
jl_datatype_t *dtv = (jl_datatype_t*)v;
uintptr_t h = ~dtv->name->hash;
return bitmix(h, hash_svec(dtv->parameters));
}
if (dt == jl_module_type) {
jl_module_t *m = (jl_module_t*)v;
return m->hash;
}
uintptr_t bits = jl_astaggedvalue(v)->header;
if (bits & GC_IN_IMAGE)
return ((uintptr_t*)v)[-2];
return inthash((uintptr_t)v);
}
return immut_id_(dt, v, dt->hash);
}

JL_DLLEXPORT inline uintptr_t jl_object_id_(jl_value_t *tv, jl_value_t *v) JL_NOTSAFEPOINT
JL_DLLEXPORT inline uintptr_t jl_object_id_(uintptr_t tv, jl_value_t *v) JL_NOTSAFEPOINT
{
jl_datatype_t *dt = (jl_datatype_t*)tv;
if (dt == jl_symbol_type)
if (tv == jl_symbol_tag << 4) {
return ((jl_sym_t*)v)->hash;
if (dt == jl_typename_type)
return ((jl_typename_t*)v)->hash;
if (dt == jl_datatype_type) {
}
else if (tv == jl_datatype_tag << 4) {
jl_datatype_t *dtv = (jl_datatype_t*)v;
if (dtv->isconcretetype)
return dtv->hash;
}
return jl_object_id__cold(dt, v);
else if (tv == (uintptr_t)jl_typename_type) {
return ((jl_typename_t*)v)->hash;
}
return jl_object_id__cold(tv, v);
}


JL_DLLEXPORT uintptr_t jl_object_id(jl_value_t *v) JL_NOTSAFEPOINT
{
return jl_object_id_(jl_typeof(v), v);
return jl_object_id_(jl_typetagof(v), v);
}

// eq hash table --------------------------------------------------------------
Expand Down
2 changes: 1 addition & 1 deletion src/ccall.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1880,7 +1880,7 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
if (!val.isghost && !val.ispointer())
val = value_to_pointer(ctx, val);
Value *args[] = {
emit_typeof(ctx, val),
emit_typeof(ctx, val, false, true),
val.isghost ? ConstantPointerNull::get(T_pint8_derived) :
ctx.builder.CreateBitCast(
decay_derived(ctx, data_pointer(ctx, val)),
Expand Down
2 changes: 1 addition & 1 deletion src/codegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1165,7 +1165,7 @@ static const auto jlapplytype_func = new JuliaFunction<>{
static const auto jl_object_id__func = new JuliaFunction<TypeFnContextAndSizeT>{
XSTR(jl_object_id_),
[](LLVMContext &C, Type *T_size) { return FunctionType::get(T_size,
{JuliaType::get_prjlvalue_ty(C), PointerType::get(getInt8Ty(C), AddressSpace::Derived)}, false); },
{T_size, PointerType::get(getInt8Ty(C), AddressSpace::Derived)}, false); },
nullptr,
};
static const auto setjmp_func = new JuliaFunction<TypeFnContextAndTriple>{
Expand Down
2 changes: 1 addition & 1 deletion src/iddict.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
// compute empirical max-probe for a given size
#define max_probe(size) ((size) <= 1024 ? 16 : (size) >> 6)

#define keyhash(k) jl_object_id_(jl_typeof(k), k)
#define keyhash(k) jl_object_id_(jl_typetagof(k), k)
#define h2index(hv, sz) (size_t)(((hv) & ((sz)-1)) * 2)

static inline int jl_table_assign_bp(jl_genericmemory_t **pa, jl_value_t *key, jl_value_t *val);
Expand Down
2 changes: 1 addition & 1 deletion src/julia_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -1370,7 +1370,7 @@ JL_DLLEXPORT int jl_stored_inline(jl_value_t *el_type);
JL_DLLEXPORT jl_value_t *(jl_array_data_owner)(jl_array_t *a);
JL_DLLEXPORT jl_array_t *jl_array_copy(jl_array_t *ary);

JL_DLLEXPORT uintptr_t jl_object_id_(jl_value_t *tv, jl_value_t *v) JL_NOTSAFEPOINT;
JL_DLLEXPORT uintptr_t jl_object_id_(uintptr_t tv, jl_value_t *v) JL_NOTSAFEPOINT;
JL_DLLEXPORT void jl_set_next_task(jl_task_t *task) JL_NOTSAFEPOINT;

// -- synchronization utilities -- //
Expand Down
33 changes: 15 additions & 18 deletions src/staticdata.c
Original file line number Diff line number Diff line change
Expand Up @@ -501,8 +501,6 @@ typedef struct {
int8_t incremental;
} jl_serializer_state;

static jl_value_t *jl_idtable_type = NULL;
static jl_typename_t *jl_idtable_typename = NULL;
static jl_value_t *jl_bigint_type = NULL;
static int gmp_limb_size = 0;
static jl_sym_t *jl_docmeta_sym = NULL;
Expand Down Expand Up @@ -1253,24 +1251,35 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED
assert(!(s->incremental && jl_object_in_image(v)));
jl_datatype_t *t = (jl_datatype_t*)jl_typeof(v);
assert((!jl_is_datatype_singleton(t) || t->instance == v) && "detected singleton construction corruption");
int mutabl = t->name->mutabl;
ios_t *f = s->s;
if (t->smalltag) {
if (t->layout->npointers == 0 || t == jl_string_type) {
if (jl_datatype_nfields(t) == 0 || t->name->mutabl == 0 || t == jl_string_type) {
if (jl_datatype_nfields(t) == 0 || mutabl == 0 || t == jl_string_type) {
f = s->const_data;
}
}
}

// realign stream to expected gc alignment (16 bytes)
// realign stream to expected gc alignment (16 bytes) after tag
uintptr_t skip_header_pos = ios_pos(f) + sizeof(jl_taggedvalue_t);
uintptr_t object_id_expected = mutabl &&
t != jl_datatype_type &&
t != jl_typename_type &&
t != jl_string_type &&
t != jl_simplevector_type &&
t != jl_module_type;
if (object_id_expected)
skip_header_pos += sizeof(size_t);
write_padding(f, LLT_ALIGN(skip_header_pos, 16) - skip_header_pos);

// write header
if (object_id_expected)
write_uint(f, jl_object_id(v));
if (s->incremental && jl_needs_serialization(s, (jl_value_t*)t) && needs_uniquing((jl_value_t*)t))
arraylist_push(&s->uniquing_types, (void*)(uintptr_t)(ios_pos(f)|1));
if (f == s->const_data)
write_uint(s->const_data, ((uintptr_t)t->smalltag << 4) | GC_OLD_MARKED);
write_uint(s->const_data, ((uintptr_t)t->smalltag << 4) | GC_OLD_MARKED | GC_IN_IMAGE);
else
write_gctaggedfield(s, t);
size_t reloc_offset = ios_pos(f);
Expand Down Expand Up @@ -1718,11 +1727,6 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED
arraylist_push(&s->fixup_objs, (void*)reloc_offset);
}
}
else if (((jl_datatype_t*)(jl_typeof(v)))->name == jl_idtable_typename) {
assert(f == s->s);
// will need to rehash this, later (after types are fully constructed)
arraylist_push(&s->fixup_objs, (void*)reloc_offset);
}
else if (jl_is_genericmemoryref(v)) {
assert(f == s->s);
record_memoryref(s, reloc_offset, *(jl_genericmemoryref_t*)v);
Expand Down Expand Up @@ -2582,8 +2586,6 @@ static void jl_save_system_image_to_stream(ios_t *f, jl_array_t *mod_array,
}
}
}
jl_idtable_type = jl_base_module ? jl_get_global(jl_base_module, jl_symbol("IdDict")) : NULL;
jl_idtable_typename = jl_base_module ? ((jl_datatype_t*)jl_unwrap_unionall((jl_value_t*)jl_idtable_type))->name : NULL;
jl_bigint_type = jl_base_module ? jl_get_global(jl_base_module, jl_symbol("BigInt")) : NULL;
if (jl_bigint_type) {
gmp_limb_size = jl_unbox_long(jl_get_global((jl_module_t*)jl_get_global(jl_base_module, jl_symbol("GMP")),
Expand Down Expand Up @@ -3409,12 +3411,7 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl
}
}
else {
// rehash IdDict
//assert(((jl_datatype_t*)(jl_typeof(obj)))->name == jl_idtable_typename);
jl_genericmemory_t **a = (jl_genericmemory_t**)obj;
assert(jl_typetagis(*a, jl_memory_any_type));
*a = jl_idtable_rehash(*a, (*a)->length);
jl_gc_wb(obj, *a);
abort();
}
}
// Now pick up the globalref binding pointer field
Expand Down
9 changes: 4 additions & 5 deletions test/core.jl
Original file line number Diff line number Diff line change
Expand Up @@ -8000,12 +8000,11 @@ for T in (Int, String, Symbol, Module)
@test Core.Compiler.is_foldable(Base.infer_effects(hash, (Tuple{T},)))
@test Core.Compiler.is_foldable(Base.infer_effects(objectid, (Tuple{T,T},)))
@test Core.Compiler.is_foldable(Base.infer_effects(hash, (Tuple{T,T},)))
@test Core.Compiler.is_foldable(Base.infer_effects(objectid, (Ref{T},)))
@test Core.Compiler.is_foldable(Base.infer_effects(objectid, (Tuple{Ref{T}},)))
@test Core.Compiler.is_foldable(Base.infer_effects(objectid, (Tuple{Vector{T}},)))
end
@test !Core.Compiler.is_consistent(Base.infer_effects(objectid, (Ref{Int},)))
@test !Core.Compiler.is_consistent(Base.infer_effects(objectid, (Tuple{Ref{Int}},)))
# objectid for datatypes is inconsistent for types that have unbound type parameters.
@test !Core.Compiler.is_consistent(Base.infer_effects(objectid, (DataType,)))
@test !Core.Compiler.is_consistent(Base.infer_effects(objectid, (Tuple{Vector{Int}},)))
@test Core.Compiler.is_foldable(Base.infer_effects(objectid, (DataType,)))

# donotdelete should not taint consistency of the containing function
f_donotdete(x) = (Core.Compiler.donotdelete(x); 1)
Expand Down
7 changes: 7 additions & 0 deletions test/precompile.jl
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,9 @@ precompile_test_harness(false) do dir
a_vec_inline = Pair{Int,Any}[]
push!(a_vec_inline, 1=>2, 3=>4)
a_mat_inline = reshape(a_vec_inline, (1, 2))
oid_vec_int = objectid(a_vec_int)
oid_mat_int = objectid(a_mat_int)
end
""")
# Issue #12623
Expand Down Expand Up @@ -371,6 +374,10 @@ precompile_test_harness(false) do dir
@test Foo.a_mat_inline == Pair{Int,Any}[1=>2 3=>4]
Foo.a_mat_inline[1, 2] = 5=>6
@test Foo.a_vec_inline[2] === Pair{Int,Any}(5, 6)

@test objectid(Foo.a_vec_int) === Foo.oid_vec_int
@test objectid(Foo.a_mat_int) === Foo.oid_mat_int
@test Foo.oid_vec_int !== Foo.oid_mat_int
end

@eval begin function ccallable_test()
Expand Down

0 comments on commit a26e23a

Please sign in to comment.