threading deadlock: change jl_fptr_wait_for_compiled to actually compile code

The jl_fptr_wait_for_compiled state merely means that the code could be
compiled; in many circumstances it will not actually be compiled until
much later, once either all of its edges are satisfied or it is demanded
to run immediately. The previous logic did not handle that possibility,
leading to deadlocks (possible even on a single thread).
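A minimal sketch of the failure mode and the fix, in plain C11 atomics (the state enum and compile_now are illustrative stand-ins, not Julia's actual internals):

    /* deadlock_model.c — a minimal model, NOT Julia's actual code */
    #include <stdatomic.h>
    #include <stdio.h>

    typedef enum { WAIT_FOR_COMPILED, COMPILED } state_t;

    static _Atomic state_t state = WAIT_FOR_COMPILED;

    /* stands in for jl_compile_codeinst: do the work, then publish it */
    static void compile_now(void) {
        atomic_store_explicit(&state, COMPILED, memory_order_release);
    }

    int main(void) {
        /* Old logic: spin until some other thread publishes the pointer.
         * With one thread, or with every other thread parked in the same
         * loop, nobody ever will:
         *     while (atomic_load(&state) == WAIT_FOR_COMPILED)
         *         ;  // jl_cpu_pause(); -> deadlock
         * New logic: the waiter performs the compilation itself. */
        if (atomic_load_explicit(&state, memory_order_acquire) == WAIT_FOR_COMPILED)
            compile_now();
        printf("%s\n", state == COMPILED ? "compiled" : "still waiting");
        return 0;
    }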

Running the following CI test showed a high rate of failure:

    $ ./julia -t 20 -q <<EOF
    for i = 1:1000
    i % 10 == 0 && @show i
    (@eval function _atthreads_greedy_dynamic_schedule()
        inc = Threads.Atomic{Int}(0)
        Threads.@threads :greedy for _ = 1:Threads.threadpoolsize()
            Threads.@threads :dynamic for _ = 1:Threads.threadpoolsize()
                Threads.atomic_add!(inc, 1)
            end
        end
        return inc[]
    end)() == Threads.threadpoolsize() * Threads.threadpoolsize()
    end
    EOF
vtjnash committed Nov 15, 2024
1 parent caa2f7d commit 6c458db
Showing 3 changed files with 13 additions and 19 deletions.

src/gf.c: 7 additions & 14 deletions
@@ -2654,15 +2654,15 @@ void jl_read_codeinst_invoke(jl_code_instance_t *ci, uint8_t *specsigflags, jl_callptr_t *invoke, void **specptr, int waitcompile)
     uint8_t flags = jl_atomic_load_acquire(&ci->specsigflags); // happens-before for subsequent read of fptr
     while (1) {
         jl_callptr_t initial_invoke = jl_atomic_load_acquire(&ci->invoke); // happens-before for subsequent read of fptr
-        while (initial_invoke == jl_fptr_wait_for_compiled_addr) {
+        if (initial_invoke == jl_fptr_wait_for_compiled_addr) {
             if (!waitcompile) {
                 *invoke = NULL;
                 *specptr = NULL;
                 *specsigflags = 0b00;
                 return;
             }
-            jl_cpu_pause();
-            initial_invoke = jl_atomic_load_acquire(&ci->invoke);
+            jl_compile_codeinst(ci);
+            initial_invoke = jl_atomic_load_acquire(&ci->invoke); // happens-before for subsequent read of fptr
         }
         void *fptr = jl_atomic_load_relaxed(&ci->specptr.fptr);
         if (initial_invoke == NULL || fptr == NULL) {
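For orientation, the waitcompile flag now selects between a non-blocking query and a compile-on-demand read. A hedged usage sketch of the two modes (the declaration is the one from src/julia_internal.h; ci and the surrounding control flow are illustrative only):

    uint8_t specsigflags;
    jl_callptr_t invoke;
    void *fptr;

    // waitcompile == 0: never compile; "not ready yet" comes back as NULLs
    jl_read_codeinst_invoke(ci, &specsigflags, &invoke, &fptr, 0);
    if (invoke == NULL) {
        // caller must arrange compilation itself or take a fallback path
    }

    // waitcompile == 1: after this change, compile in place when the
    // codeinst is still in the wait-for-compiled state, instead of
    // spinning until another thread does it
    jl_read_codeinst_invoke(ci, &specsigflags, &invoke, &fptr, 1);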
@@ -2759,14 +2759,14 @@ jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t *mi, size_t world)
         jl_code_instance_t *unspec = jl_atomic_load_relaxed(&unspecmi->cache);
         jl_callptr_t unspec_invoke = NULL;
         if (unspec && (unspec_invoke = jl_atomic_load_acquire(&unspec->invoke))) {
+            jl_read_codeinst_invoke(unspec, &specsigflags, &invoke, &fptr, 1);
             jl_code_instance_t *codeinst = jl_new_codeinst(mi, jl_nothing,
                 (jl_value_t*)jl_any_type, (jl_value_t*)jl_any_type, NULL, NULL,
                 0, 1, ~(size_t)0, 0, jl_nothing, 0, NULL, NULL);
             codeinst->rettype_const = unspec->rettype_const;
             uint8_t specsigflags;
             jl_callptr_t invoke;
             void *fptr;
-            jl_read_codeinst_invoke(unspec, &specsigflags, &invoke, &fptr, 1);
             jl_atomic_store_relaxed(&codeinst->specptr.fptr, fptr);
             jl_atomic_store_relaxed(&codeinst->invoke, invoke);
             // unspec is probably not specsig, but might be using specptr
@@ -2864,14 +2864,14 @@ jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t *mi, size_t world)
             jl_typeinf_timing_end(start, is_recompile);
             return ucache;
         }
+        jl_read_codeinst_invoke(ucache, &specsigflags, &invoke, &fptr, 1);
         codeinst = jl_new_codeinst(mi, jl_nothing,
             (jl_value_t*)jl_any_type, (jl_value_t*)jl_any_type, NULL, NULL,
             0, 1, ~(size_t)0, 0, jl_nothing, 0, NULL, NULL);
         codeinst->rettype_const = ucache->rettype_const;
         uint8_t specsigflags;
         jl_callptr_t invoke;
         void *fptr;
-        jl_read_codeinst_invoke(ucache, &specsigflags, &invoke, &fptr, 1);
         // unspec is always not specsig, but might use specptr
         jl_atomic_store_relaxed(&codeinst->specptr.fptr, fptr);
         jl_atomic_store_relaxed(&codeinst->invoke, invoke);
@@ -2906,16 +2906,9 @@ jl_value_t *jl_fptr_sparam(jl_value_t *f, jl_value_t **args, uint32_t nargs, jl_code_instance_t *m)
 
 jl_value_t *jl_fptr_wait_for_compiled(jl_value_t *f, jl_value_t **args, uint32_t nargs, jl_code_instance_t *m)
 {
-    // This relies on the invariant that the JIT will set the invoke ptr immediately upon adding `m` to itself.
-    size_t nthreads = jl_atomic_load_relaxed(&jl_n_threads);
-    // This should only be possible if there's more than one thread. If not, either there's a bug somewhere
-    // that resulted in this not getting cleared, or we're about to deadlock. Either way, that's bad.
-    if (nthreads == 1) {
-        jl_error("Internal error: Reached jl_fptr_wait_for_compiled in single-threaded execution.");
-    }
     jl_callptr_t invoke = jl_atomic_load_acquire(&m->invoke);
-    while (invoke == &jl_fptr_wait_for_compiled) {
-        jl_cpu_pause();
+    if (invoke == &jl_fptr_wait_for_compiled) {
+        jl_compile_codeinst(m);
         invoke = jl_atomic_load_acquire(&m->invoke);
     }
     return invoke(f, args, nargs, m);

src/jitlayers.cpp: 5 additions & 4 deletions
@@ -278,6 +278,7 @@ static int jl_analyze_workqueue(jl_code_instance_t *callee, jl_codegen_params_t
         // But it must be consistent with the following invokenames lookup, which is protected by the engine_lock
         uint8_t specsigflags;
         void *fptr;
+        void jl_read_codeinst_invoke(jl_code_instance_t *ci, uint8_t *specsigflags, jl_callptr_t *invoke, void **specptr, int waitcompile) JL_NOTSAFEPOINT; // not a safepoint (or deadlock) in this file due to 0 parameter
         jl_read_codeinst_invoke(codeinst, &specsigflags, &invoke, &fptr, 0);
         //if (specsig ? specsigflags & 0b1 : invoke == jl_fptr_args_addr)
         if (invoke == jl_fptr_args_addr) {
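The added line above is a block-scope redeclaration: it re-attaches JL_NOTSAFEPOINT to jl_read_codeinst_invoke for this file's call sites only, which is sound here because jitlayers.cpp always passes waitcompile = 0 and therefore never reaches the compiling (safepointing) path. A standalone illustration of the same C trick (ANNOT, helper, and caller are hypothetical names; ANNOT models an analysis-only attribute):

    #include <stdio.h>

    #define ANNOT  /* analysis-only attribute; expands to nothing here */

    void helper(int mode);  /* the global declaration carries no annotation */

    void caller(void) {
        /* block-scope redeclaration of the same function, adding an
         * annotation that a static analyzer can apply to calls made
         * from this scope only */
        void helper(int mode) ANNOT;
        helper(0);
    }

    void helper(int mode) { printf("mode=%d\n", mode); }

    int main(void) { caller(); return 0; }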
@@ -697,13 +698,13 @@ static void recursive_compile_graph(
 // and adds the result to the jitlayers
 // (and the shadow module),
 // and generates code for it
-static jl_callptr_t _jl_compile_codeinst(
+static void _jl_compile_codeinst(
         jl_code_instance_t *codeinst,
         jl_code_info_t *src)
 {
     recursive_compile_graph(codeinst, src);
     jl_compile_codeinst_now(codeinst);
-    return jl_atomic_load_acquire(&codeinst->invoke);
+    assert(jl_is_compiled_codeinst(codeinst));
 }

@@ -819,7 +820,7 @@ extern "C" JL_DLLEXPORT_CODEGEN
 int jl_compile_codeinst_impl(jl_code_instance_t *ci)
 {
     int newly_compiled = 0;
-    if (jl_atomic_load_relaxed(&ci->invoke) == NULL) {
+    if (!jl_is_compiled_codeinst(ci)) {
         ++SpecFPtrCount;
         uint64_t start = jl_typeinf_timing_begin();
         _jl_compile_codeinst(ci, NULL);
@@ -862,7 +863,7 @@ void jl_generate_fptr_for_unspecialized_impl(jl_code_instance_t *unspec)
     if (src) {
         // TODO: first prepare recursive_compile_graph(unspec, src) before taking this lock to avoid recursion?
         JL_LOCK(&jitlock); // TODO: use a better lock
-        if (jl_atomic_load_relaxed(&unspec->invoke) == NULL) {
+        if (!jl_is_compiled_codeinst(unspec)) {
             assert(jl_is_code_info(src));
             ++UnspecFPtrCount;
             jl_debuginfo_t *debuginfo = src->debuginfo;

src/julia_internal.h: 1 addition & 1 deletion
@@ -677,7 +677,7 @@ JL_DLLEXPORT jl_code_instance_t *jl_get_method_inferred(
     jl_method_instance_t *mi JL_PROPAGATES_ROOT, jl_value_t *rettype,
     size_t min_world, size_t max_world, jl_debuginfo_t *di, jl_svec_t *edges);
 JL_DLLEXPORT jl_method_instance_t *jl_get_unspecialized(jl_method_t *def JL_PROPAGATES_ROOT);
-JL_DLLEXPORT void jl_read_codeinst_invoke(jl_code_instance_t *ci, uint8_t *specsigflags, jl_callptr_t *invoke, void **specptr, int waitcompile) JL_NOTSAFEPOINT;
+JL_DLLEXPORT void jl_read_codeinst_invoke(jl_code_instance_t *ci, uint8_t *specsigflags, jl_callptr_t *invoke, void **specptr, int waitcompile);
 JL_DLLEXPORT jl_method_instance_t *jl_method_match_to_mi(jl_method_match_t *match, size_t world, size_t min_valid, size_t max_valid, int mt_cache);
 
 JL_DLLEXPORT jl_code_instance_t *jl_new_codeinst_uninit(jl_method_instance_t *mi, jl_value_t *owner);
