Skip to content

Commit

Permalink
Improve __crystal_once performance
Browse files Browse the repository at this point in the history
Based on the PR by @BlobCodes:
crystal-lang#15216

The performance improvement is two-fold:

1. the usage of a i8 instead of an i1 boolean to have 3 states instead
   of 2, which permits to quickly detect recursive calls without an
   array;
2. inline tricks to optimize the fast and slow paths.

Unlike the PR:

1. Doesn't use atomics: it already uses a mutex that guarantees acquire
   release memory ordering semantics, and __crystal_once_init is only
   ever called in the main thread before any other thread is started.
2. Removes the need for a state maintained by the compiler, yet keeps
   forward and backward compatibility (both signatures are supported).
  • Loading branch information
ysbaddaden committed Jan 9, 2025
1 parent 576a14c commit 43421ce
Show file tree
Hide file tree
Showing 4 changed files with 145 additions and 65 deletions.
6 changes: 3 additions & 3 deletions src/compiler/crystal/codegen/class_var.cr
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ class Crystal::CodeGenVisitor
initialized_flag_name = class_var_global_initialized_name(class_var)
initialized_flag = @main_mod.globals[initialized_flag_name]?
unless initialized_flag
initialized_flag = @main_mod.globals.add(@main_llvm_context.int1, initialized_flag_name)
initialized_flag.initializer = @main_llvm_context.int1.const_int(0)
initialized_flag = @main_mod.globals.add(@main_llvm_context.int8, initialized_flag_name)
initialized_flag.initializer = @main_llvm_context.int8.const_int(0)
initialized_flag.linkage = LLVM::Linkage::Internal if @single_module
initialized_flag.thread_local = true if class_var.thread_local?
end
Expand Down Expand Up @@ -61,7 +61,7 @@ class Crystal::CodeGenVisitor
initialized_flag_name = class_var_global_initialized_name(class_var)
initialized_flag = @llvm_mod.globals[initialized_flag_name]?
unless initialized_flag
initialized_flag = @llvm_mod.globals.add(llvm_context.int1, initialized_flag_name)
initialized_flag = @llvm_mod.globals.add(llvm_context.int8, initialized_flag_name)
initialized_flag.thread_local = true if class_var.thread_local?
end
end
Expand Down
4 changes: 2 additions & 2 deletions src/compiler/crystal/codegen/const.cr
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,8 @@ class Crystal::CodeGenVisitor
initialized_flag_name = const.initialized_llvm_name
initialized_flag = @main_mod.globals[initialized_flag_name]?
unless initialized_flag
initialized_flag = @main_mod.globals.add(@main_llvm_context.int1, initialized_flag_name)
initialized_flag.initializer = @main_llvm_context.int1.const_int(0)
initialized_flag = @main_mod.globals.add(@main_llvm_context.int8, initialized_flag_name)
initialized_flag.initializer = @main_llvm_context.int8.const_int(0)
initialized_flag.linkage = LLVM::Linkage::Internal if @single_module
end
initialized_flag
Expand Down
51 changes: 33 additions & 18 deletions src/compiler/crystal/codegen/once.cr
Original file line number Diff line number Diff line change
Expand Up @@ -7,31 +7,46 @@ class Crystal::CodeGenVisitor
if once_init_fun = typed_fun?(@main_mod, ONCE_INIT)
once_init_fun = check_main_fun ONCE_INIT, once_init_fun

once_state_global = @main_mod.globals.add(once_init_fun.type.return_type, ONCE_STATE)
once_state_global.linkage = LLVM::Linkage::Internal if @single_module
once_state_global.initializer = once_init_fun.type.return_type.null

state = call once_init_fun
store state, once_state_global
if once_init_fun.type.return_type.void?
call once_init_fun
else
# legacy (kept for backward compatibility): the compiler must save the
# state returned by __crystal_once_init
once_state_global = @main_mod.globals.add(once_init_fun.type.return_type, ONCE_STATE)
once_state_global.linkage = LLVM::Linkage::Internal if @single_module
once_state_global.initializer = once_init_fun.type.return_type.null

state = call once_init_fun
store state, once_state_global
end
end
end

def run_once(flag, func : LLVMTypedFunction)
once_fun = main_fun(ONCE)
once_init_fun = main_fun(ONCE_INIT)

# both of these should be Void*
once_state_type = once_init_fun.type.return_type
once_initializer_type = once_fun.func.params.last.type
once_fun_params = once_fun.func.params
once_initializer_type = once_fun_params.last.type # must be Void*
initializer = pointer_cast(func.func.to_value, once_initializer_type)

once_state_global = @llvm_mod.globals[ONCE_STATE]? || begin
global = @llvm_mod.globals.add(once_state_type, ONCE_STATE)
global.linkage = LLVM::Linkage::External
global
if once_fun_params.size == 2
args = [flag, initializer]
else
# legacy (kept for backward compatibility): the compiler must pass the
# state returned by __crystal_once_init to __crystal_once as the first
# argument
once_init_fun = main_fun(ONCE_INIT)
once_state_type = once_init_fun.type.return_type # must be Void*

once_state_global = @llvm_mod.globals[ONCE_STATE]? || begin
global = @llvm_mod.globals.add(once_state_type, ONCE_STATE)
global.linkage = LLVM::Linkage::External
global
end

state = load(once_state_type, once_state_global)
args = [state, flag, initializer]
end

state = load(once_state_type, once_state_global)
initializer = pointer_cast(func.func.to_value, once_initializer_type)
call once_fun, [state, flag, initializer]
call once_fun, args
end
end
149 changes: 107 additions & 42 deletions src/crystal/once.cr
Original file line number Diff line number Diff line change
@@ -1,54 +1,119 @@
# This file defines the functions `__crystal_once_init` and `__crystal_once` expected
# by the compiler. `__crystal_once` is called each time a constant or class variable
# has to be initialized and is its responsibility to verify the initializer is executed
# only once. `__crystal_once_init` is executed only once at the beginning of the program
# and the result is passed on each call to `__crystal_once`.

# This implementation uses an array to store the initialization flag pointers for each value
# to find infinite loops and raise an error. In multithread mode a mutex is used to
# avoid race conditions between threads.

# :nodoc:
class Crystal::OnceState
@rec = [] of Bool*

def once(flag : Bool*, initializer : Void*)
unless flag.value
if @rec.includes?(flag)
# This file defines two functions expected by the compiler:
#
# - `__crystal_once_init`: executed only once at the beginning of the program
# and, for the legacy implementation, the result is passed on each call to
# `__crystal_once`.
#
# - `__crystal_once`: called each time a constant or class variable has to be
# initialized and is its responsibility to verify the initializer is executed
# only once and to fail on recursion.

# In multithread mode a mutex is used to avoid race conditions between threads.
#
# On Win32, `Crystal::System::FileDescriptor#@@reader_thread` spawns a new
# thread even without the `preview_mt` flag, and the thread can also reference
# Crystal constants, leading to race conditions, so we always enable the mutex.

{% if compare_versions(Crystal::VERSION, "1.15.0-dev") >= 0 %}
# This implementation uses an enum over the initialization flag pointer for
# each value to find infinite loops and raise an error.

module Crystal
enum OnceState : Int8
Processing = -1
Uninitialized = 0
Initialized = 1
end

{% if flag?(:preview_mt) || flag?(:win32) %}
@@once_mutex = uninitialized Mutex
class_property once_mutex : Mutex
{% end %}

# Using @[NoInline] so LLVM optimizes for the hot path (var already
# initialized).
@[NoInline]
def self.once(flag : OnceState*, initializer : Void*) : Nil
{% if flag?(:preview_mt) || flag?(:win32) %}
Crystal.once_mutex.synchronize { once_exec(flag, initializer) }
{% else %}
once_exec(flag, initializer)
{% end %}
end

private def self.once_exec(flag : OnceState*, initializer : Void*) : Nil
case flag.value
in .initialized?
return
in .uninitialized?
flag.value = :processing
Proc(Nil).new(initializer, Pointer(Void).null).call
flag.value = :initialized
in .processing?
raise "Recursion while initializing class variables and/or constants"
end
@rec << flag
end
end

Proc(Nil).new(initializer, Pointer(Void).null).call
flag.value = true
# :nodoc:
fun __crystal_once_init : Nil
{% if flag?(:preview_mt) || flag?(:win32) %}
Crystal.once_mutex = Mutex.new(:reentrant)
{% end %}
end

@rec.pop
end
# :nodoc:
#
# Using `@[AlwaysInline]` allows LLVM to optimize const accesses. Since this
# is a `fun` the function will still appear in the symbol table, though it
# will never be called.
@[AlwaysInline]
fun __crystal_once(flag : Int8*, initializer : Void*) : Nil
flag = flag.as(Crystal::OnceState*)
Crystal.once(flag, initializer) unless flag.value.initialized?
end
{% else %}
# This implementation uses a global array to store the initialization flag
# pointers for each value to find infinite loops and raise an error.

# on Win32, `Crystal::System::FileDescriptor#@@reader_thread` spawns a new
# thread even without the `preview_mt` flag, and the thread can also reference
# Crystal constants, leading to race conditions, so we always enable the mutex
# TODO: can this be improved?
{% if flag?(:preview_mt) || flag?(:win32) %}
@mutex = Mutex.new(:reentrant)
# :nodoc:
class Crystal::OnceState
@rec = [] of Bool*

def once(flag : Bool*, initializer : Void*)
unless flag.value
@mutex.synchronize do
previous_def
if @rec.includes?(flag)
raise "Recursion while initializing class variables and/or constants"
end
@rec << flag

Proc(Nil).new(initializer, Pointer(Void).null).call
flag.value = true

@rec.pop
end
end
{% end %}
end

# :nodoc:
fun __crystal_once_init : Void*
Crystal::OnceState.new.as(Void*)
end

# :nodoc:
fun __crystal_once(state : Void*, flag : Bool*, initializer : Void*)
state.as(Crystal::OnceState).once(flag, initializer)
end

{% if flag?(:preview_mt) || flag?(:win32) %}
@mutex = Mutex.new(:reentrant)

def once(flag : Bool*, initializer : Void*)
unless flag.value
@mutex.synchronize do
previous_def
end
end
end
{% end %}
end

# :nodoc:
fun __crystal_once_init : Void*
Crystal::OnceState.new.as(Void*)
end

# :nodoc:
fun __crystal_once(state : Void*, flag : Bool*, initializer : Void*)
state.as(Crystal::OnceState).once(flag, initializer)
end
{% end %}

0 comments on commit 43421ce

Please sign in to comment.