diff --git a/crates/cranelift/src/compiler.rs b/crates/cranelift/src/compiler.rs
index 3b75e2821d9a..751981439fcc 100644
--- a/crates/cranelift/src/compiler.rs
+++ b/crates/cranelift/src/compiler.rs
@@ -316,6 +316,10 @@ impl wasmtime_environ::Compiler for Compiler {
             }
         }
         let FunctionBodyData { validator, body } = input;
+        func_env.func_body_offset = body.get_binary_reader().original_position();
+        if let Some(hints) = translation.branch_hints.get(&func_index.as_u32()) {
+            func_env.set_branch_hints(hints);
+        }
         let mut validator =
             validator.into_validator(mem::take(&mut compiler.cx.validator_allocations));
         compiler.cx.func_translator.translate_body(
diff --git a/crates/cranelift/src/func_environ.rs b/crates/cranelift/src/func_environ.rs
index fa50a47794e3..d71f312bba15 100644
--- a/crates/cranelift/src/func_environ.rs
+++ b/crates/cranelift/src/func_environ.rs
@@ -216,6 +216,13 @@ pub struct FuncEnvironment<'module_environment> {
     /// The stack-slot used for exposing Wasm state via debug
     /// instrumentation, if any, and the builder containing its metadata.
     pub(crate) state_slot: Option<(ir::StackSlot, FrameStateSlotBuilder)>,
+
+    /// The byte offset of the current function body in the wasm module.
+    /// Used to convert absolute srcloc offsets to relative offsets for branch hint lookup.
+    pub(crate) func_body_offset: usize,
+
+    /// Branch hints for the current function (offset -> likely taken).
+    branch_hints: Option<&'module_environment std::collections::HashMap<u32, bool>>,
 }
 
 impl<'module_environment> FuncEnvironment<'module_environment> {
@@ -277,6 +284,9 @@ impl<'module_environment> FuncEnvironment<'module_environment> {
 
             stack_switching_values_buffer: None,
             state_slot: None,
+
+            func_body_offset: 0,
+            branch_hints: None,
         }
     }
 
@@ -1203,6 +1213,27 @@ impl<'module_environment> FuncEnvironment<'module_environment> {
         self.needs_gc_heap
     }
 
+    /// Returns the branch hint recorded for `absolute_offset`, if any.
+    ///
+    /// `Some(true)` = likely taken, `Some(false)` = unlikely. `None` when no hints are
+    /// set, when `absolute_offset` precedes `func_body_offset`, or when the relative
+    /// offset does not fit the `u32` hint key.
+    pub fn get_branch_hint(&self, absolute_offset: usize) -> Option<bool> {
+        let hints = self.branch_hints?;
+        let relative_offset = absolute_offset.checked_sub(self.func_body_offset)?;
+        // Checked conversion: a truncating `as u32` could alias an unrelated hint key.
+        let relative_offset = u32::try_from(relative_offset).ok()?;
+        hints.get(&relative_offset).copied()
+    }
+
+    /// Installs the per-function hint map consulted by `get_branch_hint`.
+    pub(crate) fn set_branch_hints(
+        &mut self,
+        hints: &'module_environment std::collections::HashMap<u32, bool>,
+    ) {
+        self.branch_hints = Some(hints);
+    }
+
     /// Get the number of Wasm parameters for the given function.
     pub(crate) fn num_params_for_func(&self, function_index: FuncIndex) -> usize {
         let ty = self.module.functions[function_index]
diff --git a/crates/cranelift/src/translate/code_translator.rs b/crates/cranelift/src/translate/code_translator.rs
index e2dc4b539c8c..91d0f858d2bb 100644
--- a/crates/cranelift/src/translate/code_translator.rs
+++ b/crates/cranelift/src/translate/code_translator.rs
@@ -3965,9 +3965,24 @@ fn translate_br_if(
     builder: &mut FunctionBuilder,
     env: &mut FuncEnvironment<'_>,
 ) {
+    // Get branch hint before `translate_br_if_args` borrows env mutably.
+    let offset = builder.srcloc().bits() as usize;
+    let branch_hint = env.get_branch_hint(offset);
+
     let val = env.stacks.pop1();
     let (br_destination, inputs) = translate_br_if_args(relative_depth, env);
     let next_block = builder.create_block();
+
+    if let Some(likely) = branch_hint {
+        if likely {
+            builder.set_cold_block(next_block);
+        } else {
+            // NOTE(review): `br_destination` may have other predecessors; marking it
+            // cold affects every path into it -- confirm this is intended.
+            builder.set_cold_block(br_destination);
+        }
+    }
+
     canonicalise_brif(builder, val, br_destination, inputs, next_block, &[]);
 
     builder.seal_block(next_block); // The only predecessor is the current block.
diff --git a/crates/environ/src/compile/module_environ.rs b/crates/environ/src/compile/module_environ.rs
index 411dc144ffd0..cd3f5b8131ce 100644
--- a/crates/environ/src/compile/module_environ.rs
+++ b/crates/environ/src/compile/module_environ.rs
@@ -110,6 +110,9 @@ pub struct ModuleTranslation<'data> {
     /// The type information of the current module made available at the end of the
     /// validation process.
     types: Option<Types>,
+
+    /// Branch hints parsed from the `metadata.code.branch_hint` custom section.
+    pub branch_hints: HashMap<u32, HashMap<u32, bool>>,
 }
 
 impl<'data> ModuleTranslation<'data> {
@@ -130,6 +133,7 @@ impl<'data> ModuleTranslation<'data> {
             total_passive_data: 0,
             code_index: 0,
             types: None,
+            branch_hints: HashMap::default(),
         }
     }
 
@@ -740,6 +744,17 @@ and for re-adding support for interface types you can see this issue:
                     log::warn!("failed to parse name section {e:?}");
                 }
             }
+            KnownCustom::BranchHints(reader) => {
+                for func_hints in reader.into_iter().flatten() {
+                    let mut hints = HashMap::new();
+                    for hint in func_hints.hints.into_iter().flatten() {
+                        hints.insert(hint.func_offset, hint.taken);
+                    }
+                    if !hints.is_empty() {
+                        self.result.branch_hints.insert(func_hints.func, hints);
+                    }
+                }
+            }
             _ => {
                 let name = section.name().trim_end_matches(".dwo");
                 if name.starts_with(".debug_") {
diff --git a/docs/stability-wasm-proposals.md b/docs/stability-wasm-proposals.md
index 68607b0ae020..8e985650c92a 100644
--- a/docs/stability-wasm-proposals.md
+++ b/docs/stability-wasm-proposals.md
@@ -84,17 +84,20 @@ The emoji legend is:
 
 | Proposal                    | Phase 4 | Tests | Finished | Fuzzed | API | C API |
 |-----------------------------|---------|-------|----------|--------|-----|-------|
+| [`branch-hinting`] [^12]    | ❌      | ❌    | ✅       | ❌     | ✅  | ✅    |
 | [`stack-switching`] [^11]   | ❌      | 🚧    | 🚧       | ❌     | ❌  | ❌    |
 
 [^11]: The stack-switching proposal is a work-in-progress being tracked
     at [#9465](https://github.com/bytecodealliance/wasmtime/issues/9465). Currently the
     implementation is only for x86\_64 Linux.
+[^12]: Branch hinting is implemented by parsing the `metadata.code.branch_hint`
+    custom section and marking cold blocks in Cranelift for optimization.
+    No configuration is required; hints are automatically used when present.
 
 ## Unimplemented proposals
 
 | Proposal                      | Tracking Issue |
 |-------------------------------|----------------|
-| [`branch-hinting`]            | [#9463](https://github.com/bytecodealliance/wasmtime/issues/9463) |
 | [`flexible-vectors`]          | [#9464](https://github.com/bytecodealliance/wasmtime/issues/9464) |
 | [`memory-control`]            | [#9467](https://github.com/bytecodealliance/wasmtime/issues/9467) |
 | [`shared-everything-threads`] | [#9466](https://github.com/bytecodealliance/wasmtime/issues/9466) |
diff --git a/tests/disas/branch-hints.wat b/tests/disas/branch-hints.wat
new file mode 100644
index 000000000000..15b5cbe5d140
--- /dev/null
+++ b/tests/disas/branch-hints.wat
@@ -0,0 +1,76 @@
+;;! target = "x86_64"
+;;! test = "optimize"
+
+;; Test that branch hints from the `metadata.code.branch_hint` custom section
+;; are used to mark cold blocks in the generated code.
+
+(module
+  ;; Test br_if with hint that branch is unlikely (not taken).
+  ;; The branch target block should be marked cold.
+  (func $unlikely_branch (param i32) (result i32)
+    (block $target (result i32)
+      i32.const 0  ;; value to return if branch taken
+      local.get 0  ;; condition
+      (@metadata.code.branch_hint "\00")
+      br_if $target
+      ;; Fallthrough path (likely)
+      drop
+      i32.const 42
+    )
+  )
+
+  ;; Test br_if with hint that branch is likely (taken).
+  ;; The fallthrough block should be marked cold.
+  (func $likely_branch (param i32) (result i32)
+    (block $target (result i32)
+      i32.const 0  ;; value to return if branch taken
+      local.get 0  ;; condition
+      (@metadata.code.branch_hint "\01")
+      br_if $target
+      ;; Fallthrough path (unlikely, should be cold)
+      drop
+      i32.const 42
+    )
+  )
+)
+;; function u0:0(i64 vmctx, i64, i32) -> i32 tail {
+;;     gv0 = vmctx
+;;     gv1 = load.i64 notrap aligned readonly gv0+8
+;;     gv2 = load.i64 notrap aligned gv1+16
+;;     stack_limit = gv2
+;;
+;;                                 block0(v0: i64, v1: i64, v2: i32):
+;; @0043                               v5 = iconst.i32 0
+;; @0047                               brif v2, block2(v5), block3  ; v5 = 0
+;;
+;;                                 block3:
+;; @004a                               v6 = iconst.i32 42
+;; @004c                               jump block2(v6)  ; v6 = 42
+;;
+;;                                 block2(v4: i32) cold:
+;; @004d                               jump block1(v4)
+;;
+;;                                 block1(v3: i32):
+;; @004d                               return v3
+;; }
+;;
+;; function u0:1(i64 vmctx, i64, i32) -> i32 tail {
+;;     gv0 = vmctx
+;;     gv1 = load.i64 notrap aligned readonly gv0+8
+;;     gv2 = load.i64 notrap aligned gv1+16
+;;     stack_limit = gv2
+;;
+;;                                 block0(v0: i64, v1: i64, v2: i32):
+;; @0052                               v5 = iconst.i32 0
+;; @0056                               brif v2, block2(v5), block3  ; v5 = 0
+;;
+;;                                 block3 cold:
+;; @0059                               v6 = iconst.i32 42
+;; @005b                               jump block2(v6)  ; v6 = 42
+;;
+;;                                 block2(v4: i32):
+;; @005c                               jump block1(v4)
+;;
+;;                                 block1(v3: i32):
+;; @005c                               return v3
+;; }