From d4406e3098ee480b42f0e73a12ae74573d0cab7c Mon Sep 17 00:00:00 2001
From: FUJI Goro
Date: Sat, 31 Jan 2026 18:36:26 +0900
Subject: [PATCH 1/2] Implement branch hinting proposal support

This commit adds support for the WebAssembly branch hinting proposal by
parsing the `metadata.code.branch_hint` custom section and using the hints
to mark cold blocks during Cranelift code generation.

Implementation details:

- Parse branch hints in `ModuleTranslation` from the custom section using
  wasmparser's `KnownCustom::BranchHints` reader
- Store hints as a map from function index to (offset, taken) pairs
- Add `get_branch_hint()` helper in `FuncEnvironment` to look up hints by
  converting absolute byte offsets to function-relative offsets
- Apply hints in `translate_br_if()`: when a branch is marked unlikely
  (`taken=false`), mark the branch target as cold; when marked likely
  (`taken=true`), mark the fallthrough block as cold

Branch hints are always parsed and applied when present in a module; no
configuration flag is required. A disassembly test is included to verify
cold block annotations, but official WebAssembly spec tests for this
proposal are not yet available.

Closes #9463 Co-Authored-By: Claude Opus 4.5 --- crates/cranelift/src/compiler.rs | 2 + crates/cranelift/src/func_environ.rs | 34 +++++++++ .../src/translate/code_translator.rs | 14 ++++ crates/environ/src/compile/module_environ.rs | 19 +++++ docs/stability-wasm-proposals.md | 5 +- tests/disas/branch-hints.wat | 76 +++++++++++++++++++ 6 files changed, 149 insertions(+), 1 deletion(-) create mode 100644 tests/disas/branch-hints.wat diff --git a/crates/cranelift/src/compiler.rs b/crates/cranelift/src/compiler.rs index 3b75e2821d9a..87e7896c55f8 100644 --- a/crates/cranelift/src/compiler.rs +++ b/crates/cranelift/src/compiler.rs @@ -316,6 +316,8 @@ impl wasmtime_environ::Compiler for Compiler { } } let FunctionBodyData { validator, body } = input; + // Set the function body offset for branch hint lookup + func_env.func_body_offset = body.get_binary_reader().original_position(); let mut validator = validator.into_validator(mem::take(&mut compiler.cx.validator_allocations)); compiler.cx.func_translator.translate_body( diff --git a/crates/cranelift/src/func_environ.rs b/crates/cranelift/src/func_environ.rs index fa50a47794e3..17b096493949 100644 --- a/crates/cranelift/src/func_environ.rs +++ b/crates/cranelift/src/func_environ.rs @@ -216,6 +216,10 @@ pub struct FuncEnvironment<'module_environment> { /// The stack-slot used for exposing Wasm state via debug /// instrumentation, if any, and the builder containing its metadata. pub(crate) state_slot: Option<(ir::StackSlot, FrameStateSlotBuilder)>, + + /// The byte offset of the current function body in the wasm module. + /// Used to convert absolute srcloc offsets to relative offsets for branch hint lookup. 
+ pub(crate) func_body_offset: usize, } impl<'module_environment> FuncEnvironment<'module_environment> { @@ -277,6 +281,8 @@ impl<'module_environment> FuncEnvironment<'module_environment> { stack_switching_values_buffer: None, state_slot: None, + + func_body_offset: 0, } } @@ -1203,6 +1209,34 @@ impl<'module_environment> FuncEnvironment<'module_environment> { self.needs_gc_heap } + /// Get a branch hint for the current function at the given absolute byte offset. + /// + /// The offset is converted to a relative offset from the function body start + /// before looking up in the branch hints map. + /// + /// Returns `Some(true)` if the branch is likely taken, `Some(false)` if unlikely taken, + /// or `None` if no hint exists for this offset. + pub fn get_branch_hint(&self, absolute_offset: usize) -> Option { + // Extract the DefinedFuncIndex from the current function's key + let def_func_index = match self.key { + FuncKey::DefinedWasmFunction(_, def_func_index) => def_func_index, + _ => return None, + }; + // Convert absolute offset to relative offset from function body start + let relative_offset = absolute_offset.checked_sub(self.func_body_offset)?; + // Convert to full FuncIndex to look up in branch_hints + let func_index = self.module.func_index(def_func_index); + self.translation + .branch_hints + .get(&func_index.as_u32()) + .and_then(|hints| { + hints + .iter() + .find(|(o, _)| *o as usize == relative_offset) + .map(|(_, taken)| *taken) + }) + } + /// Get the number of Wasm parameters for the given function. 
pub(crate) fn num_params_for_func(&self, function_index: FuncIndex) -> usize { let ty = self.module.functions[function_index] diff --git a/crates/cranelift/src/translate/code_translator.rs b/crates/cranelift/src/translate/code_translator.rs index e2dc4b539c8c..ec5002ade4d8 100644 --- a/crates/cranelift/src/translate/code_translator.rs +++ b/crates/cranelift/src/translate/code_translator.rs @@ -3965,9 +3965,23 @@ fn translate_br_if( builder: &mut FunctionBuilder, env: &mut FuncEnvironment<'_>, ) { + // Check for branch hints before borrowing env mutably. + // The srcloc contains the byte offset of the current instruction. + let offset = builder.srcloc().bits() as usize; + let branch_hint = env.get_branch_hint(offset); + let val = env.stacks.pop1(); let (br_destination, inputs) = translate_br_if_args(relative_depth, env); let next_block = builder.create_block(); + + if let Some(likely) = branch_hint { + if likely { + builder.set_cold_block(next_block); + } else { + builder.set_cold_block(br_destination); + } + } + canonicalise_brif(builder, val, br_destination, inputs, next_block, &[]); builder.seal_block(next_block); // The only predecessor is the current block. diff --git a/crates/environ/src/compile/module_environ.rs b/crates/environ/src/compile/module_environ.rs index 411dc144ffd0..9017cdd0f048 100644 --- a/crates/environ/src/compile/module_environ.rs +++ b/crates/environ/src/compile/module_environ.rs @@ -110,6 +110,13 @@ pub struct ModuleTranslation<'data> { /// The type information of the current module made available at the end of the /// validation process. types: Option, + + /// Branch hints parsed from the `metadata.code.branch_hint` custom section. + /// + /// Maps function index to a list of (func_offset, taken) pairs where + /// func_offset is the byte offset within the function body and taken + /// indicates whether the branch is likely to be taken. 
+ pub branch_hints: HashMap>, } impl<'data> ModuleTranslation<'data> { @@ -130,6 +137,7 @@ impl<'data> ModuleTranslation<'data> { total_passive_data: 0, code_index: 0, types: None, + branch_hints: HashMap::default(), } } @@ -740,6 +748,17 @@ and for re-adding support for interface types you can see this issue: log::warn!("failed to parse name section {e:?}"); } } + KnownCustom::BranchHints(reader) => { + for func_hints in reader.into_iter().flatten() { + let mut hints = Vec::new(); + for hint in func_hints.hints.into_iter().flatten() { + hints.push((hint.func_offset, hint.taken)); + } + if !hints.is_empty() { + self.result.branch_hints.insert(func_hints.func, hints); + } + } + } _ => { let name = section.name().trim_end_matches(".dwo"); if name.starts_with(".debug_") { diff --git a/docs/stability-wasm-proposals.md b/docs/stability-wasm-proposals.md index 68607b0ae020..8e985650c92a 100644 --- a/docs/stability-wasm-proposals.md +++ b/docs/stability-wasm-proposals.md @@ -84,17 +84,20 @@ The emoji legend is: | Proposal | Phase 4 | Tests | Finished | Fuzzed | API | C API | |-----------------------------|---------|-------|----------|--------|-----|-------| +| [`branch-hinting`] [^12] | ❌ | ❌ | ✅ | ❌ | ✅ | ✅ | | [`stack-switching`] [^11] | ❌ | 🚧 | 🚧 | ❌ | ❌ | ❌ | [^11]: The stack-switching proposal is a work-in-progress being tracked at [#9465](https://github.com/bytecodealliance/wasmtime/issues/9465). Currently the implementation is only for x86\_64 Linux. +[^12]: Branch hinting is implemented by parsing the `metadata.code.branch_hint` + custom section and marking cold blocks in Cranelift for optimization. + No configuration is required; hints are automatically used when present. 
## Unimplemented proposals | Proposal | Tracking Issue | |-------------------------------|----------------| -| [`branch-hinting`] | [#9463](https://github.com/bytecodealliance/wasmtime/issues/9463) | | [`flexible-vectors`] | [#9464](https://github.com/bytecodealliance/wasmtime/issues/9464) | | [`memory-control`] | [#9467](https://github.com/bytecodealliance/wasmtime/issues/9467) | | [`shared-everything-threads`] | [#9466](https://github.com/bytecodealliance/wasmtime/issues/9466) | diff --git a/tests/disas/branch-hints.wat b/tests/disas/branch-hints.wat new file mode 100644 index 000000000000..15b5cbe5d140 --- /dev/null +++ b/tests/disas/branch-hints.wat @@ -0,0 +1,76 @@ +;;! target = "x86_64" +;;! test = "optimize" + +;; Test that branch hints from the `metadata.code.branch_hint` custom section +;; are used to mark cold blocks in the generated code. + +(module + ;; Test br_if with hint that branch is unlikely (not taken). + ;; The branch target block should be marked cold. + (func $unlikely_branch (param i32) (result i32) + (block $target (result i32) + i32.const 0 ;; value to return if branch taken + local.get 0 ;; condition + (@metadata.code.branch_hint "\00") + br_if $target + ;; Fallthrough path (likely) + drop + i32.const 42 + ) + ) + + ;; Test br_if with hint that branch is likely (taken). + ;; The fallthrough block should be marked cold. 
+ (func $likely_branch (param i32) (result i32) + (block $target (result i32) + i32.const 0 ;; value to return if branch taken + local.get 0 ;; condition + (@metadata.code.branch_hint "\01") + br_if $target + ;; Fallthrough path (unlikely, should be cold) + drop + i32.const 42 + ) + ) +) +;; function u0:0(i64 vmctx, i64, i32) -> i32 tail { +;; gv0 = vmctx +;; gv1 = load.i64 notrap aligned readonly gv0+8 +;; gv2 = load.i64 notrap aligned gv1+16 +;; stack_limit = gv2 +;; +;; block0(v0: i64, v1: i64, v2: i32): +;; @0043 v5 = iconst.i32 0 +;; @0047 brif v2, block2(v5), block3 ; v5 = 0 +;; +;; block3: +;; @004a v6 = iconst.i32 42 +;; @004c jump block2(v6) ; v6 = 42 +;; +;; block2(v4: i32) cold: +;; @004d jump block1(v4) +;; +;; block1(v3: i32): +;; @004d return v3 +;; } +;; +;; function u0:1(i64 vmctx, i64, i32) -> i32 tail { +;; gv0 = vmctx +;; gv1 = load.i64 notrap aligned readonly gv0+8 +;; gv2 = load.i64 notrap aligned gv1+16 +;; stack_limit = gv2 +;; +;; block0(v0: i64, v1: i64, v2: i32): +;; @0052 v5 = iconst.i32 0 +;; @0056 brif v2, block2(v5), block3 ; v5 = 0 +;; +;; block3 cold: +;; @0059 v6 = iconst.i32 42 +;; @005b jump block2(v6) ; v6 = 42 +;; +;; block2(v4: i32): +;; @005c jump block1(v4) +;; +;; block1(v3: i32): +;; @005c return v3 +;; } From bc62088d6f7b31712badb427fd37e18c3c410a95 Mon Sep 17 00:00:00 2001 From: FUJI Goro Date: Tue, 3 Feb 2026 09:16:13 +0900 Subject: [PATCH 2/2] Use HashMap for branch hints lookup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change branch hints storage from Vec<(u32, bool)> to HashMap for O(1) lookup instead of O(n) linear search per lookup. The previous implementation had O(n²) complexity when processing many branch hints since each lookup scanned the entire vector. The HashMap approach also handles untrusted input robustly without assumptions about hint ordering in the custom section. 
Co-Authored-By: Claude Opus 4.5 --- crates/cranelift/src/compiler.rs | 4 +- crates/cranelift/src/func_environ.rs | 40 +++++++------------ .../src/translate/code_translator.rs | 3 +- crates/environ/src/compile/module_environ.rs | 10 ++--- 4 files changed, 22 insertions(+), 35 deletions(-) diff --git a/crates/cranelift/src/compiler.rs b/crates/cranelift/src/compiler.rs index 87e7896c55f8..751981439fcc 100644 --- a/crates/cranelift/src/compiler.rs +++ b/crates/cranelift/src/compiler.rs @@ -316,8 +316,10 @@ impl wasmtime_environ::Compiler for Compiler { } } let FunctionBodyData { validator, body } = input; - // Set the function body offset for branch hint lookup func_env.func_body_offset = body.get_binary_reader().original_position(); + if let Some(hints) = translation.branch_hints.get(&func_index.as_u32()) { + func_env.set_branch_hints(hints); + } let mut validator = validator.into_validator(mem::take(&mut compiler.cx.validator_allocations)); compiler.cx.func_translator.translate_body( diff --git a/crates/cranelift/src/func_environ.rs b/crates/cranelift/src/func_environ.rs index 17b096493949..d71f312bba15 100644 --- a/crates/cranelift/src/func_environ.rs +++ b/crates/cranelift/src/func_environ.rs @@ -220,6 +220,9 @@ pub struct FuncEnvironment<'module_environment> { /// The byte offset of the current function body in the wasm module. /// Used to convert absolute srcloc offsets to relative offsets for branch hint lookup. pub(crate) func_body_offset: usize, + + /// Branch hints for the current function (offset -> likely taken). 
+ branch_hints: Option<&'module_environment std::collections::HashMap>, } impl<'module_environment> FuncEnvironment<'module_environment> { @@ -283,6 +286,7 @@ impl<'module_environment> FuncEnvironment<'module_environment> { state_slot: None, func_body_offset: 0, + branch_hints: None, } } @@ -1209,32 +1213,18 @@ impl<'module_environment> FuncEnvironment<'module_environment> { self.needs_gc_heap } - /// Get a branch hint for the current function at the given absolute byte offset. - /// - /// The offset is converted to a relative offset from the function body start - /// before looking up in the branch hints map. - /// - /// Returns `Some(true)` if the branch is likely taken, `Some(false)` if unlikely taken, - /// or `None` if no hint exists for this offset. + /// Returns branch hint at `absolute_offset`: `Some(true)` = likely, `Some(false)` = unlikely. pub fn get_branch_hint(&self, absolute_offset: usize) -> Option { - // Extract the DefinedFuncIndex from the current function's key - let def_func_index = match self.key { - FuncKey::DefinedWasmFunction(_, def_func_index) => def_func_index, - _ => return None, - }; - // Convert absolute offset to relative offset from function body start - let relative_offset = absolute_offset.checked_sub(self.func_body_offset)?; - // Convert to full FuncIndex to look up in branch_hints - let func_index = self.module.func_index(def_func_index); - self.translation - .branch_hints - .get(&func_index.as_u32()) - .and_then(|hints| { - hints - .iter() - .find(|(o, _)| *o as usize == relative_offset) - .map(|(_, taken)| *taken) - }) + let hints = self.branch_hints?; + let relative_offset = absolute_offset.checked_sub(self.func_body_offset)? as u32; + hints.get(&relative_offset).copied() + } + + pub(crate) fn set_branch_hints( + &mut self, + hints: &'module_environment std::collections::HashMap, + ) { + self.branch_hints = Some(hints); } /// Get the number of Wasm parameters for the given function. 
diff --git a/crates/cranelift/src/translate/code_translator.rs b/crates/cranelift/src/translate/code_translator.rs index ec5002ade4d8..91d0f858d2bb 100644 --- a/crates/cranelift/src/translate/code_translator.rs +++ b/crates/cranelift/src/translate/code_translator.rs @@ -3965,8 +3965,7 @@ fn translate_br_if( builder: &mut FunctionBuilder, env: &mut FuncEnvironment<'_>, ) { - // Check for branch hints before borrowing env mutably. - // The srcloc contains the byte offset of the current instruction. + // Get branch hint before `translate_br_if_args` borrows env mutably. let offset = builder.srcloc().bits() as usize; let branch_hint = env.get_branch_hint(offset); diff --git a/crates/environ/src/compile/module_environ.rs b/crates/environ/src/compile/module_environ.rs index 9017cdd0f048..cd3f5b8131ce 100644 --- a/crates/environ/src/compile/module_environ.rs +++ b/crates/environ/src/compile/module_environ.rs @@ -112,11 +112,7 @@ pub struct ModuleTranslation<'data> { types: Option, /// Branch hints parsed from the `metadata.code.branch_hint` custom section. - /// - /// Maps function index to a list of (func_offset, taken) pairs where - /// func_offset is the byte offset within the function body and taken - /// indicates whether the branch is likely to be taken. - pub branch_hints: HashMap>, + pub branch_hints: HashMap>, } impl<'data> ModuleTranslation<'data> { @@ -750,9 +746,9 @@ and for re-adding support for interface types you can see this issue: } KnownCustom::BranchHints(reader) => { for func_hints in reader.into_iter().flatten() { - let mut hints = Vec::new(); + let mut hints = HashMap::new(); for hint in func_hints.hints.into_iter().flatten() { - hints.push((hint.func_offset, hint.taken)); + hints.insert(hint.func_offset, hint.taken); } if !hints.is_empty() { self.result.branch_hints.insert(func_hints.func, hints);