@@ -42,19 +42,19 @@ void initBindings(pybind11::module_& m)
4242 py::arg (" output_sf" ) = std::nullopt , py::arg (" workspace_" ) = std::nullopt , py::arg (" sequence_length" ),
4343 py::arg (" host_past_key_value_lengths" ), py::arg (" host_total_kv_lens" ), py::arg (" context_lengths" ),
4444 py::arg (" host_context_lengths" ), py::arg (" host_request_types" ),
45- py::arg (" kv_cache_block_offsets" ) = std::nullopt , py::arg (" host_kv_cache_block_offsets " ) = std::nullopt ,
46- py::arg (" host_kv_cache_pool_pointers " ) = std::nullopt , py::arg (" host_kv_cache_pool_mapping " ) = std::nullopt ,
47- py::arg (" cache_indirection " ) = std::nullopt , py::arg (" kv_scale_orig_quant " ) = std::nullopt ,
48- py::arg (" kv_scale_quant_orig " ) = std::nullopt , py::arg (" out_scale " ) = std::nullopt ,
49- py::arg (" rotary_inv_freq " ) = std::nullopt , py::arg (" rotary_cos_sin " ) = std::nullopt ,
50- py::arg (" latent_cache " ) = std::nullopt , py::arg (" q_pe " ) = std::nullopt ,
51- py::arg (" block_ids_per_seq " ) = std::nullopt , py::arg (" attention_sinks " ) = std:: nullopt , py::arg (" is_fused_qkv " ),
52- py::arg (" update_kv_cache " ), py::arg (" predicted_tokens_per_seq " ), py::arg (" layer_idx " ), py::arg (" num_heads " ),
53- py::arg (" num_kv_heads " ), py::arg (" head_size " ) , py::arg (" tokens_per_block " ) = std:: nullopt ,
54- py::arg (" max_num_requests " ), py::arg (" max_context_length " ), py::arg (" attention_window_size " ),
55- py::arg (" sink_token_length " ), py::arg (" beam_width " ), py::arg (" mask_type " ), py::arg (" quant_mode " ),
56- py::arg (" q_scaling " ), py::arg (" position_embedding_type " ), py::arg (" rotary_embedding_dim " ),
57- py::arg (" rotary_embedding_base " ), py::arg ( " rotary_embedding_scale_type" ), py::arg (" rotary_embedding_scales" ),
45+ py::arg (" kv_cache_block_offsets" ) = std::nullopt , py::arg (" host_kv_cache_pool_pointers " ) = std::nullopt ,
46+ py::arg (" host_kv_cache_pool_mapping " ) = std::nullopt , py::arg (" cache_indirection " ) = std::nullopt ,
47+ py::arg (" kv_scale_orig_quant " ) = std::nullopt , py::arg (" kv_scale_quant_orig " ) = std::nullopt ,
48+ py::arg (" out_scale " ) = std::nullopt , py::arg (" rotary_inv_freq " ) = std::nullopt ,
49+ py::arg (" rotary_cos_sin " ) = std::nullopt , py::arg (" latent_cache " ) = std::nullopt ,
50+ py::arg (" q_pe " ) = std::nullopt , py::arg (" block_ids_per_seq " ) = std::nullopt ,
51+ py::arg (" attention_sinks " ) = std::nullopt , py::arg (" is_fused_qkv " ) , py::arg (" update_kv_cache " ),
52+ py::arg (" predicted_tokens_per_seq " ), py::arg (" layer_idx " ), py::arg (" num_heads " ), py::arg (" num_kv_heads " ),
53+ py::arg (" head_size " ), py::arg (" tokens_per_block " ) = std:: nullopt , py::arg (" max_num_requests " ) ,
54+ py::arg (" max_context_length " ), py::arg (" attention_window_size " ), py::arg (" sink_token_length " ),
55+ py::arg (" beam_width " ), py::arg (" mask_type " ), py::arg (" quant_mode " ), py::arg (" q_scaling " ),
56+ py::arg (" position_embedding_type " ), py::arg (" rotary_embedding_dim " ), py::arg (" rotary_embedding_base " ),
57+ py::arg (" rotary_embedding_scale_type" ), py::arg (" rotary_embedding_scales" ),
5858 py::arg (" rotary_embedding_max_position_info" ), py::arg (" use_paged_context_fmha" ),
5959 py::arg (" attention_input_type" ) = std::nullopt , py::arg (" is_mla_enable" ),
6060 py::arg (" chunked_prefill_buffer_batch_size" ) = std::nullopt , py::arg (" q_lora_rank" ) = std::nullopt ,
0 commit comments