
Commit 9268115
Author: psyloy

    configure max generation tokens for vllm/sglang

1 parent: 74f3224

File tree (5 files changed: +17 -0 lines changed)

  verl/experimental/agent_loop/agent_loop.py
  verl/experimental/fully_async_policy/agent_loop/agent_loop.py
  verl/trainer/config/rollout/rollout.yaml
  verl/workers/config/rollout.py
  verl/workers/rollout/sglang_rollout/async_sglang_server.py


verl/experimental/agent_loop/agent_loop.py
Lines changed: 6 additions & 0 deletions

@@ -433,6 +433,12 @@ async def generate_sequences(self, batch: DataProto) -> DataProto:
             logprobs=config.calculate_log_probs,
         )
 
+        # configure max generation tokens for vllm/sglang
+        for param_name in ["max_tokens", "max_new_tokens"]:
+            param_value = getattr(config, param_name, None)
+            if param_value is not None:
+                sampling_params[param_name] = param_value
+
         # override sampling params for validation
         if batch.meta_info.get("validate", False):
             sampling_params["top_p"] = config.val_kwargs.top_p

verl/experimental/fully_async_policy/agent_loop/agent_loop.py
Lines changed: 2 additions & 0 deletions

@@ -101,13 +101,15 @@ async def generate_sequences_no_post(
         sampling_params = dict(
             temperature=config.temperature,
             top_p=config.top_p,
+            top_k=config.top_k,
             repetition_penalty=1.0,
             logprobs=config.calculate_log_probs,
         )
 
         # override sampling params for validation
         if batch.meta_info.get("validate", False):
             sampling_params["top_p"] = config.val_kwargs.top_p
+            sampling_params["top_k"] = config.val_kwargs.top_k
             sampling_params["temperature"] = config.val_kwargs.temperature
 
         if "agent_name" not in batch.non_tensor_batch:

verl/trainer/config/rollout/rollout.yaml
Lines changed: 6 additions & 0 deletions

@@ -16,6 +16,12 @@ top_k: -1
 # Top-p sampling parameter. Default 1.0.
 top_p: 1
 
+# max number of tokens to generate for vllm.
+max_tokens: null
+
+# max number of tokens to generate for sglang.
+max_new_tokens: null
+
 # typically the same as data max prompt length
 # same as data.max_prompt_length if it exists
 prompt_length: ${oc.select:data.max_prompt_length,512}
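A usage sketch of the new defaults, using PyYAML only to demonstrate the parsing (verl itself loads this file through Hydra/OmegaConf): `null` deserializes to Python None, so neither cap is forwarded unless a user sets one explicitly.

    import yaml  # PyYAML, used here only to show how null deserializes

    cfg = yaml.safe_load("""
    top_k: -1
    top_p: 1
    max_tokens: null
    max_new_tokens: 512
    """)

    # null -> None, so only the explicitly set sglang cap is forwarded.
    forwarded = {k: v for k, v in cfg.items()
                 if k in ("max_tokens", "max_new_tokens") and v is not None}
    print(forwarded)  # {'max_new_tokens': 512}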

verl/workers/config/rollout.py
Lines changed: 2 additions & 0 deletions

@@ -130,6 +130,8 @@ class RolloutConfig(BaseConfig):
     do_sample: bool = True
     n: int = 1
     repetition_penalty: float = 1.0
+    max_tokens: Optional[int] = None
+    max_new_tokens: Optional[int] = None
 
     # Early termination threshold for multi-turn rollout in sglang.
     # Abort remaining requests when (1 - over_sample_rate) * total_requests are completed.
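A minimal stand-in for the new fields (not verl's full RolloutConfig): defaulting both to None keeps existing configs valid, and the getattr(..., None) call sites above also tolerate config objects that predate these fields entirely.

    from dataclasses import dataclass
    from typing import Optional

    @dataclass
    class RolloutConfigStub:  # stand-in, not verl's BaseConfig subclass
        do_sample: bool = True
        n: int = 1
        repetition_penalty: float = 1.0
        max_tokens: Optional[int] = None      # cap consumed by vllm
        max_new_tokens: Optional[int] = None  # cap consumed by sglang

    cfg = RolloutConfigStub()
    assert getattr(cfg, "max_tokens", None) is None  # unset -> not forwarded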

verl/workers/rollout/sglang_rollout/async_sglang_server.py
Lines changed: 1 addition & 0 deletions

@@ -327,6 +327,7 @@ async def generate(
             f"({self.config.max_model_len})."
         )
 
+        # Determine max_new_tokens from sampling_params or use configured response_length as default
         if "max_new_tokens" in sampling_params:
            max_new_tokens = sampling_params.pop("max_new_tokens")
         elif "max_tokens" in sampling_params:
