Commit a23a530

feat: implement automatic history truncation for token limit errors
- Add automatic detection of token limit errors (GitHub, OpenAI, etc.)
- Implement smart truncation that preserves the system message and recent context
- Use 80% of the last successful message size as the truncation target
- Add automatic retry after truncation, with fallback error handling
- Remove stored state in favor of on-demand size calculation

Fixes token limit errors like "Request body too large for gpt-4.1 model. Max size: 8000 tokens".
1 parent b48bfce commit a23a530

File tree

1 file changed (+95, -18 lines)

python/translate.py

Lines changed: 95 additions & 18 deletions
@@ -329,24 +329,57 @@ async def read_mcp_resource(self, resource_uri: str, server_name: str = None) ->
             logger.error(f"❌ Failed to read MCP resource: {e}")
             return {"error": str(e), "isError": True}

-    async def gpt_call(self, messages: List[Dict[str, str]]) -> str:
-        """Make a call to the LLM."""
-        try:
-            # Validate messages to prevent empty content that causes Bedrock API errors
-            validated_messages = []
-            for msg in messages:
-                content = msg.get("content", "").strip()
-                if not content:
-                    logger.warning(f"⚠️ Skipping empty message with role: {msg.get('role', 'unknown')}")
-                    continue
-                validated_messages.append({
-                    "role": msg["role"],
-                    "content": content
-                })
+    def _calculate_messages_size(self, messages: List[Dict[str, str]]) -> int:
+        """Calculate approximate size of messages in characters."""
+        total_size = 0
+        for msg in messages:
+            total_size += len(msg.get("content", "")) + len(msg.get("role", ""))
+        return total_size
+
+    def _truncate_messages(self, messages: List[Dict[str, str]], target_size: int) -> List[Dict[str, str]]:
+        """Truncate messages to fit within target size, keeping system message and recent messages."""
+        if not messages:
+            return messages

-            if not validated_messages:
-                raise ValueError("No valid messages to send to LLM")
+        # Always keep the system message (first message)
+        truncated = [messages[0]] if messages[0].get("role") == "system" else []
+        current_size = self._calculate_messages_size(truncated)
+
+        # Add messages from the end, working backwards
+        recent_messages = []
+        for msg in reversed(messages[1:]):
+            msg_size = len(msg.get("content", "")) + len(msg.get("role", ""))
+            if current_size + msg_size <= target_size:
+                recent_messages.insert(0, msg)  # Insert at beginning to maintain order
+                current_size += msg_size
+            else:
+                break
+
+        # Combine system message with recent messages
+        truncated.extend(recent_messages)
+
+        logger.info(f"🔄 Truncated messages from {len(messages)} to {len(truncated)} (size: {current_size} chars)")
+        return truncated

+    async def gpt_call(self, messages: List[Dict[str, str]], last_successful_size: Optional[int] = None) -> str:
+        """Make a call to the LLM with automatic truncation on token limit errors."""
+        # Validate messages to prevent empty content that causes Bedrock API errors
+        validated_messages = []
+        for msg in messages:
+            content = msg.get("content", "").strip()
+            if not content:
+                logger.warning(f"⚠️ Skipping empty message with role: {msg.get('role', 'unknown')}")
+                continue
+            validated_messages.append({
+                "role": msg["role"],
+                "content": content
+            })
+
+        if not validated_messages:
+            raise ValueError("No valid messages to send to LLM")
+
+        # Try the call with original messages first
+        try:
             # Prepare completion parameters
             completion_params = {
                 "model": self.model,
@@ -359,8 +392,48 @@ async def gpt_call(self, messages: List[Dict[str, str]]) -> str:
                 completion_params["model_id"] = self.model_id

             response = await acompletion(**completion_params)
-            return response["choices"][0]["message"]["content"]
+            result = response["choices"][0]["message"]["content"]
+
+            current_size = self._calculate_messages_size(validated_messages)
+            logger.debug(f"✅ Successful call with message size: {current_size} chars")
+
+            return result
+
         except Exception as e:
+            error_str = str(e)
+
+            # Check if this is a token limit error
+            is_token_limit_error = (
+                "Request body too large" in error_str or
+                "too large for" in error_str or
+                "Max size:" in error_str or
+                "maximum context length" in error_str or
+                "token limit" in error_str
+            )
+
+            if is_token_limit_error and last_successful_size:
+                logger.warning(f"⚠️ Token limit exceeded, attempting truncation based on last successful size: {last_successful_size}")
+
+                # Use 80% of last successful size as target to provide some buffer
+                target_size = int(last_successful_size * 0.8)
+                truncated_messages = self._truncate_messages(validated_messages, target_size)
+
+                if len(truncated_messages) < len(validated_messages):
+                    try:
+                        # Retry with truncated messages
+                        completion_params["messages"] = truncated_messages
+                        response = await acompletion(**completion_params)
+                        result = response["choices"][0]["message"]["content"]
+
+                        logger.info(f"✅ Successful call after truncation")
+                        return result
+
+                    except Exception as retry_e:
+                        logger.error(f"❌ LLM call failed even after truncation: {retry_e}")
+                        raise retry_e
+                else:
+                    logger.warning("⚠️ No truncation possible, messages already at minimum size")
+
             logger.error(f"❌ LLM call failed: {e}")
             raise

@@ -577,12 +650,16 @@ async def run_agent_prompt(self, user_prompt: str, max_turns: int = 15) -> str:
         ]

         final_result = ""
+        last_successful_size = None

         for turn in range(max_turns):
             logger.info(f"🤖 Agent turn {turn + 1}/{max_turns}")

             # Get LLM response
-            content = await self.gpt_call(messages)
+            content = await self.gpt_call(messages, last_successful_size)
+
+            # Update successful size for next iteration
+            last_successful_size = self._calculate_messages_size(messages)
             logger.debug(f"LLM Response: {content}")

             try: