@@ -329,24 +329,57 @@ async def read_mcp_resource(self, resource_uri: str, server_name: str = None) ->
329329 logger .error (f"❌ Failed to read MCP resource: { e } " )
330330 return {"error" : str (e ), "isError" : True }
331331
332- async def gpt_call (self , messages : List [Dict [str , str ]]) -> str :
333- """Make a call to the LLM."""
334- try :
335- # Validate messages to prevent empty content that causes Bedrock API errors
336- validated_messages = []
337- for msg in messages :
338- content = msg .get ("content" , "" ).strip ()
339- if not content :
340- logger .warning (f"⚠️ Skipping empty message with role: { msg .get ('role' , 'unknown' )} " )
341- continue
342- validated_messages .append ({
343- "role" : msg ["role" ],
344- "content" : content
345- })
332+ def _calculate_messages_size (self , messages : List [Dict [str , str ]]) -> int :
333+ """Calculate approximate size of messages in characters."""
334+ total_size = 0
335+ for msg in messages :
336+ total_size += len (msg .get ("content" , "" )) + len (msg .get ("role" , "" ))
337+ return total_size
338+
def _truncate_messages(self, messages: List[Dict[str, str]], target_size: int) -> List[Dict[str, str]]:
    """Trim a conversation so its approximate character size fits target_size.

    A leading system message, when present, is always kept, even if it alone
    exceeds the budget. The remaining messages are then taken from the newest
    backwards until the next one would overflow the budget; everything older
    than that point is dropped, so the result is the optional system message
    followed by a contiguous tail of the conversation in original order.

    Args:
        messages: Chat messages, each a dict with "role" and "content".
        target_size: Budget in characters, measured the same way as
            ``_calculate_messages_size``.

    Returns:
        A new list with the retained messages. The input list is not
        modified; an empty input is returned unchanged.
    """
    if not messages:
        return messages

    # Only a leading system message is pinned. When there is none, the first
    # message must compete for space like any other; previously it was
    # skipped unconditionally, so a lone user message produced an empty list.
    has_system = messages[0].get("role") == "system"
    truncated = [messages[0]] if has_system else []
    candidates = messages[1:] if has_system else messages
    current_size = self._calculate_messages_size(truncated)

    # Walk from the newest message backwards; collect newest-first and reverse
    # once at the end instead of inserting at the front on every step.
    kept_newest_first = []
    for msg in reversed(candidates):
        msg_size = self._calculate_messages_size([msg])
        if current_size + msg_size > target_size:
            # Stop at the first message that does not fit so the kept tail
            # stays contiguous (no gaps in the conversation).
            break
        kept_newest_first.append(msg)
        current_size += msg_size

    # Restore chronological order after the optional system message.
    truncated.extend(reversed(kept_newest_first))

    logger.info(f"🔄 Truncated messages from {len(messages)} to {len(truncated)} (size: {current_size} chars)")
    return truncated
349363
364+ async def gpt_call (self , messages : List [Dict [str , str ]], last_successful_size : Optional [int ] = None ) -> str :
365+ """Make a call to the LLM with automatic truncation on token limit errors."""
366+ # Validate messages to prevent empty content that causes Bedrock API errors
367+ validated_messages = []
368+ for msg in messages :
369+ content = msg .get ("content" , "" ).strip ()
370+ if not content :
371+ logger .warning (f"⚠️ Skipping empty message with role: { msg .get ('role' , 'unknown' )} " )
372+ continue
373+ validated_messages .append ({
374+ "role" : msg ["role" ],
375+ "content" : content
376+ })
377+
378+ if not validated_messages :
379+ raise ValueError ("No valid messages to send to LLM" )
380+
381+ # Try the call with original messages first
382+ try :
350383 # Prepare completion parameters
351384 completion_params = {
352385 "model" : self .model ,
@@ -359,8 +392,48 @@ async def gpt_call(self, messages: List[Dict[str, str]]) -> str:
359392 completion_params ["model_id" ] = self .model_id
360393
361394 response = await acompletion (** completion_params )
362- return response ["choices" ][0 ]["message" ]["content" ]
395+ result = response ["choices" ][0 ]["message" ]["content" ]
396+
397+ current_size = self ._calculate_messages_size (validated_messages )
398+ logger .debug (f"✅ Successful call with message size: { current_size } chars" )
399+
400+ return result
401+
363402 except Exception as e :
403+ error_str = str (e )
404+
405+ # Check if this is a token limit error
406+ is_token_limit_error = (
407+ "Request body too large" in error_str or
408+ "too large for" in error_str or
409+ "Max size:" in error_str or
410+ "maximum context length" in error_str or
411+ "token limit" in error_str
412+ )
413+
414+ if is_token_limit_error and last_successful_size :
415+ logger .warning (f"⚠️ Token limit exceeded, attempting truncation based on last successful size: { last_successful_size } " )
416+
417+ # Use 80% of last successful size as target to provide some buffer
418+ target_size = int (last_successful_size * 0.8 )
419+ truncated_messages = self ._truncate_messages (validated_messages , target_size )
420+
421+ if len (truncated_messages ) < len (validated_messages ):
422+ try :
423+ # Retry with truncated messages
424+ completion_params ["messages" ] = truncated_messages
425+ response = await acompletion (** completion_params )
426+ result = response ["choices" ][0 ]["message" ]["content" ]
427+
428+ logger .info (f"✅ Successful call after truncation" )
429+ return result
430+
431+ except Exception as retry_e :
432+ logger .error (f"❌ LLM call failed even after truncation: { retry_e } " )
433+ raise retry_e
434+ else :
435+ logger .warning ("⚠️ No truncation possible, messages already at minimum size" )
436+
364437 logger .error (f"❌ LLM call failed: { e } " )
365438 raise
366439
@@ -577,12 +650,16 @@ async def run_agent_prompt(self, user_prompt: str, max_turns: int = 15) -> str:
577650 ]
578651
579652 final_result = ""
653+ last_successful_size = None
580654
581655 for turn in range (max_turns ):
582656 logger .info (f"🤖 Agent turn { turn + 1 } /{ max_turns } " )
583657
584658 # Get LLM response
585- content = await self .gpt_call (messages )
659+ content = await self .gpt_call (messages , last_successful_size )
660+
661+ # Update successful size for next iteration
662+ last_successful_size = self ._calculate_messages_size (messages )
586663 logger .debug (f"LLM Response: { content } " )
587664
588665 try :
0 commit comments