|
23 | 23 | from openjudge.models.base_chat_model import BaseChatModel |
24 | 24 | from openjudge.models.schema.oai.message import ChatMessage |
25 | 25 | from openjudge.models.schema.prompt_template import LanguageEnum, PromptTemplate |
| 26 | +from openjudge.utils.utils import parse_structured_chat_response |
26 | 27 |
|
27 | 28 | # pylint: disable=line-too-long |
28 | 29 |
|
@@ -222,30 +223,27 @@ async def _aevaluate_single_image( |
222 | 223 | context_below=context_below or "", |
223 | 224 | ) |
224 | 225 |
|
225 | | - try: |
226 | | - # Format image content for OpenAI API |
227 | | - content = [{"type": "text", "text": prompt}] |
228 | | - |
229 | | - if image.url: |
230 | | - content.append({"type": "image_url", "image_url": {"url": image.url}}) |
231 | | - elif image.base64: |
232 | | - # Format base64 image with data URL scheme |
233 | | - image_format = image.format or "jpeg" |
234 | | - data_url = f"data:image/{image_format};base64,{image.base64}" |
235 | | - content.append({"type": "image_url", "image_url": {"url": data_url}}) |
236 | | - |
237 | | - # Call model without structured output |
238 | | - chat_response = await self.model.achat( |
239 | | - messages=[{"role": "user", "content": content}], |
240 | | - structured_model=GraderScoreCallback, |
241 | | - ) |
242 | | - score = chat_response.parsed["score"] |
243 | | - reason = chat_response.parsed["reason"] |
244 | | - return score, reason |
| 226 | + # Format image content for OpenAI API |
| 227 | + content = [{"type": "text", "text": prompt}] |
245 | 228 |
|
246 | | - except Exception as e: |
247 | | - logger.error(f"Error evaluating image coherence: {e}") |
248 | | - return 0.0, f"Evaluation error: {str(e)}" |
| 229 | + if image.url: |
| 230 | + content.append({"type": "image_url", "image_url": {"url": image.url}}) |
| 231 | + elif image.base64: |
| 232 | + # Format base64 image with data URL scheme |
| 233 | + image_format = image.format or "jpeg" |
| 234 | + data_url = f"data:image/{image_format};base64,{image.base64}" |
| 235 | + content.append({"type": "image_url", "image_url": {"url": data_url}}) |
| 236 | + |
| 237 | + chat_response = await self.model.achat( |
| 238 | + messages=[{"role": "user", "content": content}], |
| 239 | + structured_model=GraderScoreCallback, |
| 240 | + ) |
| 241 | + |
| 242 | + # Default to 5.0 (neutral score on 0-10 scale) for missing fields |
| 243 | + parsed = await parse_structured_chat_response(chat_response) |
| 244 | + score = parsed.get("score", 5.0) |
| 245 | + reason = parsed.get("reason", "") |
| 246 | + return score, reason |
249 | 247 |
|
250 | 248 | async def _acompute( |
251 | 249 | self, |
@@ -331,7 +329,16 @@ async def aevaluate( |
331 | 329 | ... ] |
332 | 330 | ... ) |
333 | 331 | """ |
334 | | - score, details = await self._acompute(response, **kwargs) |
| 332 | + try: |
| 333 | + score, details = await self._acompute(response, **kwargs) |
| 334 | + except Exception as e: |
| 335 | + logger.exception(f"Error evaluating image coherence: {e}") |
| 336 | + from openjudge.graders.base_grader import GraderError |
| 337 | + |
| 338 | + return GraderError( |
| 339 | + name=self.name, |
| 340 | + error=f"Evaluation error: {str(e)}", |
| 341 | + ) |
335 | 342 |
|
336 | 343 | if "error" in details: |
337 | 344 | return GraderScore( |
|
0 commit comments