diff --git a/docs/builtin-tools.md b/docs/builtin-tools.md index 85acb7dd37..02e5a5ccc0 100644 --- a/docs/builtin-tools.md +++ b/docs/builtin-tools.md @@ -353,6 +353,23 @@ assert isinstance(result.output, BinaryImage) _(This example is complete, it can be run "as is")_ +To control the image resolution with Google image generation models (Gemini 3 Pro Image and later), use the `size` parameter: + +```py {title="image_generation_google_resolution.py"} +from pydantic_ai import Agent, BinaryImage, ImageGenerationTool + +agent = Agent( + 'google-gla:gemini-3-pro-image-preview', + builtin_tools=[ImageGenerationTool(aspect_ratio='16:9', size='4K')], + output_type=BinaryImage, +) + +result = agent.run_sync('Generate a high-resolution wide landscape illustration of an axolotl.') +assert isinstance(result.output, BinaryImage) +``` + +_(This example is complete, it can be run "as is")_ + For more details, check the [API documentation][pydantic_ai.builtin_tools.ImageGenerationTool]. #### Provider Support @@ -366,8 +383,8 @@ For more details, check the [API documentation][pydantic_ai.builtin_tools.ImageG | `output_format` | ✅ | ❌ | | `partial_images` | ✅ | ❌ | | `quality` | ✅ | ❌ | -| `size` | ✅ | ❌ | -| `aspect_ratio` | ✅ (1:1, 2:3, 3:2) | ✅ | +| `size` | ✅ (1024x1024, 1024x1536, 1536x1024, auto) | ✅ (1K, 2K, 4K) | +| `aspect_ratio` | ✅ (1:1, 2:3, 3:2) | ✅ (1:1, 2:3, 3:2, 3:4, 4:3, 4:5, 5:4, 9:16, 16:9, 21:9) | ## Web Fetch Tool diff --git a/pydantic_ai_slim/pydantic_ai/builtin_tools.py b/pydantic_ai_slim/pydantic_ai/builtin_tools.py index 912538b715..7fef44eaeb 100644 --- a/pydantic_ai_slim/pydantic_ai/builtin_tools.py +++ b/pydantic_ai_slim/pydantic_ai/builtin_tools.py @@ -311,12 +311,13 @@ class ImageGenerationTool(AbstractBuiltinTool): * OpenAI Responses """ - size: Literal['1024x1024', '1024x1536', '1536x1024', 'auto'] = 'auto' + size: Literal['1024x1024', '1024x1536', '1536x1024', '1K', '2K', '4K', 'auto'] = 'auto' """The size of the generated image. 
Supported by: - * OpenAI Responses + * OpenAI Responses: '1024x1024', '1024x1536', '1536x1024', 'auto' + * Google (Gemini 3 Pro Image and later): '1K', '2K', '4K' """ aspect_ratio: ImageAspectRatio | None = None diff --git a/pydantic_ai_slim/pydantic_ai/models/google.py b/pydantic_ai_slim/pydantic_ai/models/google.py index c6f5459f08..2cde4c5fb5 100644 --- a/pydantic_ai_slim/pydantic_ai/models/google.py +++ b/pydantic_ai_slim/pydantic_ai/models/google.py @@ -362,8 +362,12 @@ def _get_tools( raise UserError( "`ImageGenerationTool` is not supported by this model. Use a model with 'image' in the name instead." ) - if tool.aspect_ratio: - image_config = ImageConfigDict(aspect_ratio=tool.aspect_ratio) + + image_config = ImageConfigDict() + if tool.aspect_ratio is not None: + image_config['aspect_ratio'] = tool.aspect_ratio + if tool.size in ('1K', '2K', '4K'): + image_config['image_size'] = tool.size else: # pragma: no cover raise UserError( f'`{tool.__class__.__name__}` is not supported by `GoogleModel`. If it should be, please file an issue.' 
diff --git a/pydantic_ai_slim/pydantic_ai/models/openai.py b/pydantic_ai_slim/pydantic_ai/models/openai.py index efe9629c3a..67935592d8 100644 --- a/pydantic_ai_slim/pydantic_ai/models/openai.py +++ b/pydantic_ai_slim/pydantic_ai/models/openai.py @@ -158,7 +158,11 @@ def _resolve_openai_image_generation_size( """Map `ImageGenerationTool.aspect_ratio` to an OpenAI size string when provided.""" aspect_ratio = tool.aspect_ratio if aspect_ratio is None: - return tool.size + # Only return size if it's an OpenAI-supported value + if tool.size in ('auto', '1024x1024', '1024x1536', '1536x1024'): + return tool.size + # Default to auto if an unsupported size is provided + return 'auto' mapped_size = _OPENAI_ASPECT_RATIO_TO_SIZE.get(aspect_ratio) if mapped_size is None: diff --git a/tests/models/test_google.py b/tests/models/test_google.py index be6d4bd68a..5833b8ee2b 100644 --- a/tests/models/test_google.py +++ b/tests/models/test_google.py @@ -3652,6 +3652,26 @@ async def test_google_image_generation_tool_aspect_ratio(google_provider: Google assert image_config == {'aspect_ratio': '16:9'} +async def test_google_image_generation_resolution(google_provider: GoogleProvider) -> None: + """Test that the `size` parameter of ImageGenerationTool is passed to image_config as `image_size`.""" + model = GoogleModel('gemini-3-pro-image-preview', provider=google_provider) + params = ModelRequestParameters(builtin_tools=[ImageGenerationTool(size='2K')]) + + tools, image_config = model._get_tools(params) # pyright: ignore[reportPrivateUsage] + assert tools is None + assert image_config == {'image_size': '2K'} + + +async def test_google_image_generation_resolution_with_aspect_ratio(google_provider: GoogleProvider) -> None: + """Test that `size` and `aspect_ratio` from ImageGenerationTool work together.""" + model = GoogleModel('gemini-3-pro-image-preview', provider=google_provider) + params = ModelRequestParameters(builtin_tools=[ImageGenerationTool(aspect_ratio='16:9', size='4K')]) + + tools, 
image_config = model._get_tools(params) # pyright: ignore[reportPrivateUsage] + assert tools is None + assert image_config == {'aspect_ratio': '16:9', 'image_size': '4K'} + + async def test_google_vertexai_image_generation(allow_model_requests: None, vertex_provider: GoogleProvider): model = GoogleModel('gemini-2.5-flash-image', provider=vertex_provider) diff --git a/tests/models/test_openai_responses.py b/tests/models/test_openai_responses.py index b03e99bb91..092e2be49f 100644 --- a/tests/models/test_openai_responses.py +++ b/tests/models/test_openai_responses.py @@ -162,6 +162,11 @@ def test_openai_responses_image_generation_tool_aspect_ratio_conflicts_with_size _resolve_openai_image_generation_size(tool) +def test_openai_responses_image_generation_tool_unsupported_size_falls_back_to_auto() -> None: + tool = ImageGenerationTool(size='2K') + assert _resolve_openai_image_generation_size(tool) == 'auto' + + async def test_openai_responses_model_simple_response_with_tool_call(allow_model_requests: None, openai_api_key: str): model = OpenAIResponsesModel('gpt-4o', provider=OpenAIProvider(api_key=openai_api_key)) diff --git a/tests/test_examples.py b/tests/test_examples.py index 8ed0828250..bc887504d2 100644 --- a/tests/test_examples.py +++ b/tests/test_examples.py @@ -698,6 +698,12 @@ async def model_logic( # noqa: C901 FilePart(content=BinaryImage(data=b'fake', media_type='image/png', identifier='wide-axolotl-city')), ] ) + elif m.content == 'Generate a high-resolution wide landscape illustration of an axolotl.': + return ModelResponse( + parts=[ + FilePart(content=BinaryImage(data=b'fake', media_type='image/png', identifier='high-res-axolotl')), + ] + ) elif m.content == 'Generate a chart of y=x^2 for x=-5 to 5.': return ModelResponse( parts=[