Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 19 additions & 2 deletions docs/builtin-tools.md
Original file line number Diff line number Diff line change
Expand Up @@ -353,6 +353,23 @@ assert isinstance(result.output, BinaryImage)

_(This example is complete, it can be run "as is")_

To control the image resolution with Google image generation models (Gemini 3 Pro Image and later), use the `size` parameter:

```py {title="image_generation_google_resolution.py"}
from pydantic_ai import Agent, BinaryImage, ImageGenerationTool

agent = Agent(
'google-gla:gemini-3-pro-image-preview',
builtin_tools=[ImageGenerationTool(aspect_ratio='16:9', size='4K')],
output_type=BinaryImage,
)

result = agent.run_sync('Generate a high-resolution wide landscape illustration of an axolotl.')
assert isinstance(result.output, BinaryImage)
```

_(This example is complete, it can be run "as is")_

For more details, check the [API documentation][pydantic_ai.builtin_tools.ImageGenerationTool].

#### Provider Support
Expand All @@ -366,8 +383,8 @@ For more details, check the [API documentation][pydantic_ai.builtin_tools.ImageG
| `output_format` | ✅ | ❌ |
| `partial_images` | ✅ | ❌ |
| `quality` | ✅ | ❌ |
| `size` | ✅ | ❌ |
| `aspect_ratio` | ✅ (1:1, 2:3, 3:2) | ✅ |
| `size` | ✅ (1024x1024, 1024x1536, 1536x1024, auto) | ✅ (1K, 2K, 4K) |
| `aspect_ratio` | ✅ (1:1, 2:3, 3:2) | ✅ (1:1, 2:3, 3:2, 3:4, 4:3, 4:5, 5:4, 9:16, 16:9, 21:9) |

## Web Fetch Tool

Expand Down
5 changes: 3 additions & 2 deletions pydantic_ai_slim/pydantic_ai/builtin_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -311,12 +311,13 @@ class ImageGenerationTool(AbstractBuiltinTool):
* OpenAI Responses
"""

size: Literal['1024x1024', '1024x1536', '1536x1024', 'auto'] = 'auto'
size: Literal['1024x1024', '1024x1536', '1536x1024', '1K', '2K', '4K', 'auto'] = 'auto'
"""The size of the generated image.

Supported by:

* OpenAI Responses
* OpenAI Responses: '1024x1024', '1024x1536', '1536x1024', 'auto'
* Google (Gemini 3 Pro Image and later): '1K', '2K', '4K'
"""

aspect_ratio: ImageAspectRatio | None = None
Expand Down
8 changes: 6 additions & 2 deletions pydantic_ai_slim/pydantic_ai/models/google.py
Original file line number Diff line number Diff line change
Expand Up @@ -362,8 +362,12 @@ def _get_tools(
raise UserError(
"`ImageGenerationTool` is not supported by this model. Use a model with 'image' in the name instead."
)
if tool.aspect_ratio:
image_config = ImageConfigDict(aspect_ratio=tool.aspect_ratio)

image_config = ImageConfigDict()
if tool.aspect_ratio is not None:
image_config['aspect_ratio'] = tool.aspect_ratio
if tool.size in ('1K', '2K', '4K'):
image_config['image_size'] = tool.size
else: # pragma: no cover
raise UserError(
f'`{tool.__class__.__name__}` is not supported by `GoogleModel`. If it should be, please file an issue.'
Expand Down
8 changes: 8 additions & 0 deletions pydantic_ai_slim/pydantic_ai/models/openai.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,13 +151,21 @@
'3:2': '1536x1024',
}

_OPENAI_SUPPORTED_SIZES = ('auto', '1024x1024', '1024x1536', '1536x1024')


def _resolve_openai_image_generation_size(
tool: ImageGenerationTool,
) -> Literal['auto', '1024x1024', '1024x1536', '1536x1024']:
"""Map `ImageGenerationTool.aspect_ratio` to an OpenAI size string when provided."""
aspect_ratio = tool.aspect_ratio
if aspect_ratio is None:
if tool.size not in _OPENAI_SUPPORTED_SIZES:
supported = ', '.join(f"'{s}'" for s in _OPENAI_SUPPORTED_SIZES)
raise UserError(
f'OpenAI image generation only supports `size` values: {supported}. '
f'Got {tool.size!r}, specify one of the supported values.'
)
return tool.size

mapped_size = _OPENAI_ASPECT_RATIO_TO_SIZE.get(aspect_ratio)
Expand Down
20 changes: 20 additions & 0 deletions tests/models/test_google.py
Original file line number Diff line number Diff line change
Expand Up @@ -3652,6 +3652,26 @@ async def test_google_image_generation_tool_aspect_ratio(google_provider: Google
assert image_config == {'aspect_ratio': '16:9'}


async def test_google_image_generation_resolution(google_provider: GoogleProvider) -> None:
    """Check that `ImageGenerationTool.size` ends up as `image_size` in the Google image config."""
    google_model = GoogleModel('gemini-3-pro-image-preview', provider=google_provider)
    image_tool = ImageGenerationTool(size='2K')
    request_parameters = ModelRequestParameters(builtin_tools=[image_tool])

    # `_get_tools` is private, hence the pyright suppression on the call.
    tools, image_config = google_model._get_tools(request_parameters)  # pyright: ignore[reportPrivateUsage]

    # Only the image config carries the setting; no tool definitions are returned here.
    assert tools is None
    expected_image_config = {'image_size': '2K'}
    assert image_config == expected_image_config


async def test_google_image_generation_resolution_with_aspect_ratio(google_provider: GoogleProvider) -> None:
    """Check that `size` and `aspect_ratio` from `ImageGenerationTool` are both placed in the image config."""
    google_model = GoogleModel('gemini-3-pro-image-preview', provider=google_provider)
    image_tool = ImageGenerationTool(aspect_ratio='16:9', size='4K')
    request_parameters = ModelRequestParameters(builtin_tools=[image_tool])

    # `_get_tools` is private, hence the pyright suppression on the call.
    tools, image_config = google_model._get_tools(request_parameters)  # pyright: ignore[reportPrivateUsage]

    # Both settings must be present together; neither may overwrite the other.
    assert tools is None
    expected_image_config = {'aspect_ratio': '16:9', 'image_size': '4K'}
    assert image_config == expected_image_config


async def test_google_vertexai_image_generation(allow_model_requests: None, vertex_provider: GoogleProvider):
model = GoogleModel('gemini-2.5-flash-image', provider=vertex_provider)

Expand Down
7 changes: 7 additions & 0 deletions tests/models/test_openai_responses.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,13 @@ def test_openai_responses_image_generation_tool_aspect_ratio_conflicts_with_size
_resolve_openai_image_generation_size(tool)


def test_openai_responses_image_generation_tool_invalid_size() -> None:
    """Check that the OpenAI size resolver raises `UserError` for a `size` value it does not support ('2K')."""
    unsupported_size_tool = ImageGenerationTool(size='2K')
    expected_message = 'OpenAI image generation only supports `size` values'

    with pytest.raises(UserError, match=expected_message):
        _resolve_openai_image_generation_size(unsupported_size_tool)


async def test_openai_responses_model_simple_response_with_tool_call(allow_model_requests: None, openai_api_key: str):
model = OpenAIResponsesModel('gpt-4o', provider=OpenAIProvider(api_key=openai_api_key))

Expand Down
6 changes: 6 additions & 0 deletions tests/test_examples.py
Original file line number Diff line number Diff line change
Expand Up @@ -698,6 +698,12 @@ async def model_logic( # noqa: C901
FilePart(content=BinaryImage(data=b'fake', media_type='image/png', identifier='wide-axolotl-city')),
]
)
elif m.content == 'Generate a high-resolution wide landscape illustration of an axolotl.':
return ModelResponse(
parts=[
FilePart(content=BinaryImage(data=b'fake', media_type='image/png', identifier='high-res-axolotl')),
]
)
elif m.content == 'Generate a chart of y=x^2 for x=-5 to 5.':
return ModelResponse(
parts=[
Expand Down