-
Notifications
You must be signed in to change notification settings - Fork 1.5k
Add force_download support for Anthropic and OAIR models and clarify proper BinaryContent base64 handling
#3694
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
c642ac7
754c0f0
2f5bcf6
5d99ed2
ff5d79a
887a79f
6bba553
7389467
8ad810d
da9ec78
9a55ce2
703b772
35d8745
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -64,7 +64,6 @@ | |
| omit as OMIT, | ||
| ) | ||
| from anthropic.types.beta import ( | ||
| BetaBase64PDFBlockParam, | ||
| BetaBase64PDFSourceParam, | ||
| BetaCacheControlEphemeralParam, | ||
| BetaCitationsConfigParam, | ||
|
|
@@ -98,6 +97,7 @@ | |
| BetaRawMessageStreamEvent, | ||
| BetaRedactedThinkingBlock, | ||
| BetaRedactedThinkingBlockParam, | ||
| BetaRequestDocumentBlockParam, | ||
| BetaRequestMCPServerToolConfigurationParam, | ||
| BetaRequestMCPServerURLDefinitionParam, | ||
| BetaServerToolUseBlock, | ||
|
|
@@ -1034,6 +1034,31 @@ def _add_cache_control_to_last_param( | |
| # Add cache_control to the last param | ||
| last_param['cache_control'] = self._build_cache_control(ttl) | ||
|
|
||
| @staticmethod | ||
| def _map_binary_data(data: bytes, media_type: str) -> BetaContentBlockParam: | ||
| # Anthropic SDK accepts file-like objects (IO[bytes]) and handles base64 encoding internally | ||
| if media_type.startswith('image/'): | ||
| return BetaImageBlockParam( | ||
| source={'data': io.BytesIO(data), 'media_type': media_type, 'type': 'base64'}, # type: ignore | ||
| type='image', | ||
| ) | ||
| elif media_type == 'application/pdf': | ||
| return BetaRequestDocumentBlockParam( | ||
| source=BetaBase64PDFSourceParam( | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'm looking at what other sources this supports, and there's also [source type name missing from the extracted text]. We're adding support for uploaded files in #2611, but that PR has been stale for a bit, so it may be interesting for you to pick up. |
||
| data=io.BytesIO(data), | ||
| media_type='application/pdf', | ||
| type='base64', | ||
| ), | ||
| type='document', | ||
| ) | ||
| elif media_type == 'text/plain': | ||
| return BetaRequestDocumentBlockParam( | ||
| source=BetaPlainTextSourceParam(data=data.decode('utf-8'), media_type=media_type, type='text'), | ||
| type='document', | ||
| ) | ||
| else: | ||
| raise RuntimeError(f'Unsupported binary content media type for Anthropic: {media_type}') | ||
|
|
||
| @staticmethod | ||
| async def _map_user_prompt( | ||
| part: UserPromptPart, | ||
|
|
@@ -1049,30 +1074,25 @@ async def _map_user_prompt( | |
| elif isinstance(item, CachePoint): | ||
| yield item | ||
| elif isinstance(item, BinaryContent): | ||
| if item.is_image: | ||
| yield BetaImageBlockParam( | ||
| source={'data': io.BytesIO(item.data), 'media_type': item.media_type, 'type': 'base64'}, # type: ignore | ||
| type='image', | ||
| ) | ||
| elif item.media_type == 'application/pdf': | ||
| yield BetaBase64PDFBlockParam( | ||
| source=BetaBase64PDFSourceParam( | ||
| data=io.BytesIO(item.data), | ||
| media_type='application/pdf', | ||
| type='base64', | ||
| ), | ||
| type='document', | ||
| ) | ||
| else: | ||
| raise RuntimeError('Only images and PDFs are supported for binary content') | ||
| yield AnthropicModel._map_binary_data(item.data, item.media_type) | ||
| elif isinstance(item, ImageUrl): | ||
| yield BetaImageBlockParam(source={'type': 'url', 'url': item.url}, type='image') | ||
| if item.force_download: | ||
| downloaded = await download_item(item, data_format='bytes') | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We should also respect [remainder of comment missing from the extracted text]
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Would it make sense to use [remainder of comment missing from the extracted text] |
||
| yield AnthropicModel._map_binary_data(downloaded['data'], item.media_type) | ||
| else: | ||
| yield BetaImageBlockParam(source={'type': 'url', 'url': item.url}, type='image') | ||
| elif isinstance(item, DocumentUrl): | ||
| if item.media_type == 'application/pdf': | ||
| yield BetaBase64PDFBlockParam(source={'url': item.url, 'type': 'url'}, type='document') | ||
| if item.force_download: | ||
| downloaded = await download_item(item, data_format='bytes') | ||
| yield AnthropicModel._map_binary_data(downloaded['data'], item.media_type) | ||
| else: | ||
| yield BetaRequestDocumentBlockParam( | ||
| source={'url': item.url, 'type': 'url'}, type='document' | ||
| ) | ||
| elif item.media_type == 'text/plain': | ||
| downloaded_item = await download_item(item, data_format='text') | ||
| yield BetaBase64PDFBlockParam( | ||
| yield BetaRequestDocumentBlockParam( | ||
| source=BetaPlainTextSourceParam( | ||
| data=downloaded_item['data'], media_type=item.media_type, type='text' | ||
| ), | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.