5 changes: 5 additions & 0 deletions pyproject.toml
@@ -55,6 +55,10 @@ dependencies = [
"opentelemetry-exporter-otlp-proto-http>=1.36.0",
"opentelemetry-resourcedetector-gcp>=1.9.0a0, <2.0.0",
"opentelemetry-sdk>=1.36.0, <1.40.0",
"openpyxl>=3.1.0", # For spreadsheet parsing
"pandas>=2.2.3", # For spreadsheet parsing
"odfpy>=1.4.1", # For spreadsheet parsing
"xlrd>=2.0.1", # For spreadsheet parsing
"pyarrow>=14.0.0",
"pydantic>=2.0, <3.0.0", # For data validation/models
"python-dateutil>=2.9.0.post0, <3.0.0", # For Vertext AI Session Service
@@ -63,6 +67,7 @@ dependencies = [
"sqlalchemy-spanner>=1.14.0", # Spanner database session service
"sqlalchemy>=2.0, <3.0.0", # SQL database ORM
"starlette>=0.49.1, <1.0.0", # For FastAPI CLI
"tabulate>=0.9.0", # For spreadsheet parsing
"tenacity>=9.0.0, <10.0.0", # For Retry management
"typing-extensions>=4.5, <5",
"tzlocal>=5.3, <6.0", # Time zone utilities
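Context on the new dependency group: pandas only orchestrates spreadsheet reading. It delegates `.xlsx` files to openpyxl, legacy `.xls` files to xlrd, OpenDocument `.ods` files to odfpy, and `DataFrame.to_markdown()` rendering to tabulate. A minimal sketch of that interaction (illustrative only, not part of the diff):

```python
# Minimal sketch: pandas picks a reader engine per file format, and
# DataFrame.to_markdown() needs tabulate installed for table rendering.
import io

import pandas as pd

df = pd.DataFrame({'col1': [1, 2], 'col2': ['a', 'b']})

buf = io.BytesIO()
df.to_excel(buf, index=False, engine='openpyxl')  # writes .xlsx via openpyxl

roundtrip = pd.read_excel(io.BytesIO(buf.getvalue()))  # reads .xlsx via openpyxl
print(roundtrip.to_markdown(index=False))  # rendering delegated to tabulate
```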
41 changes: 41 additions & 0 deletions src/google/adk/tools/load_artifacts_tool.py
@@ -28,6 +28,9 @@
from ..features import is_feature_enabled
from .base_tool import BaseTool

import io
import pandas as pd

# MIME types Gemini accepts for inline data in requests.
_GEMINI_SUPPORTED_INLINE_MIME_PREFIXES = (
'image/',
@@ -40,6 +43,11 @@
'application/json',
'application/xml',
})
_SPREADSHEET_MIME_TYPES = frozenset({
'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', # .xlsx
'application/vnd.ms-excel', # .xls
'application/vnd.oasis.opendocument.spreadsheet', # .ods
})

if TYPE_CHECKING:
from ..models.llm_request import LlmRequest
@@ -76,6 +84,35 @@ def _maybe_base64_to_bytes(data: str) -> bytes | None:
return None


def _parse_spreadsheet(data: bytes) -> str:
"""Parses a spreadsheet into a markdown representation."""
try:
# Load the spreadsheet
xl = pd.ExcelFile(io.BytesIO(data))

output = []

# Process each sheet
for sheet_name in xl.sheet_names:
df = xl.parse(sheet_name)
if df.empty:
continue

# Convert to markdown table
markdown_table = df.to_markdown(index=False, numalign="left", stralign="left")

output.append(f"### Sheet: {sheet_name}\n\n{markdown_table}")

if not output:
return "[Empty Spreadsheet]"

return "\n\n".join(output)

except (ValueError, ImportError) as e:
logger.warning(f"Failed to parse spreadsheet: {e}")
return f"[Error parsing spreadsheet: {e}]"


def _as_safe_part_for_llm(
artifact: types.Part, artifact_name: str
) -> types.Part:
@@ -111,6 +148,10 @@ def _as_safe_part_for_llm(
except UnicodeDecodeError:
return types.Part.from_text(text=data.decode('utf-8', errors='replace'))

if mime_type in _SPREADSHEET_MIME_TYPES:
text_content = _parse_spreadsheet(data)
return types.Part.from_text(text=text_content)

size_kb = len(data) / 1024
return types.Part.from_text(
text=(
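A rough sketch of what the new `_parse_spreadsheet` helper produces for a small workbook, assuming the implementation above; the output in the trailing comment is approximate, since exact column padding comes from tabulate:

```python
import io

import pandas as pd

buf = io.BytesIO()
with pd.ExcelWriter(buf, engine='openpyxl') as writer:
  pd.DataFrame({'col1': [1, 2], 'col2': ['a', 'b']}).to_excel(
      writer, sheet_name='Data', index=False
  )
  pd.DataFrame().to_excel(writer, sheet_name='Empty', index=False)

# _parse_spreadsheet(buf.getvalue()) would return roughly:
#
# ### Sheet: Data
#
# | col1   | col2   |
# |:-------|:-------|
# | 1      | a      |
# | 2      | b      |
#
# The 'Empty' sheet is skipped because its parsed DataFrame is empty.
```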
64 changes: 64 additions & 0 deletions tests/unittests/tools/test_load_artifacts_tool.py
@@ -180,3 +180,67 @@ def test_get_declaration_with_json_schema_feature_enabled():
},
},
}


@mark.asyncio
async def test_load_artifacts_parses_spreadsheet():
"""Spreadsheet artifacts are parsed into markdown."""
artifact_name = 'test.xlsx'

import pandas as pd
import io

df = pd.DataFrame({'col1': [1, 2], 'col2': ['a', 'b']})
output = io.BytesIO()
# Use openpyxl as engine since it is in deps
with pd.ExcelWriter(output, engine='openpyxl') as writer:
df.to_excel(writer, sheet_name='Sheet1', index=False)
xlsx_bytes = output.getvalue()

artifact = types.Part(
inline_data=types.Blob(
data=xlsx_bytes,
mime_type='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
)
)

tool_context = _StubToolContext({artifact_name: artifact})
llm_request = LlmRequest(
contents=[
types.Content(
role='user',
parts=[
types.Part(
function_response=types.FunctionResponse(
name='load_artifacts',
response={'artifact_names': [artifact_name]},
)
)
],
)
]
)

await load_artifacts_tool.process_llm_request(
tool_context=tool_context, llm_request=llm_request
)

artifact_part = llm_request.contents[-1].parts[1]
assert artifact_part.inline_data is None

# Check for Markdown table content
# We expect something like:
# ### Sheet: Sheet1
#
# | col1 | col2 |
# | :--- | :--- |
# | 1 | a |
# | 2 | b |

markdown_output = artifact_part.text

assert "Sheet1" in markdown_output
assert "| col1" in markdown_output
assert "| col2" in markdown_output
assert "| 1" in markdown_output and "| a" in markdown_output
assert "| 2" in markdown_output and "| b" in markdown_output