From c31b1819e06cc89433b6d50689b572456a7c4c22 Mon Sep 17 00:00:00 2001 From: Issei Nakamura Date: Tue, 10 Feb 2026 11:32:05 +0900 Subject: [PATCH 1/3] feat: add spreadsheet mime types to load_artifacts_tool --- pyproject.toml | 5 ++ src/google/adk/tools/load_artifacts_tool.py | 41 ++++++++++++ .../tools/test_load_artifacts_tool.py | 62 +++++++++++++++++++ 3 files changed, 108 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index da05cfcee9..a97ddf160f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -55,6 +55,10 @@ dependencies = [ "opentelemetry-exporter-otlp-proto-http>=1.36.0", "opentelemetry-resourcedetector-gcp>=1.9.0a0, <2.0.0", "opentelemetry-sdk>=1.36.0, <1.40.0", + "openpyxl>=3.1.0", # For spreadsheet parsing + "pandas>=2.2.3", # For spreadsheet parsing + "odfpy>=1.4.1", # For spreadsheet parsing + "xlrd>=2.0.1", # For spreadsheet parsing "pyarrow>=14.0.0", "pydantic>=2.0, <3.0.0", # For data validation/models "python-dateutil>=2.9.0.post0, <3.0.0", # For Vertext AI Session Service @@ -63,6 +67,7 @@ dependencies = [ "sqlalchemy-spanner>=1.14.0", # Spanner database session service "sqlalchemy>=2.0, <3.0.0", # SQL database ORM "starlette>=0.49.1, <1.0.0", # For FastAPI CLI + "tabulate>=0.9.0", # For spreadsheet parsing "tenacity>=9.0.0, <10.0.0", # For Retry management "typing-extensions>=4.5, <5", "tzlocal>=5.3, <6.0", # Time zone utilities diff --git a/src/google/adk/tools/load_artifacts_tool.py b/src/google/adk/tools/load_artifacts_tool.py index ec717bad4c..3b2df63582 100644 --- a/src/google/adk/tools/load_artifacts_tool.py +++ b/src/google/adk/tools/load_artifacts_tool.py @@ -28,6 +28,9 @@ from ..features import is_feature_enabled from .base_tool import BaseTool +import io +import pandas as pd + # MIME types Gemini accepts for inline data in requests. 
_GEMINI_SUPPORTED_INLINE_MIME_PREFIXES = ( 'image/', @@ -40,6 +43,11 @@ 'application/json', 'application/xml', }) +_SPREADSHEET_MIME_TYPES = frozenset({ + 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', # .xlsx + 'application/vnd.ms-excel', # .xls + 'application/vnd.oasis.opendocument.spreadsheet', # .ods +}) if TYPE_CHECKING: from ..models.llm_request import LlmRequest @@ -76,6 +84,35 @@ def _maybe_base64_to_bytes(data: str) -> bytes | None: return None +def _parse_spreadsheet(data: bytes, mime_type: str) -> str: + """Parses a spreadsheet into a markdown representation.""" + try: + # Load the spreadsheet + xl = pd.ExcelFile(io.BytesIO(data)) + + output = [] + + # Process each sheet + for sheet_name in xl.sheet_names: + df = xl.parse(sheet_name) + if df.empty: + continue + + # Convert to markdown table + markdown_table = df.to_markdown(index=False, numalign="left", stralign="left") + + output.append(f"### Sheet: {sheet_name}\n\n{markdown_table}") + + if not output: + return "[Empty Spreadsheet]" + + return "\n\n".join(output) + + except Exception as e: + logger.warning(f"Failed to parse spreadsheet: {e}") + return f"[Error parsing spreadsheet: {e}]" + + def _as_safe_part_for_llm( artifact: types.Part, artifact_name: str ) -> types.Part: @@ -111,6 +148,10 @@ def _as_safe_part_for_llm( except UnicodeDecodeError: return types.Part.from_text(text=data.decode('utf-8', errors='replace')) + if mime_type in _SPREADSHEET_MIME_TYPES: + text_content = _parse_spreadsheet(data, mime_type) + return types.Part.from_text(text=text_content) + size_kb = len(data) / 1024 return types.Part.from_text( text=( diff --git a/tests/unittests/tools/test_load_artifacts_tool.py b/tests/unittests/tools/test_load_artifacts_tool.py index 6a420574f0..496214ee19 100644 --- a/tests/unittests/tools/test_load_artifacts_tool.py +++ b/tests/unittests/tools/test_load_artifacts_tool.py @@ -180,3 +180,65 @@ def test_get_declaration_with_json_schema_feature_enabled(): }, }, } + + 
+@mark.asyncio +async def test_load_artifacts_parses_spreadsheet(): + """Spreadsheet artifacts are parsed into markdown.""" + artifact_name = 'test.xlsx' + + import pandas as pd + import io + + df = pd.DataFrame({'col1': [1, 2], 'col2': ['a', 'b']}) + output = io.BytesIO() + # Use openpyxl as engine since it is in deps + with pd.ExcelWriter(output, engine='openpyxl') as writer: + df.to_excel(writer, sheet_name='Sheet1', index=False) + xlsx_bytes = output.getvalue() + + artifact = types.Part( + inline_data=types.Blob( + data=xlsx_bytes, + mime_type='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet' + ) + ) + + tool_context = _StubToolContext({artifact_name: artifact}) + llm_request = LlmRequest( + contents=[ + types.Content( + role='user', + parts=[ + types.Part( + function_response=types.FunctionResponse( + name='load_artifacts', + response={'artifact_names': [artifact_name]}, + ) + ) + ], + ) + ] + ) + + await load_artifacts_tool.process_llm_request( + tool_context=tool_context, llm_request=llm_request + ) + + artifact_part = llm_request.contents[-1].parts[1] + assert artifact_part.inline_data is None + + # Check for Markdown table content + # We expect something like: + # ### Sheet: Sheet1 + # + # | col1 | col2 | + # | :--- | :--- | + # | 1 | a | + # | 2 | b | + + assert "Sheet1" in artifact_part.text + assert "col1" in artifact_part.text + assert "col2" in artifact_part.text + assert "1" in artifact_part.text + assert "a" in artifact_part.text From e48d271b4b450dafc58051c90fa1cf2a20400718 Mon Sep 17 00:00:00 2001 From: issein <61493341+n-issei-777@users.noreply.github.com> Date: Wed, 11 Feb 2026 12:04:22 +0900 Subject: [PATCH 2/3] Update src/google/adk/tools/load_artifacts_tool.py Delete mime_type parameter within the _parse_spreadsheet function. 
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- src/google/adk/tools/load_artifacts_tool.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/google/adk/tools/load_artifacts_tool.py b/src/google/adk/tools/load_artifacts_tool.py index 3b2df63582..ea5cb38c8e 100644 --- a/src/google/adk/tools/load_artifacts_tool.py +++ b/src/google/adk/tools/load_artifacts_tool.py @@ -84,7 +84,7 @@ def _maybe_base64_to_bytes(data: str) -> bytes | None: return None -def _parse_spreadsheet(data: bytes, mime_type: str) -> str: +def _parse_spreadsheet(data: bytes) -> str: """Parses a spreadsheet into a markdown representation.""" try: # Load the spreadsheet From 69df525b809b69d73903e578d43c7b9eaafbe718 Mon Sep 17 00:00:00 2001 From: Issei Nakamura Date: Wed, 11 Feb 2026 13:00:10 +0900 Subject: [PATCH 3/3] fix: catch specific exceptions and improve test assertions --- src/google/adk/tools/load_artifacts_tool.py | 4 ++-- tests/unittests/tools/test_load_artifacts_tool.py | 12 +++++++----- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/src/google/adk/tools/load_artifacts_tool.py b/src/google/adk/tools/load_artifacts_tool.py index ea5cb38c8e..1bcbad16fd 100644 --- a/src/google/adk/tools/load_artifacts_tool.py +++ b/src/google/adk/tools/load_artifacts_tool.py @@ -108,7 +108,7 @@ def _parse_spreadsheet(data: bytes) -> str: return "\n\n".join(output) - except Exception as e: + except (ValueError, ImportError) as e: logger.warning(f"Failed to parse spreadsheet: {e}") return f"[Error parsing spreadsheet: {e}]" @@ -149,7 +149,7 @@ def _as_safe_part_for_llm( return types.Part.from_text(text=data.decode('utf-8', errors='replace')) if mime_type in _SPREADSHEET_MIME_TYPES: - text_content = _parse_spreadsheet(data, mime_type) + text_content = _parse_spreadsheet(data) return types.Part.from_text(text=text_content) size_kb = len(data) / 1024 diff --git a/tests/unittests/tools/test_load_artifacts_tool.py 
b/tests/unittests/tools/test_load_artifacts_tool.py index 496214ee19..7f63a04c4c 100644 --- a/tests/unittests/tools/test_load_artifacts_tool.py +++ b/tests/unittests/tools/test_load_artifacts_tool.py @@ -237,8 +237,10 @@ async def test_load_artifacts_parses_spreadsheet(): # | 1 | a | # | 2 | b | - assert "Sheet1" in artifact_part.text - assert "col1" in artifact_part.text - assert "col2" in artifact_part.text - assert "1" in artifact_part.text - assert "a" in artifact_part.text + markdown_output = artifact_part.text + + assert "Sheet1" in markdown_output + assert "| col1" in markdown_output + assert "| col2" in markdown_output + assert "| 1" in markdown_output and "| a" in markdown_output + assert "| 2" in markdown_output and "| b" in markdown_output \ No newline at end of file