diff --git a/CHANGELOG.md b/CHANGELOG.md index e71ea5e..83f7e9b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] ### Added +- **Fetch Web Tool** - HTTP web content fetching capability (ADR 0007) + - Direct URL content fetching with multiple format support (text, JSON, HTML, raw) + - Configurable timeout (default 30s, max 5min) and size limits (default 1MB, max 50MB) + - Custom HTTP headers support for authentication and API access + - Redirect handling with configurable behavior + - Security controls: URL validation, response size limits, timeout enforcement + - Model-agnostic design - works with all LLM providers (Gemini, OpenAI, Ollama, etc.) + - Comprehensive unit tests with 22 test cases covering all functionality + - Registered in Search & Discovery category with priority 1 (complementary to Google Search) + - New `web` package in `tools/` for web content operations - Google Search tool integration via ADK's `geminitool.GoogleSearch` - Enables web search capabilities for the agent - Works with Gemini 2.0+ models @@ -17,8 +27,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Comprehensive unit tests for Google Search tool - Documentation in TOOL_DEVELOPMENT.md for using ADK built-in tools -## [Unreleased] - ## [0.2.1] - 2025-11-14 ### Fixed diff --git a/README.md b/README.md index 7f8bc0a..7021d35 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,7 @@ ### Key Features - **🤖 Multi-Model Support**: Seamlessly switch between Gemini, OpenAI, and Vertex AI -- **🛠️ 21 Built-in Tools**: File operations, code editing, execution, web search, and more +- **🛠️ 22 Built-in Tools**: File operations, code editing, execution, web search, web fetching, and more - **🔌 MCP Integration**: Unlimited extensibility via Model Context Protocol - **💾 Session Persistence**: Maintain context across conversations with automatic history - **⚡ Streaming Responses**: Real-time output as the model thinks and executes diff --git a/adk-code/go.mod b/adk-code/go.mod index e3ed329..070f8f2 100644 --- a/adk-code/go.mod +++ b/adk-code/go.mod @@ -12,6 +12,7 @@ require ( github.com/ncruces/go-sqlite3/gormlite v0.30.1 github.com/ollama/ollama v0.12.11 github.com/openai/openai-go/v3 v3.8.1 + golang.org/x/net v0.46.0 golang.org/x/term v0.36.0 google.golang.org/adk v0.1.0 google.golang.org/genai v1.20.0 @@ -68,7 +69,6 @@ require ( go.opentelemetry.io/otel/sdk v1.38.0 // indirect go.opentelemetry.io/otel/trace v1.38.0 // indirect golang.org/x/crypto v0.43.0 // indirect - golang.org/x/net v0.46.0 // indirect golang.org/x/oauth2 v0.32.0 // indirect golang.org/x/sys v0.38.0 // indirect golang.org/x/text v0.30.0 // indirect diff --git a/adk-code/tools/tools.go b/adk-code/tools/tools.go index 53eb39c..0abdb99 100644 --- a/adk-code/tools/tools.go +++ b/adk-code/tools/tools.go @@ -28,6 +28,7 @@ import ( "adk-code/tools/file" "adk-code/tools/search" "adk-code/tools/v4a" + "adk-code/tools/web" "adk-code/tools/websearch" "adk-code/tools/workspace" ) @@ -104,6 +105,10 @@ type ( ListAgentsInput = agents.ListAgentsInput ListAgentsOutput = agents.ListAgentsOutput AgentEntry = agents.AgentEntry + + // Web tool types + FetchWebInput = web.FetchWebInput + FetchWebOutput = web.FetchWebOutput ) // Re-export category constants for tool classification @@ -160,6 +165,9 @@ var ( // Web search tools NewGoogleSearchTool = websearch.NewGoogleSearchTool + + // Web tools + NewFetchWebTool = web.NewFetchWebTool ) // Re-export registry functions for tool access and registration diff --git a/adk-code/tools/web/fetch.go b/adk-code/tools/web/fetch.go new file mode 100644 index 0000000..81a66a3 --- /dev/null +++ b/adk-code/tools/web/fetch.go @@ -0,0 +1,490 @@ +// Package web provides web content fetching tools for the coding agent. +package web + +import ( + "context" + "encoding/json" + "fmt" + "io" + "net/http" + "net/url" + "os" + "regexp" + "strings" + "time" + + "golang.org/x/net/html" + "google.golang.org/adk/tool" + "google.golang.org/adk/tool/functiontool" + + common "adk-code/tools/base" +) + +// FetchWebInput defines parameters for fetching web content. +type FetchWebInput struct { + // URL to fetch (required) + URL string `json:"url" jsonschema:"URL to fetch (e.g., https://example.com/page)"` + + // Format specifies how to process the response (optional) + // "text" (default) - plain text extraction + // "json" - parse as JSON + // "html" - parse HTML structure + // "raw" - return raw response + Format *string `json:"format,omitempty" jsonschema:"Response format: 'text', 'json', 'html', 'raw' (default: text)"` + + // Timeout in seconds (optional, default: 30s) + Timeout *int `json:"timeout,omitempty" jsonschema:"Request timeout in seconds (default: 30)"` + + // FollowRedirects controls automatic redirect following (optional, default: true) + FollowRedirects *bool `json:"follow_redirects,omitempty" jsonschema:"Follow HTTP redirects (default: true)"` + + // MaxSize is the maximum response size in bytes (optional, default: 1MB) + // Prevents fetching extremely large files + MaxSize *int64 `json:"max_size,omitempty" jsonschema:"Maximum response size in bytes (default: 1048576)"` + + // Headers are optional custom HTTP headers to send with the request + Headers map[string]string `json:"headers,omitempty" jsonschema:"Custom HTTP headers (e.g., Authorization)"` + + // StripCSSJS controls whether to remove style/script tags and linked CSS + // from HTML responses when processing as text/html or html format. + // Default: true + StripCSSJS *bool `json:"strip_css_js,omitempty" jsonschema:"Strip blocks + reScript := regexp.MustCompile(`(?is)`) + content = reScript.ReplaceAllString(content, "") + + // Remove blocks + reStyle := regexp.MustCompile(`(?is)`) + content = reStyle.ReplaceAllString(content, "") + + // Remove + reLink := regexp.MustCompile(`(?i)]+rel=["']?stylesheet["']?[^>]*>`) + content = reLink.ReplaceAllString(content, "") + + return content +} + +// extractJSON validates and formats JSON. +func extractJSON(content, contentType string) (string, bool) { + if !isJSONContent(contentType) { + // Try to parse anyway in case content-type is wrong + } + + var data interface{} + if err := json.Unmarshal([]byte(content), &data); err != nil { + return content, false + } + + // Re-marshal with indentation for readability + pretty, err := json.MarshalIndent(data, "", " ") + if err != nil { + return content, false + } + + return string(pretty), true +} + +// isHTMLContent checks if content-type indicates HTML. +func isHTMLContent(contentType string) bool { + return strings.Contains(strings.ToLower(contentType), "text/html") +} + +// isJSONContent checks if content-type indicates JSON. +func isJSONContent(contentType string) bool { + ct := strings.ToLower(contentType) + return strings.Contains(ct, "application/json") || strings.Contains(ct, "text/json") +} + +// NewFetchWebTool creates a tool for fetching web content. +func NewFetchWebTool() (tool.Tool, error) { + t, err := functiontool.New(functiontool.Config{ + Name: "builtin_fetch_web", + Description: `Fetches content from a web URL with optional parsing and formatting. + +**Parameters:** +- url (required): The URL to fetch (http or https only) +- format (optional): How to process the response - "text" (default, extracts plain text from HTML), "json" (formats JSON), "html" (extracts HTML structure), "raw" (returns raw content) +- strip_css_js (optional): When true (default), strip
Content