diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 00000000..3832942b --- /dev/null +++ b/docs/README.md @@ -0,0 +1,32 @@ +# ACP Documentation + +The ACP documentation uses [Mintlify](https://mintlify.com/). +All files are MDX and can host custom components if there's a need to. + +## Running + +Run the following command to have the documentation live locally: + +```bash +npm run docs +``` + +## Preview changes locally + +To preview the changes locally, run the following command: + +```bash +mint dev +``` + +### Install the CLI + +Before running the site locally you need to install Mint's CLI: + +```bash +npm i -g mint +``` + +## Deployment + +The documentation site is updated every time changes get to `main`. diff --git a/docs/community/contributing.mdx b/docs/community/contributing.mdx new file mode 100644 index 00000000..74c83e14 --- /dev/null +++ b/docs/community/contributing.mdx @@ -0,0 +1,10 @@ +--- +title: "Contributing" +description: "How to participate in the development of ACP" +--- + +We welcome contributions from the community! + +All contributors must adhere to [Zed's Code of Conduct](https://zed.dev/code-of-conduct). + +For questions and discussions, please use GitHub Discussions. 
diff --git a/docs/community/versioning.mdx b/docs/community/versioning.mdx new file mode 100644 index 00000000..d3357452 --- /dev/null +++ b/docs/community/versioning.mdx @@ -0,0 +1,4 @@ +--- +title: "Versioning" +description: "Versioning policy for the Agent Client Protocol" +--- diff --git a/docs/docs.json b/docs/docs.json new file mode 100644 index 00000000..bdb06f3b --- /dev/null +++ b/docs/docs.json @@ -0,0 +1,90 @@ +{ + "$schema": "https://mintlify.com/docs.json", + "theme": "maple", + "name": "Agent Client Protocol", + "description": "The Agent Client Protocol (ACP) is a protocol that standardizes communication between code editors and coding agents.", + "colors": { + "primary": "#0084d1", + "light": "#00bcff", + "dark": "#0084d1" + }, + "background": { + "color": { + "light": "#f8f8f6", + "dark": "#100f0f" + } + }, + "fonts": { + "heading": { + "family": "Lora", + "format": "woff2" + }, + "body": { + "family": "Public Sans", + "format": "woff2" + } + }, + "favicon": { + "light": "/logo/fav-light.png", + "dark": "/logo/fav-dark.png" + }, + "navbar": { + "links": [ + { + "label": "GitHub", + "href": "https://github.com/zed-industries/agent-client-protocol" + }, + { + "label": "Zed Industries", + "href": "https://zed.dev" + } + ] + }, + "navigation": { + "groups": [ + { + "group": "Overview", + "pages": ["overview/introduction", "overview/architecture"] + }, + { + "group": "Protocol", + "pages": [ + "protocol/overview", + "protocol/initialization", + "protocol/session-setup", + "protocol/prompt-turn", + "protocol/content", + "protocol/tool-calls", + "protocol/agent-plan", + "protocol/schema" + ] + }, + { + "group": "Libraries", + "pages": ["libraries/typescript", "libraries/rust"] + }, + { + "group": "Community", + "pages": ["community/contributing", "community/versioning"] + } + ] + }, + "logo": { + "light": "/logo/light.svg", + "dark": "/logo/dark.svg" + }, + "seo": { + "metatags": { + "og:image": 
"https://raw.githubusercontent.com/modelcontextprotocol/docs/2eb6171ddbfeefde349dc3b8d5e2b87414c26250/images/og-image.png" + }, + "indexing": "navigable" + }, + "footer": { + "socials": { + "github": "https://github.com/zed-industries/agent-client-protocol" + } + }, + "contextual": { + "options": ["copy", "view"] + } +} diff --git a/docs/images/architecture-diagram.png b/docs/images/architecture-diagram.png new file mode 100644 index 00000000..7f03fbbc Binary files /dev/null and b/docs/images/architecture-diagram.png differ diff --git a/docs/images/mcp-proxy.d2 b/docs/images/mcp-proxy.d2 new file mode 100644 index 00000000..72d8995c --- /dev/null +++ b/docs/images/mcp-proxy.d2 @@ -0,0 +1,34 @@ +'Code Editor': {near: top-center} + +'MCP Proxy': {near: center-left} +# 'MCP Server ...': { +# near: center-right +# style: { +# stroke-dash: 3 +# } +# } +" ---------------------------------------------- ": { + style: { + fill: transparent + font-color: transparent + stroke-width: 0 + } +} + +# Bottom row: Agent +Agent: {near: bottom-center} + +# Connections +'Code Editor' -> Agent: MCP Proxy Configuration { + style: { + stroke-dash: 3 + } +} + +# The agent connects up to the MCP servers +Agent <-> 'MCP Proxy': MCP over stdio {direction: up} +'MCP Proxy' <-> 'Code Editor': MCP over socket { + style: { + stroke-dash: 3 + } +} diff --git a/docs/images/mcp-proxy.svg b/docs/images/mcp-proxy.svg new file mode 100644 index 00000000..9e1882dc --- /dev/null +++ b/docs/images/mcp-proxy.svg @@ -0,0 +1,104 @@ +Code EditorMCP Proxy ---------------------------------------------- Agent MCP Proxy Configuration MCP over stdio MCP over socket + + + + + diff --git a/docs/images/mcp.d2 b/docs/images/mcp.d2 new file mode 100644 index 00000000..adee9642 --- /dev/null +++ b/docs/images/mcp.d2 @@ -0,0 +1,30 @@ +'Code Editor': {near: top-center} + +'MCP Server 1': {near: center-left} +'MCP Server ...': { + near: center-right + style: { + stroke-dash: 3 + } +} +" ----------------------- ": { + 
style: { + fill: transparent + font-color: transparent + stroke-width: 0 + } +} + +# Bottom row: Agent +Agent: {near: bottom-center} + +# Connections +'Code Editor' -> Agent: MCP Configuration {direction: down} + +# The agent connects up to the MCP servers +Agent -> 'MCP Server 1': MCP {direction: up} +Agent -> 'MCP Server ...': MCP { + style: { + stroke-dash: 3 + } +} diff --git a/docs/images/mcp.svg b/docs/images/mcp.svg new file mode 100644 index 00000000..5816effa --- /dev/null +++ b/docs/images/mcp.svg @@ -0,0 +1,104 @@ +Code EditorMCP Server 1MCP Server ... ----------------------- Agent MCP ConfigurationMCP MCP + + + + + diff --git a/docs/images/server-client.d2 b/docs/images/server-client.d2 new file mode 100644 index 00000000..7bdd0b28 --- /dev/null +++ b/docs/images/server-client.d2 @@ -0,0 +1,17 @@ +# file generated by putting this code into https://play.d2lang.com/ +# and setting theme to Earth tones +Code Editor -> agent1: stdio +agent1: Agent 1 +Code Editor -> agent2: stdio +agent2: Agent 2 + +Code Editor -> "...": { + style: { + stroke-dash: 3 + } +} +"...": { + style: { + stroke-dash: 3 + } +} diff --git a/docs/images/server-client.svg b/docs/images/server-client.svg new file mode 100644 index 00000000..7ac1dc0a --- /dev/null +++ b/docs/images/server-client.svg @@ -0,0 +1,103 @@ +Code EditorAgent 1Agent 2... 
stdiostdio + + + + diff --git a/docs/libraries/rust.mdx b/docs/libraries/rust.mdx new file mode 100644 index 00000000..8325d442 --- /dev/null +++ b/docs/libraries/rust.mdx @@ -0,0 +1,4 @@ +--- +title: "Rust" +description: "Rust library for the Agent Client Protocol" +--- diff --git a/docs/libraries/typescript.mdx b/docs/libraries/typescript.mdx new file mode 100644 index 00000000..5b30e934 --- /dev/null +++ b/docs/libraries/typescript.mdx @@ -0,0 +1,4 @@ +--- +title: "TypeScript" +description: "TypeScript library for the Agent Client Protocol" +--- diff --git a/docs/logo/dark.svg b/docs/logo/dark.svg new file mode 100644 index 00000000..2c063491 --- /dev/null +++ b/docs/logo/dark.svg @@ -0,0 +1,10 @@ + + + + + + + + + + diff --git a/docs/logo/fav-dark.png b/docs/logo/fav-dark.png new file mode 100644 index 00000000..99860ec1 Binary files /dev/null and b/docs/logo/fav-dark.png differ diff --git a/docs/logo/fav-light.png b/docs/logo/fav-light.png new file mode 100644 index 00000000..1569365e Binary files /dev/null and b/docs/logo/fav-light.png differ diff --git a/docs/logo/light.svg b/docs/logo/light.svg new file mode 100644 index 00000000..69040a55 --- /dev/null +++ b/docs/logo/light.svg @@ -0,0 +1,10 @@ + + + + + + + + + + diff --git a/docs/overview/architecture.mdx b/docs/overview/architecture.mdx new file mode 100644 index 00000000..26532035 --- /dev/null +++ b/docs/overview/architecture.mdx @@ -0,0 +1,34 @@ +--- +title: "Architecture" +description: "Overview of the Agent Client Protocol architecture" +--- + +The Agent Client Protocol defines a standard interface for communication between AI agents and client applications. The architecture is designed to be flexible, extensible, and platform-agnostic. + +## Design Philosophy + +The protocol architecture follows several key principles: + +1. 
**MCP-friendly**: The protocol is built on JSON-RPC, and re-uses MCP types where possible so that integrators don't need to build yet-another representation for common data types. +2. **UX-first**: It is designed to solve the UX challenges of interacting with AI agents; ensuring there's enough flexibility to clearly render the agent's intent, but is no more abstract than it needs to be. +3. **Trusted**: ACP works when you're using a code editor to talk to a model you trust. You still have controls over the agent's tool calls, but the code editor gives the agent access to local files and MCP servers. + +## Setup + +When the user tries to connect to an agent, the editor boots the agent sub-process on demand, and all communication happens over stdin/stdout. + +Each connection can support several concurrent sessions, so you can have multiple trains of thought going on at once. + +![Server Client setup](../images/server-client.svg) + +ACP makes heavy use of JSON-RPC notifications to allow the agent to stream updates to the UI in real-time. It also uses JSON-RPC's bidirectional requests to allow the agent to make requests of the code editor: for example to request permissions for a tool call. + +## MCP + +Commonly the code editor will have user-configured MCP servers. When forwarding the prompt from the user, it passes configuration for these to the agent. This allows the agent to connect directly to the MCP server. + +![MCP Server connection](../images/mcp.svg) + +The code editor may itself also wish to export MCP based tools. Instead of trying to run MCP and ACP on the same socket, the code editor can provide its own MCP server as configuration. 
As agents may only support MCP over stdio, the code editor can provide a small proxy that tunnels requests back to itself: + +![MCP connection to self](../images/mcp-proxy.svg) diff --git a/docs/overview/introduction.mdx b/docs/overview/introduction.mdx new file mode 100644 index 00000000..d02c1474 --- /dev/null +++ b/docs/overview/introduction.mdx @@ -0,0 +1,48 @@ +--- +title: "Introduction" +description: "Get started with the Agent Client Protocol (ACP)" +--- + +The Agent Client Protocol standardizes communication between code editors (IDEs, text-editors, etc.) and coding agents (programs that use generative AI to autonomously modify code). + +The protocol is still under development, but it should be complete enough to build interesting user experiences using it. + +## Why ACP? + +AI coding agents and editors are tightly coupled but interoperability isn't the default. Each editor must build custom integrations for every agent they want to support, and agents must implement editor-specific APIs to reach users. +This creates several problems: + +- Integration overhead: Every new agent-editor combination requires custom work +- Limited compatibility: Agents work with only a subset of available editors +- Developer lock-in: Choosing an agent often means accepting their available interfaces + +ACP solves this by providing a standardized protocol for agent-editor communication, similar to how the [Language Server Protocol (LSP)](https://microsoft.github.io/language-server-protocol/) standardized language server integration. + +Agents that implement ACP work with any compatible editor. Editors that support ACP gain access to the entire ecosystem of ACP-compatible agents. +This decoupling allows both sides to innovate independently while giving developers the freedom to choose the best tools for their workflow. + +## Overview + +ACP assumes that the user is primarily in their editor, and wants to reach out and use agents to assist them with specific tasks. 
+ +Agents run as sub-processes of the code editor, and communicate using JSON-RPC over stdio. The protocol re-uses the JSON representations used in MCP where possible, but includes custom types for useful agentic coding UX elements, like displaying diffs. + +The default format for user-readable text is Markdown, which allows enough flexibility to represent rich formatting without requiring that the code editor is capable of rendering HTML. + +## Implementations + +Currently ACP is supported by: + +### Editors + +- [Zed](https://zed.dev) +- [neovim](https://neovim.io) if you install the [CodeCompanion](https://codecompanion.olimorris.dev) plugin. + +### Agents + +- [Gemini](https://github.com/google-gemini/gemini-cli) +- ... more coming soon ;) + +## Further reading + +For an overview of the architecture, see the [Architecture](./architecture) section. For details of the protocol itself, see the [Protocol overview](../protocol/overview). diff --git a/docs/protocol/agent-plan.mdx b/docs/protocol/agent-plan.mdx new file mode 100644 index 00000000..874572a7 --- /dev/null +++ b/docs/protocol/agent-plan.mdx @@ -0,0 +1,83 @@ +--- +title: "Agent Plan" +description: "How Agents communicate their execution plans" +--- + +Plans are execution strategies for complex tasks that require multiple steps. + +Agents may share plans with Clients through [`session/update`](./prompt-turn#3-agent-reports-output) notifications, providing real-time visibility into their thinking and progress. 
+ +## Creating Plans + +When the language model creates an execution plan, the Agent **SHOULD** report it to the Client: + +```json +{ + "jsonrpc": "2.0", + "method": "session/update", + "params": { + "sessionId": "sess_abc123def456", + "update": { + "sessionUpdate": "plan", + "entries": [ + { + "content": "Analyze the existing codebase structure", + "priority": "high", + "status": "pending" + }, + { + "content": "Identify components that need refactoring", + "priority": "high", + "status": "pending" + }, + { + "content": "Create unit tests for critical functions", + "priority": "medium", + "status": "pending" + } + ] + } + } +} +``` + + + An array of [plan entries](#plan-entries) representing the tasks to be + accomplished + + +## Plan Entries + +Each plan entry represents a specific task or goal within the overall execution strategy: + + + A human-readable description of what this task aims to accomplish + + + + The relative importance of this task. + +- `high` +- `medium` +- `low` + + + + + The current [execution status](#status) of this task + +- `pending` +- `in_progress` +- `completed` + + + +## Updating Plans + +As the Agent progresses through the plan, it **SHOULD** report updates by sending more `session/update` notifications with the same structure. + +The Agent **MUST** send a complete list of all plan entries in each update and their current status. The Client **MUST** replace the current plan completely. + +### Dynamic Planning + +Plans can evolve during execution. The Agent **MAY** add, remove, or modify plan entries as it discovers new requirements or completes tasks, allowing it to adapt based on what it learns. 
diff --git a/docs/protocol/content.mdx b/docs/protocol/content.mdx new file mode 100644 index 00000000..931f451e --- /dev/null +++ b/docs/protocol/content.mdx @@ -0,0 +1,204 @@ +--- +title: "Content" +description: "Understanding content blocks in the Agent Client Protocol" +--- + +Content blocks represent displayable information that flows through the Agent Client Protocol. They provide a structured way to handle various types of user-facing content—whether it's text from language models, images for analysis, or embedded resources for context. + +Content blocks appear in: + +- User prompts sent via [`session/prompt`](./prompt-turn#1-user-message) +- Language model output streamed through [`session/update`](./prompt-turn#3-agent-reports-output) notifications +- Progress updates and results from [tool calls](./tool-calls) + +## Content Types + +The Agent Client Protocol uses the same `ContentBlock` structure as the [Model Context Protocol (MCP)](https://modelcontextprotocol.io/specification/2025-06-18/schema#contentblock). + +This design choice enables Agents to seamlessly forward content from MCP tool outputs without transformation. + +### Text Content + +Plain text messages form the foundation of most interactions. + +```json +{ + "type": "text", + "text": "What's the weather like today?" +} +``` + +All Agents **MUST** support text content blocks when included in prompts. + + + The text content to display + + + + Optional metadata about how the content should be used or displayed. [Learn + more](https://modelcontextprotocol.io/specification/2025-06-18/server/resources#annotations). + + +### Image Content + +Images can be included for visual context or analysis. + +```json +{ + "type": "image", + "mimeType": "image/png", + "data": "iVBORw0KGgoAAAANSUhEUgAAAAEAAAAB..." +} +``` + + Requires the `image` [prompt +capability](./initialization#prompt-capabilities) when included in prompts. 
+ + + Base64-encoded image data + + + + The MIME type of the image (e.g., "image/png", "image/jpeg") + + + + Optional URI reference for the image source + + + + Optional metadata about how the content should be used or displayed. [Learn + more](https://modelcontextprotocol.io/specification/2025-06-18/server/resources#annotations). + + +### Audio Content + +Audio data for transcription or analysis. + +```json +{ + "type": "audio", + "mimeType": "audio/wav", + "data": "UklGRiQAAABXQVZFZm10IBAAAAABAAEAQB8AAAB..." +} +``` + + Requires the `audio` [prompt +capability](./initialization#prompt-capabilities) when included in prompts. + + + Base64-encoded audio data + + + + The MIME type of the audio (e.g., "audio/wav", "audio/mp3") + + + + Optional metadata about how the content should be used or displayed. [Learn + more](https://modelcontextprotocol.io/specification/2025-06-18/server/resources#annotations). + + +### Embedded Resource + +Complete resource contents embedded directly in the message. + +```json +{ + "type": "resource", + "resource": { + "uri": "file:///home/user/script.py", + "mimeType": "text/x-python", + "text": "def hello():\n print('Hello, world!')" + } +} +``` + +This is the preferred way to include context in prompts, such as when using @-mentions to reference files or other resources. + +By embedding the content directly in the request, Clients can include context from sources that the Agent may not have direct access to. + + Requires the `embeddedContext` [prompt +capability](./initialization#prompt-capabilities) when included in prompts. + + + The embedded resource contents, which can be either: + + + + The URI identifying the resource + + + + The text content of the resource + + + + Optional MIME type of the text content + + + + + + + The URI identifying the resource + + + + Base64-encoded binary data + + + + Optional MIME type of the blob + + + + + + + Optional metadata about how the content should be used or displayed. 
[Learn + more](https://modelcontextprotocol.io/specification/2025-06-18/server/resources#annotations). + + +### Resource Link + +References to resources that the Agent can access. + +```json +{ + "type": "resource_link", + "uri": "file:///home/user/document.pdf", + "name": "document.pdf", + "mimeType": "application/pdf", + "size": 1024000 +} +``` + + + The URI of the resource + + + + A human-readable name for the resource + + + + The MIME type of the resource + + + + Optional display title for the resource + + + + Optional description of the resource contents + + + + Optional size of the resource in bytes + + + + Optional metadata about how the content should be used or displayed. [Learn + more](https://modelcontextprotocol.io/specification/2025-06-18/server/resources#annotations). + diff --git a/docs/protocol/error.mdx b/docs/protocol/error.mdx new file mode 100644 index 00000000..01d1aba3 --- /dev/null +++ b/docs/protocol/error.mdx @@ -0,0 +1,6 @@ +--- +title: "Error" +description: "Error handling in the Agent Client Protocol" +--- + +_Documentation coming soon_ diff --git a/docs/protocol/initialization.mdx b/docs/protocol/initialization.mdx new file mode 100644 index 00000000..3c40e678 --- /dev/null +++ b/docs/protocol/initialization.mdx @@ -0,0 +1,146 @@ +--- +title: "Initialization" +description: "How all Agent Client Protocol connections begin" +--- + +{/* todo! link to all concepts */} + +The Initialization phase allows [Clients](./overview#client) and [Agents](./overview#agent) to negotiate protocol versions, capabilities, and authentication methods. + +
+ +```mermaid +sequenceDiagram + participant Client + participant Agent + + Note over Client, Agent: Connection established + Client->>Agent: initialize + Note right of Agent: Negotiate protocol
version & capabilities + Agent-->>Client: initialize response + Note over Client,Agent: Ready for session setup +``` + +
+ +Before a Session can be created, Clients **MUST** initialize the connection by calling the `initialize` method with: + +- The latest [protocol version](#protocol-version) supported +- The [capabilities](#client-capabilities) supported + +```json +{ + "jsonrpc": "2.0", + "id": 0, + "method": "initialize", + "params": { + "protocolVersion": 1, + "clientCapabilities": { + "fs": { + "readTextFile": true, + "writeTextFile": true + } + } + } +} +``` + +The Agent **MUST** respond with the chosen [protocol version](#protocol-version) and the [capabilities](#agent-capabilities) it supports: + +```json +{ + "jsonrpc": "2.0", + "id": 0, + "result": { + "protocolVersion": 1, + "agentCapabilities": { + "loadSession": true, + "promptCapabilities": { + "image": true, + "audio": true, + "embeddedContext": true + } + }, + "authMethods": [] + } +} +``` + +## Protocol version + +The protocol versions that appear in the `initialize` requests and responses are a single integer that identifies a **MAJOR** protocol version. This version is only incremented when breaking changes are introduced. + +Clients and Agents **MUST** agree on a protocol version and act according to its specification. + +See [Capabilities](#capabilities) to learn how non-breaking features are introduced. + +### Version Negotiation + +The `initialize` request **MUST** include the latest protocol version the Client supports. + +If the Agent supports the requested version, it **MUST** respond with the same version. Otherwise, the Agent **MUST** respond with the latest version it supports. + +If the Client does not support the version specified by the Agent in the `initialize` response, the Client **SHOULD** close the connection and inform the user about it. + +## Capabilities + +Capabilities describe features supported by the Client and the Agent. + +All capabilities included in the `initialize` request are **OPTIONAL**. Clients and Agents **SHOULD** support all possible combinations of their peer's capabilities. 
+ +The introduction of new capabilities is not considered a breaking change. Therefore, Clients and Agents **MUST** treat all capabilities omitted in the `initialize` request as **UNSUPPORTED**. + +Capabilities are high-level and are not attached to a specific base protocol concept. + +Capabilities may specify the availability of protocol methods, notifications, or a subset of their parameters. They may also signal behaviors of the Agent or Client implementation. + +### Client Capabilities + +The Client **SHOULD** specify whether it supports the following capability: + +#### FileSystem + +The Client **MAY** expose its FileSystem abstraction to varying degrees: + + + The `fs/read_text_file` method is available. + + + + The `fs/write_text_file` method is available. + + +### Agent Capabilities + +The Agent **SHOULD** specify whether it supports the following capabilities: + + + The `session/load` method is available. + + + + Object indicating the different types of content that may be included in + `session/prompt` requests. + + +#### Prompt capabilities + +As a baseline, all Agents **MUST** support `ContentBlock::Text` and `ContentBlock::ResourceLink` in `session/prompt` requests. + +Optionally, they **MAY** support richer types of content by specifying the following capabilities: + + + The prompt may include `ContentBlock::Image` + + + + The prompt may include `ContentBlock::Audio` + + + + The prompt may include `ContentBlock::EmbeddedResource` + + +--- + +Once the connection is initialized, you're ready to [create a session](./session-setup) and begin the conversation with the Agent. 
diff --git a/docs/protocol/overview.mdx b/docs/protocol/overview.mdx new file mode 100644 index 00000000..8c586684 --- /dev/null +++ b/docs/protocol/overview.mdx @@ -0,0 +1,109 @@ +--- +title: "Overview" +description: "How the Agent Client Protocol works" +--- + +The Agent Client Protocol allows [Agents](#agent) and [Clients](#client) to communicate by exposing methods that each side can call and sending notifications to inform each other of events. + +## Communication Model + +The protocol follows [JSON-RPC 2.0](https://www.jsonrpc.org/specification) specifications with two types of messages: + +- **Methods**: Request-response pairs that expect a result or error +- **Notifications**: One-way messages that don't expect a response + +Both sides expose methods, making the protocol symmetric - Agents can call Client methods and Clients can call Agent methods. + +## Message Flow + +A typical conversation follows this pattern: + + + + - Client → Agent: `initialize` to establish connection + - Client → Agent: `authenticate` if required by the Agent + + + + Either: - Client → Agent: `session/new` to create a new session - Client → + Agent: `session/load` to resume an existing session if supported + + + + - Client → Agent: `session/prompt` to send user message + - Agent → Client: `session/update` notifications for progress updates + - Agent → Client: File operations or permission requests as needed + - Client → Agent: `session/cancel` to interrupt processing if needed + - Turn ends and the Agent sends the `session/prompt` response with a stop reason + + + +## Agent + +Agents are programs that use generative AI to autonomously modify code. They typically run as subprocesses of the Client. + +### Baseline Methods + + + Establish connection and negotiate capabilities. + + + + Authenticate with the Agent (if required). + + +Create a new conversation session. + +Send user prompts to the Agent. 
+ +### Optional Methods + + + Load an existing session (requires `loadSession` capability). + + +### Notifications + + + Cancel ongoing operations (no response expected). + + +## Client + +Clients provide the interface between users and agents. They are typically code editors (IDEs, text editors) but can also be other UIs for interacting with agents. Clients manage the environment, handle user interactions, and control access to resources. + +### Baseline Methods + + + Request user authorization for tool calls. + + +### Optional Methods + + + Read file contents (requires `fs.readTextFile` capability). + + + + Write file contents (requires `fs.writeTextFile` capability). + + +### Notifications + + + Send progress updates during prompt processing (no response expected). + + +## Error Handling + +All methods follow standard JSON-RPC 2.0 [error handling](https://www.jsonrpc.org/specification#error_object): + +- Successful responses include a `result` field +- Errors include an `error` object with `code` and `message` +- Notifications never receive responses (success or error) + +## Next Steps + +- Learn about [Initialization](./initialization) to understand version and capability negotiation +- Understand [Session Setup](./session-setup) for creating and loading sessions +- Review the [Prompt Turn](./prompt-turn) lifecycle diff --git a/docs/protocol/prompt-turn.mdx b/docs/protocol/prompt-turn.mdx new file mode 100644 index 00000000..57e8df34 --- /dev/null +++ b/docs/protocol/prompt-turn.mdx @@ -0,0 +1,319 @@ +--- +title: "Prompt Turn" +description: "Understanding the core conversation flow" +--- + +A prompt turn represents a complete interaction cycle between the [Client](./overview#client) and [Agent](./overview#agent), starting with a user message and continuing until the Agent completes its response. This may involve multiple exchanges with the language model and tool invocations. 
+ +Before sending prompts, Clients **MUST** first complete the [initialization](./initialization) phase and [session setup](./session-setup). + +## The Prompt Turn Lifecycle + +A prompt turn follows a structured flow that enables rich interactions between the user, Agent, and any connected tools. + +
+ +```mermaid +sequenceDiagram + participant Client + participant Agent + + Note over Agent,Client: Session ready + + Note left of Client: User sends message + Client->>Agent: session/prompt (user message) + Note right of Agent: Process with LLM + + loop Until completion + Note right of Agent: LLM responds with
content/tool calls + Agent->>Client: session/update (plan) + Agent->>Client: session/update (agent_message_chunk) + + opt Tool calls requested + Agent->>Client: session/update (tool_call) + opt Permission required + Agent->>Client: session/request_permission + Note left of Client: User grants/denies + Client-->>Agent: Permission response + end + Agent->>Client: session/update (tool_call status: in_progress) + Note right of Agent: Execute tool + Agent->>Client: session/update (tool_call status: completed) + Note right of Agent: Send tool results
back to LLM + end + + opt User cancelled during execution + Note left of Client: User cancels prompt + Client->>Agent: session/cancel + Note right of Agent: Abort operations + Agent-->>Client: session/prompt response (cancelled) + end + end + + Agent-->>Client: session/prompt response (stopReason) + +``` + +### 1. User Message + +The turn begins when the Client sends a `session/prompt`: + +```json +{ + "jsonrpc": "2.0", + "id": 2, + "method": "session/prompt", + "params": { + "sessionId": "sess_abc123def456", + "prompt": [ + { + "type": "text", + "text": "Can you analyze this code for potential issues?" + }, + { + "type": "resource", + "resource": { + "uri": "file:///home/user/project/main.py", + "mimeType": "text/x-python", + "text": "def process_data(items):\n for item in items:\n print(item)" + } + } + ] + } +} +``` + + + The [ID](./session-setup#session-id) of the session to send this message to. + + + The contents of the user message, e.g. text, images, files, etc. + + Clients **MUST** restrict types of content according to the [Prompt Capabilities](./initialization#prompt-capabilities) established during [initialization](./initialization). + + + Learn more about Content + + + + +### 2. Agent Processing + +Upon receiving the prompt request, the Agent processes the user's message and sends it to the language model, which **MAY** respond with text content, tool calls, or both. + +### 3. Agent Reports Output + +The Agent reports the model's output to the Client via `session/update` notifications. 
This may include the Agent's plan for accomplishing the task: + +```json expandable +{ + "jsonrpc": "2.0", + "method": "session/update", + "params": { + "sessionId": "sess_abc123def456", + "update": { + "sessionUpdate": "plan", + "entries": [ + { + "content": "Check for syntax errors", + "priority": "high", + "status": "pending" + }, + { + "content": "Identify potential type issues", + "priority": "medium", + "status": "pending" + }, + { + "content": "Review error handling patterns", + "priority": "medium", + "status": "pending" + }, + { + "content": "Suggest improvements", + "priority": "low", + "status": "pending" + } + ] + } + } +} +``` + + + Learn more about Agent Plans + + +The Agent then reports text responses from the model: + +```json +{ + "jsonrpc": "2.0", + "method": "session/update", + "params": { + "sessionId": "sess_abc123def456", + "update": { + "sessionUpdate": "agent_message_chunk", + "content": { + "type": "text", + "text": "I'll analyze your code for potential issues. Let me examine it..." + } + } + } +} +``` + +If the model requested tool calls, these are also reported immediately: + +```json +{ + "jsonrpc": "2.0", + "method": "session/update", + "params": { + "sessionId": "sess_abc123def456", + "update": { + "sessionUpdate": "tool_call", + "toolCallId": "call_001", + "title": "Analyzing Python code", + "kind": "other", + "status": "pending" + } + } +} +``` + +### 4. Check for Completion + +If there are no pending tool calls, the turn ends and the Agent **MUST** respond to the original `session/prompt` request with a `StopReason`: + +```json +{ + "jsonrpc": "2.0", + "id": 2, + "result": { + "stopReason": "end_turn" + } +} +``` + +Agents **MAY** stop the turn at any point by returning the corresponding [`StopReason`](#stop-reasons). + +### 5. Tool Invocation and Status Reporting + +Before proceeding with execution, the Agent **MAY** request permission from the Client via the `session/request_permission` method. 
+ +Once permission is granted (if required), the Agent **SHOULD** invoke the tool and report a status update marking the tool as `in_progress`: + +```json +{ + "jsonrpc": "2.0", + "method": "session/update", + "params": { + "sessionId": "sess_abc123def456", + "update": { + "sessionUpdate": "tool_call_update", + "toolCallId": "call_001", + "status": "in_progress" + } + } +} +``` + +As the tool runs, the Agent **MAY** send additional updates, providing real-time feedback about tool execution progress. + +While tools execute on the Agent, they **MAY** leverage Client capabilities such as the file system (`fs`) methods to access resources within the Client's environment. + +When the tool completes, the Agent sends another update with the final status and any content: + +```json +{ + "jsonrpc": "2.0", + "method": "session/update", + "params": { + "sessionId": "sess_abc123def456", + "update": { + "sessionUpdate": "tool_call_update", + "toolCallId": "call_001", + "status": "completed", + "content": [ + { + "type": "content", + "content": { + "type": "text", + "text": "Analysis complete:\n- No syntax errors found\n- Consider adding type hints for better clarity\n- The function could benefit from error handling for empty lists" + } + } + ] + } + } +} +``` + + + Learn more about Tool Calls + + +### 6. Continue Conversation + +The Agent sends the tool results back to the language model as another request. + +The cycle returns to [step 2](#2-agent-processing), continuing until the language model completes its response without requesting additional tool calls or the turn gets stopped by the Agent or cancelled by the Client. 
+ +## Stop Reasons + +When an Agent stops a turn, it **MUST** specify the corresponding `StopReason`: + + + The language model finishes responding without requesting more tools + + + + The maximum token limit is reached + + + + The maximum number of model requests in a single turn is exceeded + + +The Agent refuses to continue + +The Client cancels the turn + +## Cancellation + +Clients **MAY** cancel an ongoing prompt turn at any time by sending a `session/cancel` notification: + +```json +{ + "jsonrpc": "2.0", + "method": "session/cancel", + "params": { + "sessionId": "sess_abc123def456" + } +} +``` + +The Client **SHOULD** preemptively mark all non-finished tool calls pertaining to the current turn as `cancelled` as soon as it sends the `session/cancel` notification. + +The Client **MUST** respond to all pending `session/request_permission` requests with the `cancelled` outcome. + +When the Agent receives this notification, it **SHOULD** stop all language model requests and all tool call invocations as soon as possible. + +After all ongoing operations have been successfully aborted and pending updates have been sent, the Agent **MUST** respond to the original `session/prompt` request with the `cancelled` [stop reason](#stop-reasons). + + + API client libraries and tools often throw an exception when their operation is aborted, which may propagate as an error response to `session/prompt`. + +Clients often display unrecognized errors from the Agent to the user, which would be undesirable for cancellations as they aren't considered errors. + +Agents **MUST** catch these errors and return the semantically meaningful `cancelled` stop reason, so that Clients can reliably confirm the cancellation. + + + +The Agent **MAY** send `session/update` notifications with content or tool call updates after receiving the `session/cancel` notification, but it **MUST** ensure that it does so before responding to the `session/prompt` request.
+ +The Client **SHOULD** still accept tool call updates received after sending `session/cancel`. + +--- + +Once a prompt turn completes, the Client may send another `session/prompt` to continue the conversation, building on the context established in previous turns. diff --git a/docs/protocol/schema.mdx b/docs/protocol/schema.mdx new file mode 100644 index 00000000..16bf23a3 --- /dev/null +++ b/docs/protocol/schema.mdx @@ -0,0 +1,4 @@ +--- +title: "Schema" +description: "JSON Schema definitions for the Agent Client Protocol" +--- diff --git a/docs/protocol/session-setup.mdx b/docs/protocol/session-setup.mdx new file mode 100644 index 00000000..778ce189 --- /dev/null +++ b/docs/protocol/session-setup.mdx @@ -0,0 +1,232 @@ +--- +title: "Session Setup" +description: "Creating and loading sessions" +--- + +Sessions represent a specific conversation or thread between the [Client](./overview#client) and [Agent](./overview#agent). Each session maintains its own context, conversation history, and state, allowing multiple independent interactions with the same Agent. + +Before creating a session, Clients **MUST** first complete the [initialization](./initialization) phase to establish protocol compatibility and capabilities. + +
+ +```mermaid +sequenceDiagram + participant Client + participant Agent + + Note over Agent,Client: Initialized + + alt + Client->>Agent: session/new + Note over Agent: Create session context + Note over Agent: Connect to MCP servers + Agent-->>Client: session/new response (sessionId) + else + Client->>Agent: session/load (sessionId) + Note over Agent: Restore session context + Note over Agent: Connect to MCP servers + Note over Agent,Client: Replay conversation history... + Agent->>Client: session/update + Agent->>Client: session/update + Note over Agent,Client: All content streamed + Agent-->>Client: session/load response + end + + Note over Client,Agent: Ready for prompts +``` + +
+ +## Creating a Session + +Clients create a new session by calling the `session/new` method with: + +- The [working directory](#working-directory) for the session +- A list of [MCP servers](#mcp-servers) the Agent should connect to + +```json +{ + "jsonrpc": "2.0", + "id": 1, + "method": "session/new", + "params": { + "cwd": "/home/user/project", + "mcpServers": [ + { + "name": "filesystem", + "command": "/path/to/mcp-server", + "args": ["--stdio"], + "env": [] + } + ] + } +} +``` + +The Agent **MUST** respond with a unique [Session ID](#session-id) that identifies this conversation: + +```json +{ + "jsonrpc": "2.0", + "id": 1, + "result": { + "sessionId": "sess_abc123def456" + } +} +``` + +## Loading Sessions + +Agents that support the `loadSession` capability allow Clients to resume previous conversations. This feature enables persistence across restarts and sharing sessions between different Client instances. + +### Checking Support + +Before attempting to load a session, Clients **MUST** verify that the Agent supports this capability by checking the `loadSession` field in the `initialize` response: + +```json highlight={7} +{ + "jsonrpc": "2.0", + "id": 0, + "result": { + "protocolVersion": 1, + "agentCapabilities": { + "loadSession": true + } + } +} +``` + +If `loadSession` is `false` or not present, the Agent does not support loading sessions and Clients **MUST NOT** attempt to call `session/load`. 
+ +### Loading a Session + +To load an existing session, Clients **MUST** call the `session/load` method with: + +- The [Session ID](#session-id) to resume +- [MCP servers](#mcp-servers) to connect to +- The [working directory](#working-directory) + +```json +{ + "jsonrpc": "2.0", + "id": 1, + "method": "session/load", + "params": { + "sessionId": "sess_789xyz", + "cwd": "/home/user/project", + "mcpServers": [ + { + "name": "filesystem", + "command": "/path/to/mcp-server", + "args": ["--mode", "filesystem"], + "env": [] + } + ] + } +} +``` + +The Agent **MUST** replay the entire conversation to the Client in the form of `session/update` notifications (the same notifications it sends during a `session/prompt` turn). + +For example, a user message from the conversation history: + +```json +{ + "jsonrpc": "2.0", + "method": "session/update", + "params": { + "sessionId": "sess_789xyz", + "update": { + "sessionUpdate": "user_message_chunk", + "content": { + "type": "text", + "text": "What's the capital of France?" + } + } + } +} +``` + +Followed by the Agent's response: + +```json +{ + "jsonrpc": "2.0", + "method": "session/update", + "params": { + "sessionId": "sess_789xyz", + "update": { + "sessionUpdate": "agent_message_chunk", + "content": { + "type": "text", + "text": "The capital of France is Paris." + } + } + } +} +``` + +When **all** the conversation entries have been streamed to the Client, the Agent **MUST** respond to the original `session/load` request: + +```json +{ + "jsonrpc": "2.0", + "id": 1, + "result": null +} +``` + +The Client can then continue sending prompts as if the session had never been interrupted. + +## Session ID + +The session ID returned by `session/new` is a unique identifier for the conversation context.
+ +Clients use this ID to: + +- Send prompt requests via `session/prompt` +- Cancel ongoing operations via `session/cancel` +- Load previous sessions via `session/load` (if the Agent supports the `loadSession` capability) + +## Working Directory + +The `cwd` (current working directory) parameter establishes the file system context for the session. This directory: + +- **MUST** be an absolute path +- **MUST** be used for the session regardless of where the Agent subprocess was spawned +- **SHOULD** serve as a boundary for tool operations on the file system + +## MCP Servers + +The [Model Context Protocol (MCP)](https://modelcontextprotocol.io) allows Agents to access external tools and data sources. When creating a session, Clients **MAY** include connection details for MCP servers that the Agent should connect to. + +Each MCP server specification includes: + + + A human-readable identifier for the server + + + + The path to the MCP server executable + + + + Command-line arguments to pass to the server + + + + Environment variables to set when launching the server + + + + The name of the environment variable. + + + The value of the environment variable. + + + + +Agents **SHOULD** connect to all MCP servers specified by the Client. + +Clients **MAY** use this ability to provide tools directly to the underlying language model by including their own MCP server. diff --git a/docs/protocol/tool-calls.mdx b/docs/protocol/tool-calls.mdx new file mode 100644 index 00000000..429722aa --- /dev/null +++ b/docs/protocol/tool-calls.mdx @@ -0,0 +1,289 @@ +--- +title: "Tool Calls" +description: "How Agents report tool call execution" +--- + +Tool calls represent actions that language models request Agents to perform during a [prompt turn](./prompt-turn). When an LLM determines it needs to interact with external systems—like reading files, running code, or fetching data—it generates tool calls that the Agent executes on its behalf. 
+ +Agents report tool calls through [`session/update`](./prompt-turn#3-agent-reports-output) notifications, allowing Clients to display real-time progress and results to users. + +While Agents handle the actual execution, they may leverage Client capabilities like permission requests or file system access to provide a richer, more integrated experience. + +## Creating + +When the language model requests a tool invocation, the Agent **SHOULD** report it to the Client: + +```json +{ + "jsonrpc": "2.0", + "method": "session/update", + "params": { + "sessionId": "sess_abc123def456", + "update": { + "sessionUpdate": "tool_call", + "toolCallId": "call_001", + "title": "Reading configuration file", + "kind": "read", + "status": "pending" + } + } +} +``` + + + A unique identifier for this tool call within the session + + + + A human-readable title describing what the tool is doing + + + + The category of tool being invoked. + + + `read` - Reading files or data; `edit` - Modifying files or content; + `delete` - Removing files or data; `move` - Moving or renaming files; + `search` - Searching for information; `execute` - Running commands or code; + `think` - Internal reasoning or planning; `fetch` - Retrieving external data; + `other` - Other tool types (default) + + +Tool kinds help Clients choose appropriate icons and optimize how they display tool execution progress. + + + + + The current [execution status](#status) (defaults to `pending`) + + + + [Content produced](#content) by the tool call + + + + [File locations](#following-the-agent) affected by this tool call + + + + The raw input parameters sent to the tool + + + + The raw output returned by the tool + + +## Updating + +As tools execute, Agents send updates to report progress and results.
+ +Updates use the `session/update` notification with `tool_call_update`: + +```json +{ + "jsonrpc": "2.0", + "method": "session/update", + "params": { + "sessionId": "sess_abc123def456", + "update": { + "sessionUpdate": "tool_call_update", + "toolCallId": "call_001", + "status": "in_progress", + "content": [ + { + "type": "content", + "content": { + "type": "text", + "text": "Found 3 configuration files..." + } + } + ] + } + } +} +``` + +All fields except `toolCallId` are optional in updates. Only the fields being changed need to be included. + +## Requesting Permission + +The Agent **MAY** request permission from the user before executing a tool call by calling the `session/request_permission` method: + +```json +{ + "jsonrpc": "2.0", + "id": 5, + "method": "session/request_permission", + "params": { + "sessionId": "sess_abc123def456", + "toolCall": { + "toolCallId": "call_001" + }, + "options": [ + { + "optionId": "allow-once", + "name": "Allow once", + "kind": "allow_once" + }, + { + "optionId": "reject-once", + "name": "Reject", + "kind": "reject_once" + } + ] + } +} +``` + + + The session ID for this request + + + + The tool call update containing details about the operation + + + + Available [permission options](#permission-options) for the user to choose + from + + +The Client responds with the user's decision: + +```json +{ + "jsonrpc": "2.0", + "id": 5, + "result": { + "outcome": { + "outcome": "selected", + "optionId": "allow-once" + } + } +} +``` + +Clients **MAY** automatically allow or reject permission requests according to the user settings. 
+ +If the current prompt turn gets [cancelled](./prompt-turn#cancellation), the Client **MUST** respond with the `"cancelled"` outcome: + +```json +{ + "jsonrpc": "2.0", + "id": 5, + "result": { + "outcome": { + "outcome": "cancelled" + } + } +} +``` + + + The user's decision: either `cancelled` (the [prompt turn was + cancelled](./prompt-turn#cancellation)) or `selected` with an `optionId` (the + ID of the selected permission option) + + +### Permission Options + +Each permission option provided to the Client contains: + + + Unique identifier for this option + + + + Human-readable label to display to the user + + + + A hint to help Clients choose appropriate icons and UI treatment for each option. + +- `allow_once` - Allow this operation only this time +- `allow_always` - Allow this operation and remember the choice +- `reject_once` - Reject this operation only this time +- `reject_always` - Reject this operation and remember the choice + + + +## Status + +Tool calls progress through different statuses during their lifecycle: + + + The tool call hasn't started running yet because the input is either streaming + or awaiting approval + + + + The tool call is currently running + + + + The tool call completed successfully + + +The tool call failed with an error + +## Content + +Tool calls can produce different types of content: + +### Regular Content + +Standard [content blocks](./content) like text, images, or resources: + +```json +{ + "type": "content", + "content": { + "type": "text", + "text": "Analysis complete. Found 3 issues."
+ } +} +``` + +### Diffs + +File modifications shown as diffs: + +```json +{ + "type": "diff", + "path": "src/config.json", + "oldText": "{\n \"debug\": false\n}", + "newText": "{\n \"debug\": true\n}" +} +``` + + + The file path being modified + + + + The original content (null for new files) + + + + The new content after modification + + +## Following the Agent + +Tool calls can report file locations they're working with, enabling Clients to implement "follow-along" features that track which files the Agent is accessing or modifying in real-time. + +```json +{ + "path": "src/main.py", + "line": 42 +} +``` + + + The file path being accessed or modified + + + + Optional line number within the file + diff --git a/package.json b/package.json index 214d3f15..23c5fc50 100644 --- a/package.json +++ b/package.json @@ -41,7 +41,8 @@ "format:check": "prettier --check . && cargo fmt -- --check", "lint": "cargo clippy", "lint:fix": "cargo clippy --fix", - "check": "npm run lint && npm run format:check && npm run build && npm run test" + "check": "npm run lint && npm run format:check && npm run build && npm run test", + "docs": "cd docs && npx mint dev" }, "devDependencies": { "@types/node": "^24.1.0",