AgentRunKit

A lightweight Swift 6 framework for building LLM-powered agents with type-safe tool calling.

Zero dependencies · Full Sendable · Async/await · Multi-provider · Production-ready


Quick Start

import AgentRunKit

let client = OpenAIClient(
    apiKey: ProcessInfo.processInfo.environment["OPENAI_API_KEY"]!,
    model: "gpt-4o",
    baseURL: OpenAIClient.openAIBaseURL
)

let agent = Agent<EmptyContext>(client: client, tools: [])
let result = try await agent.run(
    userMessage: "What is the capital of France?",
    context: EmptyContext()
)

print(result.content)
print("Tokens: \(result.totalTokenUsage.total)")

Installation

Add to your Package.swift:

dependencies: [
    .package(url: "https://github.com/Tom-Ryder/AgentRunKit.git", from: "1.0.0")
]
.target(name: "YourApp", dependencies: ["AgentRunKit"])
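
In context, a minimal Package.swift might look like this (a sketch; platform minimums match the Requirements section below):

// swift-tools-version:6.0
import PackageDescription

let package = Package(
    name: "YourApp",
    platforms: [.iOS(.v18), .macOS(.v15)],
    dependencies: [
        .package(url: "https://github.com/Tom-Ryder/AgentRunKit.git", from: "1.0.0")
    ],
    targets: [
        .executableTarget(name: "YourApp", dependencies: ["AgentRunKit"])
    ]
)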

Core Concepts

When to Use What

Interface          Use Case
Agent              Tool-calling workflows. Loops until the model calls finish.
Chat               Multi-turn conversations without agent overhead.
client.stream()    Raw streaming with direct control over deltas.
client.generate()  Single request/response without streaming.

Note: Agent requires the model to call its finish tool. For simple chat, use Chat to avoid maxIterationsReached errors.
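
For example, the Quick Start question needs no tool loop; Chat answers it directly (a sketch mirroring the Chat examples below):

let chat = Chat<EmptyContext>(client: client)
let (response, _) = try await chat.send("What is the capital of France?")
print(response.content)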

Defining Tools

Tools use strongly-typed parameters with automatic JSON schema generation:

struct WeatherParams: Codable, SchemaProviding, Sendable {
    let city: String
    let units: String?
}

struct WeatherResult: Codable, Sendable {
    let temperature: Double
    let condition: String
}

let weatherTool = Tool<WeatherParams, WeatherResult, EmptyContext>(
    name: "get_weather",
    description: "Get current weather for a city",
    executor: { params, _ in
        WeatherResult(temperature: 22.0, condition: "Sunny")
    }
)

Manual Schema Definition

For more control, implement jsonSchema explicitly:

struct ComplexParams: Codable, SchemaProviding, Sendable {
    let items: [String]

    static var jsonSchema: JSONSchema {
        .object(
            properties: [
                "items": .array(
                    items: .string(description: "Item to process"),
                    description: "List of items"
                )
            ],
            required: ["items"]
        )
    }
}
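
Schemas compose. A nested object parameter might be declared like this (a sketch using only the JSONSchema cases shown above):

struct SearchParams: Codable, SchemaProviding, Sendable {
    let filter: Filter

    struct Filter: Codable, Sendable {
        let field: String
        let value: String
    }

    static var jsonSchema: JSONSchema {
        .object(
            properties: [
                "filter": .object(
                    properties: [
                        "field": .string(description: "Field to match"),
                        "value": .string(description: "Value to compare against")
                    ],
                    required: ["field", "value"]
                )
            ],
            required: ["filter"]
        )
    }
}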

Tool Context

Inject dependencies (database, user session, etc.) via a custom context:

struct AppContext: ToolContext {
    let database: Database
    let currentUserId: String
}

let userTool = Tool<UserParams, UserResult, AppContext>(
    name: "get_user",
    description: "Fetch user from database",
    executor: { params, context in
        let user = try await context.database.fetchUser(id: params.userId)
        return UserResult(name: user.name, email: user.email)
    }
)

let result = try await agent.run(
    userMessage: "Get user 456",
    context: AppContext(database: db, currentUserId: "user_123")
)
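
UserParams and UserResult are assumed above; minimal definitions consistent with the executor might be:

struct UserParams: Codable, SchemaProviding, Sendable {
    let userId: String  // ID of the user to look up
}

struct UserResult: Codable, Sendable {
    let name: String
    let email: String
}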

Guides

Agent with Tools

let config = AgentConfiguration(
    maxIterations: 10,
    toolTimeout: .seconds(30),
    systemPrompt: "You are a helpful assistant."
)

let agent = Agent<EmptyContext>(
    client: client,
    tools: [weatherTool, calculatorTool],
    configuration: config
)

let result = try await agent.run(
    userMessage: "What's the weather in Paris?",
    context: EmptyContext()
)

print("Answer: \(result.content)")
print("Iterations: \(result.iterations)")

Conversation History

Each run(), send(), or stream() call returns the updated conversation history, which you feed back in for multi-turn exchanges:

let result1 = try await agent.run(
    userMessage: "Remember the number 42.",
    context: EmptyContext()
)

let result2 = try await agent.run(
    userMessage: "What number did I ask you to remember?",
    history: result1.history,
    context: EmptyContext()
)

print(result2.content)  // "42"

With Chat

let chat = Chat<EmptyContext>(client: client)

let (response1, history1) = try await chat.send("My name is Alice.")
let (response2, _) = try await chat.send("What's my name?", history: history1)

print(response2.content)  // "Alice"

Streaming

Agent and Chat streaming emit StreamEvent values:

for try await event in agent.stream(userMessage: "Write a poem", context: EmptyContext(), tokenBudget: 10000) {
    switch event {
    case .delta(let text):
        print(text, terminator: "")
    case .reasoningDelta(let text):
        print("[Thinking] \(text)", terminator: "")
    case .toolCallStarted(let name, _):
        print("\n[Executing \(name)...]")
    case .toolCallCompleted(_, let name, _):
        print("[Completed \(name)]")
    case .audioData(let data):
        audioPlayer.enqueue(data)  // PCM16 chunk for real-time playback
    case .audioTranscript(let text):
        print(text, terminator: "")
    case .audioFinished(_, _, let data):
        audioPlayer.finalize(data)  // Complete audio buffer
    case .finished(let tokenUsage, _, _, _):
        print("\nTokens: \(tokenUsage.total)")
    }
}

Raw Client Streaming

Use client.stream() for lower-level control:

for try await delta in client.stream(messages: messages, tools: []) {
    switch delta {
    case .content(let text):
        print(text, terminator: "")
    case .reasoning(let text):
        print("[Thinking] \(text)", terminator: "")
    case .toolCallStart(_, _, let name):
        print("\n[Tool: \(name)]")
    case .toolCallDelta(_, _):
        break
    case .audioData(let data):
        audioPlayer.enqueue(data)
    case .audioTranscript(let text):
        print(text, terminator: "")
    case .audioStarted(_, _):
        break
    case .finished(let usage):
        if let usage { print("\nTokens: \(usage.total)") }
    }
}

StreamEvent (Agent/Chat)                       StreamDelta (Client)
.delta(String)                                 .content(String)
.reasoningDelta(String)                        .reasoning(String)
.toolCallStarted(name:id:)                     .toolCallStart(index:id:name:)
.toolCallCompleted(id:name:result:)            .toolCallDelta(index:arguments:)
.audioData(Data)                               .audioData(Data)
.audioTranscript(String)                       .audioTranscript(String)
.audioFinished(id:expiresAt:data:)             .audioStarted(id:expiresAt:)
.finished(tokenUsage:content:reason:history:)  .finished(usage:)

Reasoning Models

For models with extended thinking:

let client = OpenAIClient(
    apiKey: apiKey,
    model: "deepseek/deepseek-r1",
    baseURL: OpenAIClient.openRouterBaseURL,
    reasoningConfig: .high  // .xhigh, .high, .medium, .low, .minimal, .none
)

Access reasoning content:

let response = try await client.generate(messages: messages, tools: [])

if let reasoning = response.reasoning {
    print("Thinking: \(reasoning.content)")
}
print("Answer: \(response.content)")
Fine-Grained Control
let client = OpenAIClient(
    apiKey: apiKey,
    model: "your-model",
    baseURL: OpenAIClient.openRouterBaseURL,
    reasoningConfig: ReasoningConfig(effort: .high, maxTokens: 16000, exclude: false)
)

Interleaved Thinking

Models like Claude and DeepSeek return opaque reasoning_details blocks alongside their responses. These must be echoed back verbatim on subsequent requests to maintain thinking continuity across tool-calling turns.

AgentRunKit handles this automatically — reasoning_details are extracted from each response, stored on AssistantMessage, and included in the next request. No configuration needed.

// reasoning_details are preserved across tool-calling turns automatically
let result = try await agent.run(
    userMessage: "Analyze this data and search for related papers",
    context: EmptyContext()
)

// Access reasoning details if needed
for message in result.history {
    if case .assistant(let msg) = message, let details = msg.reasoningDetails {
        print("Reasoning blocks: \(details.count)")
    }
}

Keys inside reasoning_details are preserved verbatim (not mangled by camelCase conversion), ensuring the opaque contract with the provider is maintained.

Multimodal Input

Images, audio, video, and PDFs:

// Image from URL
let message = ChatMessage.user(
    text: "Describe this image",
    imageURL: "https://example.com/image.jpg"
)

// Image from data
let message = ChatMessage.user(
    text: "What's in this photo?",
    imageData: imageData,
    mimeType: "image/jpeg"
)

// Audio (speech to text)
let message = ChatMessage.user(
    text: "Transcribe this:",
    audioData: audioData,
    format: .wav
)

// PDF document
let message = ChatMessage.user([
    .text("Summarize:"),
    .pdf(data: pdfData)
])

Direct Transcription

let transcript = try await client.transcribe(
    audio: audioData,
    format: .wav,
    model: "whisper-1"
)

Audio Output

Stream audio responses from models that support modalities: ["text", "audio"] (e.g., gpt-4o-audio-preview). Enable audio output via RequestContext.extraFields:

let requestContext = RequestContext(extraFields: [
    "modalities": .array([.string("text"), .string("audio")]),
    "audio": .object([
        "voice": .string("alloy"),
        "format": .string("pcm16"),
    ]),
])

for try await event in chat.stream("Tell me a story", context: EmptyContext(), requestContext: requestContext) {
    switch event {
    case .audioData(let chunk):
        audioPlayer.enqueue(chunk)       // PCM16 24kHz mono, stream in real time
    case .audioTranscript(let text):
        print(text, terminator: "")      // Partial transcript of the spoken audio
    case .audioFinished(let id, let expiresAt, let data):
        save(data, id: id)               // Complete accumulated audio buffer
    case .delta(let text):
        print(text, terminator: "")      // Text content (if any)
    case .finished(_, _, _, _):
        break
    default:
        break
    }
}

Audio events flow alongside text and tool call events. The streaming pipeline accumulates audio chunks internally and emits .audioFinished with the complete buffer after the stream ends. When the model returns audio without text content, the audio transcript is automatically used as the assistant's content in conversation history.
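
The audioPlayer in these snippets is not part of AgentRunKit. A minimal stand-in that simply accumulates PCM16 chunks and writes the final buffer to disk might look like this (real-time playback would instead feed chunks to something like AVAudioEngine):

import Foundation

actor PCMSink {
    private var buffer = Data()

    // Append a streamed PCM16 chunk as it arrives (call with await).
    func enqueue(_ chunk: Data) {
        buffer.append(chunk)
    }

    // Persist the complete buffer emitted by .audioFinished.
    func finalize(_ complete: Data, to url: URL) throws {
        try complete.write(to: url)
    }
}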

Structured Output

Request JSON schema-constrained responses:

struct WeatherReport: Codable, SchemaProviding, Sendable {
    let temperature: Int
    let conditions: String
}

// With client
let response = try await client.generate(
    messages: [.user("Weather in Paris?")],
    tools: [],
    responseFormat: .jsonSchema(WeatherReport.self)
)
let report = try JSONDecoder().decode(WeatherReport.self, from: Data(response.content.utf8))

// With Chat (automatic decoding)
let chat = Chat<EmptyContext>(client: client)
let report: WeatherReport = try await chat.send("Weather in Paris?", returning: WeatherReport.self)

Sub-Agents

Agents can spawn child agents as tools, with automatic depth limiting and token budget enforcement:

struct ResearchParams: Codable, SchemaProviding, Sendable {
    let query: String
    static var jsonSchema: JSONSchema {
        .object(properties: ["query": .string()], required: ["query"])
    }
}

let researchAgent = Agent<SubAgentContext<AppContext>>(
    client: client,
    tools: [webSearchTool, summarizeTool]
)

let researchTool = try SubAgentTool<ResearchParams, AppContext>(
    name: "research",
    description: "Research a topic using web search",
    agent: researchAgent,
    tokenBudget: 5000,
    messageBuilder: { $0.query }
)

let orchestrator = Agent<SubAgentContext<AppContext>>(
    client: client,
    tools: [researchTool, writeTool]
)

let ctx = SubAgentContext(inner: AppContext(), maxDepth: 3)
let result = try await orchestrator.run(userMessage: "Write a report on Swift concurrency", context: ctx)

SubAgentContext wraps your existing context with depth tracking. Each sub-agent call increments depth automatically — if currentDepth reaches maxDepth, the call throws AgentError.maxDepthExceeded. Token budgets are enforced per sub-agent run, preventing any single child from consuming unbounded tokens.

Factory Function

For better type inference at call sites, use the free function:

let tool: any AnyTool<SubAgentContext<AppContext>> = try subAgentTool(
    name: "research",
    description: "Research a topic",
    agent: researchAgent,
    messageBuilder: { (params: ResearchParams) in params.query }
)

Error Handling

do {
    let result = try await agent.run(userMessage: "...", context: EmptyContext())
} catch let error as AgentError {
    switch error {
    case .maxIterationsReached(let count):
        print("Didn't finish in \(count) iterations")
    case .toolTimeout(let tool):
        print("Tool '\(tool)' timed out")
    case .toolNotFound(let name):
        print("Unknown tool: \(name)")
    case .toolExecutionFailed(let tool, let message):
        print("Tool '\(tool)' failed: \(message)")
    case .maxDepthExceeded(let depth):
        print("Sub-agent nesting too deep at level \(depth)")
    case .tokenBudgetExceeded(let budget, let used):
        print("Token budget \(budget) exceeded (used \(used))")
    case .llmError(let transport):
        switch transport {
        case .rateLimited(let retryAfter):
            print("Rate limited. Retry: \(retryAfter?.description ?? "unknown")")
        case .httpError(let status, let body):
            print("HTTP \(status): \(body)")
        default:
            print("Transport: \(transport)")
        }
    default:
        print("Error: \(error)")
    }
}

Tool errors are automatically fed back to the LLM for recovery via AgentError.feedbackMessage.
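
For example, a tool executor can simply throw; the run continues and the error message is surfaced to the model as tool output (a sketch reusing the AppContext types from earlier):

struct LookupError: Error, CustomStringConvertible {
    let description: String
}

let strictUserTool = Tool<UserParams, UserResult, AppContext>(
    name: "get_user_strict",
    description: "Fetch a user, failing loudly when missing",
    executor: { params, _ in
        // Thrown errors become AgentError.toolExecutionFailed and are
        // reported back to the LLM via feedbackMessage so it can recover.
        throw LookupError(description: "No user with id \(params.userId)")
    }
)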


Configuration

Agent Configuration

let config = AgentConfiguration(
    maxIterations: 10,          // Max tool-calling rounds
    maxMessages: 50,            // Context truncation limit
    toolTimeout: .seconds(30),  // Per-tool timeout
    systemPrompt: "You are a helpful assistant."
)

Retry Policy

let client = OpenAIClient(
    apiKey: apiKey,
    model: "gpt-4o",
    baseURL: OpenAIClient.openAIBaseURL,
    retryPolicy: RetryPolicy(
        maxAttempts: 5,
        baseDelay: .seconds(2),
        maxDelay: .seconds(60),
        streamStallTimeout: .seconds(30)  // Detect silently dropped SSE connections
    )
)

Per-Request Customization

RequestContext injects arbitrary fields into the HTTP request body and provides access to response headers. Pass it to any Agent, Chat, or client method:

let requestContext = RequestContext(
    extraFields: [
        "web_search_options": .object(["search_context_size": .string("high")]),
        "provider": .object(["order": .array([.string("cerebras")])]),
    ],
    onResponse: { response in
        print(response.value(forHTTPHeaderField: "X-Request-Id") ?? "")
    }
)

// Agent
let result = try await agent.run(
    userMessage: "Search the web for latest news",
    context: myContext,
    requestContext: requestContext
)

// Streaming
for try await event in agent.stream(
    userMessage: "Summarize recent events",
    context: myContext,
    requestContext: requestContext
) { ... }

// Chat
let (response, history) = try await chat.send("Hello", requestContext: requestContext)
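
onResponse also makes it easy to observe provider headers, such as rate-limit information (header names vary by provider; the one below is illustrative):

let monitored = RequestContext(
    onResponse: { response in
        // Header names differ between providers; adjust to yours.
        if let remaining = response.value(forHTTPHeaderField: "x-ratelimit-remaining-requests") {
            print("Requests remaining: \(remaining)")
        }
    }
)

let (reply, _) = try await chat.send("Hello again", requestContext: monitored)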

LLM Providers

Works with any OpenAI-compatible API:

Provider    Base URL
OpenAI      OpenAIClient.openAIBaseURL
OpenRouter  OpenAIClient.openRouterBaseURL
Groq        OpenAIClient.groqBaseURL
Together    OpenAIClient.togetherBaseURL
Ollama      OpenAIClient.ollamaBaseURL

// OpenRouter
let client = OpenAIClient(
    apiKey: ProcessInfo.processInfo.environment["OPENROUTER_API_KEY"]!,
    model: "anthropic/claude-sonnet-4",
    baseURL: OpenAIClient.openRouterBaseURL
)

// Local Ollama
let client = OpenAIClient(
    apiKey: "ollama",
    model: "llama3.2",
    baseURL: OpenAIClient.ollamaBaseURL
)

Proxy Mode

For backends that handle auth and model selection server-side:

let client = OpenAIClient.proxy(
    baseURL: URL(string: "https://api.myapp.com/v1/ai")!,
    additionalHeaders: { ["Authorization": "Bearer \(userToken)"] }
)

The header closure is evaluated per-request, enabling rotating tokens or dynamic auth.

Useful for iOS apps where:

  • Backend manages LLM API keys (security)
  • Backend selects models (A/B testing, upgrades without app updates)
  • Backend injects context or tracks usage

The proxy() factory omits the default Authorization: Bearer header and the model field from requests; the backend supplies both.


API Reference

Core Types

Type                Description
Agent<C>            Main agent loop coordinator
AgentConfiguration  Agent behavior settings
AgentResult         Final result with content and token usage
Chat<C>             Lightweight multi-turn chat interface
StreamEvent         Streaming event types

Tool Types

Type                Description
Tool<P, O, C>       Type-safe tool definition
AnyTool             Type-erased tool protocol
ToolContext         Protocol for dependency injection
EmptyContext        Null context for stateless tools
ToolResult          Tool execution result
SubAgentTool<P, C>  Tool that delegates to a child agent
SubAgentContext<C>  Context wrapper with depth tracking

Schema Types

Type             Description
JSONSchema       JSON Schema representation
SchemaProviding  Protocol for automatic schema generation
SchemaDecoder    Automatic schema inference from Decodable

LLM Types

Type             Description
LLMClient        Protocol for LLM implementations
OpenAIClient     OpenAI-compatible client
ResponseFormat   Structured output configuration
RetryPolicy      Exponential backoff settings
ReasoningConfig  Reasoning effort for thinking models
RequestContext   Per-request extra fields and callbacks
JSONValue        Type-safe JSON value enum

Message Types

Type              Description
ChatMessage       Conversation message enum
AssistantMessage  LLM response with tool calls and reasoning
TokenUsage        Token accounting (input, output, reasoning, total)
ContentPart       Multimodal content element
ReasoningContent  Reasoning/thinking content

Error Types

Type            Description
AgentError      Typed agent framework errors
TransportError  HTTP and network errors

Custom LLM Client

Implement LLMClient for non-OpenAI-compatible providers:

public protocol LLMClient: Sendable {
    func generate(
        messages: [ChatMessage],
        tools: [ToolDefinition],
        responseFormat: ResponseFormat?
    ) async throws -> AssistantMessage

    func stream(
        messages: [ChatMessage],
        tools: [ToolDefinition]
    ) -> AsyncThrowingStream<StreamDelta, Error>
}
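
A conforming skeleton might look like this (a sketch; the provider-specific request/response mapping is left unimplemented):

struct MyProviderClient: LLMClient {
    func generate(
        messages: [ChatMessage],
        tools: [ToolDefinition],
        responseFormat: ResponseFormat?
    ) async throws -> AssistantMessage {
        // Encode messages/tools for your provider, perform the request,
        // and map the response into an AssistantMessage.
        fatalError("unimplemented")
    }

    func stream(
        messages: [ChatMessage],
        tools: [ToolDefinition]
    ) -> AsyncThrowingStream<StreamDelta, Error> {
        AsyncThrowingStream { continuation in
            // Yield .content / .reasoning / tool-call deltas as they arrive,
            // emit .finished(usage), then finish the stream.
            continuation.finish()
        }
    }
}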

Requirements

Requirement  Version
iOS          18.0+
macOS        15.0+
Swift        6.0+
Xcode        16+

License

MIT License. See LICENSE for details.
