Skip to content
22 changes: 22 additions & 0 deletions extensions/cli/src/stream/streamChatResponse.helpers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -377,6 +377,28 @@ export function recordStreamTelemetry(options: {
});
} catch {}

// Report prompt cache metrics to PostHog
if (fullUsage?.prompt_tokens_details) {
const cacheReadTokens =
fullUsage.prompt_tokens_details.cache_read_tokens ?? 0;
const cacheWriteTokens =
fullUsage.prompt_tokens_details.cache_write_tokens ?? 0;
const totalPromptTokens = fullUsage.prompt_tokens ?? 0;
const cacheHitRate =
totalPromptTokens > 0 ? cacheReadTokens / totalPromptTokens : 0;

try {
void posthogService.capture("prompt_cache_metrics", {
model: model.model,
cache_read_tokens: cacheReadTokens,
cache_write_tokens: cacheWriteTokens,
total_prompt_tokens: totalPromptTokens,
cache_hit_rate: cacheHitRate,
tool_count: tools?.length ?? 0,
});
} catch {}
}

return cost;
}

Expand Down
140 changes: 140 additions & 0 deletions packages/openai-adapters/src/apis/AnthropicCachingStrategies.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,146 @@ describe("AnthropicCachingStrategies", () => {
expect(result.messages).toEqual(body.messages);
});

it("should add cache_control to last two user messages for turn-level caching", () => {
  // Helper: build a single-block text content array for a message.
  const textContent = (text: string) => [{ type: "text" as const, text }];

  const requestBody: MessageCreateParams = {
    system: [{ type: "text", text: "system message" }],
    tools: [makeTool("tool1")],
    messages: [
      { role: "user", content: textContent("first user message") },
      { role: "assistant", content: textContent("assistant reply") },
      { role: "user", content: textContent("second user message") },
      { role: "assistant", content: textContent("another assistant reply") },
      { role: "user", content: textContent("third user message") },
    ],
    ...body_params,
  };

  const cached = CACHING_STRATEGIES.systemAndTools(requestBody);

  // System prompt and the tool definition keep their cache breakpoints.
  expect(cached.system).toEqual([
    {
      type: "text",
      text: "system message",
      cache_control: { type: "ephemeral" },
    },
  ]);
  expect(cached.tools).toEqual([
    {
      name: "tool1",
      input_schema: { type: "object" },
      cache_control: { type: "ephemeral" },
    },
  ]);

  // Only the LAST TWO user messages receive cache_control; the earliest
  // user message (index 0) must be left untouched.
  expect(cached.messages[0]).toEqual({
    role: "user",
    content: [{ type: "text", text: "first user message" }],
  });

  // Index 2 is the second-to-last user message -> cached.
  expect(cached.messages[2]).toEqual({
    role: "user",
    content: [
      {
        type: "text",
        text: "second user message",
        cache_control: { type: "ephemeral" },
      },
    ],
  });

  // Index 4 is the last user message -> cached.
  expect(cached.messages[4]).toEqual({
    role: "user",
    content: [
      {
        type: "text",
        text: "third user message",
        cache_control: { type: "ephemeral" },
      },
    ],
  });
});

it("should not add cache_control to string content user messages", () => {
  const requestBody: MessageCreateParams = {
    system: [{ type: "text", text: "system message" }],
    messages: [
      // Plain-string content cannot carry per-block cache_control.
      { role: "user", content: "string content message" },
      { role: "assistant", content: [{ type: "text", text: "reply" }] },
      // Array content can, and should, be cached.
      { role: "user", content: [{ type: "text", text: "array content message" }] },
    ],
    ...body_params,
  };

  const cached = CACHING_STRATEGIES.systemAndTools(requestBody);

  // addCacheControlToLastTwoUserMessages skips string-content user messages,
  // so the first message passes through unchanged.
  expect(cached.messages[0]).toEqual({
    role: "user",
    content: "string content message",
  });

  // The array-content user message gains an ephemeral cache breakpoint.
  expect(cached.messages[2]).toEqual({
    role: "user",
    content: [
      {
        type: "text",
        text: "array content message",
        cache_control: { type: "ephemeral" },
      },
    ],
  });
});

it("should not mutate the original input body", () => {
  // Purpose: systemAndTools must clone message content blocks before
  // stamping cache_control, never mutating the caller's objects.
  const body: MessageCreateParams = {
    system: [{ type: "text", text: "system message" }],
    tools: [makeTool("tool1")],
    messages: [
      {
        role: "user",
        content: [{ type: "text", text: "user message" }],
      },
    ],
    ...body_params,
  };

  // Deep-check original content block before strategy
  const originalContentBlock = (body.messages[0].content as any[])[0];
  expect(originalContentBlock.cache_control).toBeUndefined();

  const result = CACHING_STRATEGIES.systemAndTools(body);

  // The strategy must actually apply caching to its (cloned) output —
  // without this check the test would pass vacuously if the strategy
  // were a no-op. This is the last user message, so it gets a breakpoint.
  const resultContentBlock = (result.messages[0].content as any[])[0];
  expect(resultContentBlock.cache_control).toEqual({ type: "ephemeral" });

  // ...while the caller's original content block stays untouched.
  expect(originalContentBlock.cache_control).toBeUndefined();
});

it("should add only 4 cache controls in total to both system and tools", () => {
const body: MessageCreateParams = {
system: [
Expand Down
13 changes: 13 additions & 0 deletions packages/openai-adapters/src/apis/AnthropicCachingStrategies.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import { MessageCreateParams } from "@anthropic-ai/sdk/resources";

import { addCacheControlToLastTwoUserMessages } from "./AnthropicUtils.js";

const MAX_CACHING_MESSAGES = 4;

// Caching strategy type - transforms a clean Anthropic body by adding cache_control
Expand Down Expand Up @@ -67,6 +69,17 @@ const systemAndToolsStrategy: CachingStrategy = (body) => {
});
}

// Cache last two user messages for turn-level caching.
// Uses the remaining 2 breakpoints (system=1, last tool=1, 2 user messages=2, total=4).
// Clone messages to avoid mutating the original input.
result.messages = result.messages.map((msg) => ({
...msg,
content: Array.isArray(msg.content)
? msg.content.map((block) => ({ ...block }))
: msg.content,
}));
addCacheControlToLastTwoUserMessages(result.messages);

return result;
};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ describe("Anthropic Adapter Tests", () => {
role: "user",
content: [
{
cache_control: { type: "ephemeral" },
type: "text",
text: "hello",
},
Expand Down Expand Up @@ -119,6 +120,7 @@ describe("Anthropic Adapter Tests", () => {
role: "user",
content: [
{
cache_control: { type: "ephemeral" },
type: "text",
text: "hello",
},
Expand Down Expand Up @@ -184,6 +186,7 @@ describe("Anthropic Adapter Tests", () => {
role: "user",
content: [
{
cache_control: { type: "ephemeral" },
type: "text",
text: "hello",
},
Expand Down
Loading
Loading