diff --git a/src/common/utils/tools/tools.ts b/src/common/utils/tools/tools.ts index d78fff8269..0ac75fb4e6 100644 --- a/src/common/utils/tools/tools.ts +++ b/src/common/utils/tools/tools.ts @@ -42,6 +42,8 @@ import type { WorkspaceChatMessage } from "@/common/orpc/types"; import type { FileState } from "@/node/services/agentSession"; import type { AgentDefinitionDescriptor } from "@/common/types/agentDefinition"; import type { AgentSkillDescriptor } from "@/common/types/agentSkill"; +import type { StreamEditTracker } from "@/node/services/streamGuardrails/StreamEditTracker"; +import type { StreamVerificationTracker } from "@/node/services/streamGuardrails/StreamVerificationTracker"; /** * Configuration for tools that need runtime context @@ -80,6 +82,10 @@ export interface ToolConfiguration { taskService?: TaskService; /** Enable agent_report tool (only valid for child task workspaces) */ enableAgentReport?: boolean; + /** Per-stream edit tracker for doom-loop detection (not set for IPC tool calls) */ + editTracker?: StreamEditTracker; + /** Per-stream verification tracker for completion guard (not set for IPC tool calls) */ + verificationTracker?: StreamVerificationTracker; /** Experiments inherited from parent (for subagent spawning) */ experiments?: { programmaticToolCalling?: boolean; diff --git a/src/node/services/aiService.ts b/src/node/services/aiService.ts index 8ba84526ab..b57e20a66e 100644 --- a/src/node/services/aiService.ts +++ b/src/node/services/aiService.ts @@ -68,6 +68,8 @@ import { } from "./streamSimulation"; import { applyToolPolicyAndExperiments, captureMcpToolTelemetry } from "./toolAssembly"; import { getErrorMessage } from "@/common/utils/errors"; +import { StreamEditTracker } from "./streamGuardrails/StreamEditTracker"; +import { StreamVerificationTracker } from "./streamGuardrails/StreamVerificationTracker"; // --------------------------------------------------------------------------- // streamMessage options @@ -699,6 +701,10 @@ export class AIService extends EventEmitter { } } + // Guardrail trackers are per-stream and only enabled for AI tool execution. + const editTracker = new StreamEditTracker(); + const verificationTracker = new StreamVerificationTracker(); + // Get model-specific tools with workspace path (correct for local or remote) const allTools = await getToolsForModel( modelString, @@ -734,6 +740,9 @@ export class AIService extends EventEmitter { workspaceId, // Only child workspaces (tasks) can report to a parent. enableAgentReport: Boolean(metadata.parentWorkspaceId), + // Per-stream deterministic guardrails for completion + doom-loop detection. + editTracker, + verificationTracker, // External edit detection callback recordFileState, taskService: this.taskService, diff --git a/src/node/services/streamGuardrails/StreamEditTracker.test.ts b/src/node/services/streamGuardrails/StreamEditTracker.test.ts new file mode 100644 index 0000000000..7c0f8297a8 --- /dev/null +++ b/src/node/services/streamGuardrails/StreamEditTracker.test.ts @@ -0,0 +1,64 @@ +import { describe, expect, test } from "bun:test"; + +import { DOOM_LOOP_EDIT_THRESHOLD, StreamEditTracker } from "./StreamEditTracker"; + +describe("StreamEditTracker", () => { + test("recordEdit increments edit count for the same file", () => { + const tracker = new StreamEditTracker(); + + expect(tracker.recordEdit("/tmp/file.ts")).toBe(1); + expect(tracker.recordEdit("/tmp/file.ts")).toBe(2); + expect(tracker.recordEdit("/tmp/file.ts")).toBe(3); + }); + + test("hasAnyEdits is false before edits and true after first edit", () => { + const tracker = new StreamEditTracker(); + + expect(tracker.hasAnyEdits()).toBe(false); + tracker.recordEdit("/tmp/file.ts"); + expect(tracker.hasAnyEdits()).toBe(true); + }); + + test("shouldNudge is false below threshold and true at threshold", () => { + const tracker = new StreamEditTracker(); + const filePath = "/tmp/file.ts"; + + for (let i = 0; i < DOOM_LOOP_EDIT_THRESHOLD - 1; i += 1) { + tracker.recordEdit(filePath); + } + + expect(tracker.shouldNudge(filePath, DOOM_LOOP_EDIT_THRESHOLD)).toBe(false); + + tracker.recordEdit(filePath); + expect(tracker.shouldNudge(filePath, DOOM_LOOP_EDIT_THRESHOLD)).toBe(true); + }); + + test("shouldNudge is once per file after markNudged", () => { + const tracker = new StreamEditTracker(); + const filePath = "/tmp/file.ts"; + + for (let i = 0; i < DOOM_LOOP_EDIT_THRESHOLD; i += 1) { + tracker.recordEdit(filePath); + } + + expect(tracker.shouldNudge(filePath, DOOM_LOOP_EDIT_THRESHOLD)).toBe(true); + + tracker.markNudged(filePath); + expect(tracker.shouldNudge(filePath, DOOM_LOOP_EDIT_THRESHOLD)).toBe(false); + + tracker.recordEdit(filePath); + expect(tracker.shouldNudge(filePath, DOOM_LOOP_EDIT_THRESHOLD)).toBe(false); + }); + + test("tracks edit counts independently per file", () => { + const tracker = new StreamEditTracker(); + + for (let i = 0; i < DOOM_LOOP_EDIT_THRESHOLD; i += 1) { + tracker.recordEdit("/tmp/a.ts"); + } + tracker.recordEdit("/tmp/b.ts"); + + expect(tracker.shouldNudge("/tmp/a.ts", DOOM_LOOP_EDIT_THRESHOLD)).toBe(true); + expect(tracker.shouldNudge("/tmp/b.ts", DOOM_LOOP_EDIT_THRESHOLD)).toBe(false); + }); +}); diff --git a/src/node/services/streamGuardrails/StreamEditTracker.ts b/src/node/services/streamGuardrails/StreamEditTracker.ts new file mode 100644 index 0000000000..d6a6acdd03 --- /dev/null +++ b/src/node/services/streamGuardrails/StreamEditTracker.ts @@ -0,0 +1,45 @@ +import assert from "@/common/utils/assert"; + +export const DOOM_LOOP_EDIT_THRESHOLD = 7; + +/** + * Tracks file edit frequency for a single stream to detect potential doom loops. + */ +export class StreamEditTracker { + private readonly editCountsByFile = new Map(); + private readonly nudgedFiles = new Set(); + + recordEdit(filePath: string): number { + assert( + typeof filePath === "string" && filePath.length > 0, + "filePath must be a non-empty string" + ); + + const nextCount = (this.editCountsByFile.get(filePath) ?? 0) + 1; + this.editCountsByFile.set(filePath, nextCount); + return nextCount; + } + + hasAnyEdits(): boolean { + return this.editCountsByFile.size > 0; + } + + shouldNudge(filePath: string, threshold: number): boolean { + assert( + typeof filePath === "string" && filePath.length > 0, + "filePath must be a non-empty string" + ); + assert(Number.isFinite(threshold) && threshold > 0, "threshold must be a positive number"); + + const editCount = this.editCountsByFile.get(filePath) ?? 0; + return editCount >= threshold && !this.nudgedFiles.has(filePath); + } + + markNudged(filePath: string): void { + assert( + typeof filePath === "string" && filePath.length > 0, + "filePath must be a non-empty string" + ); + this.nudgedFiles.add(filePath); + } +} diff --git a/src/node/services/streamGuardrails/StreamVerificationTracker.test.ts b/src/node/services/streamGuardrails/StreamVerificationTracker.test.ts new file mode 100644 index 0000000000..1a4cb315ed --- /dev/null +++ b/src/node/services/streamGuardrails/StreamVerificationTracker.test.ts @@ -0,0 +1,59 @@ +import { describe, expect, test } from "bun:test"; + +import { StreamVerificationTracker } from "./StreamVerificationTracker"; + +describe("StreamVerificationTracker", () => { + test("hasValidationAttempt is false initially and true after markValidationAttempt", () => { + const tracker = new StreamVerificationTracker(); + + expect(tracker.hasValidationAttempt()).toBe(false); + + tracker.markValidationAttempt(); + expect(tracker.hasValidationAttempt()).toBe(true); + }); + + test("nudge lifecycle for completion guard", () => { + const tracker = new StreamVerificationTracker(); + + expect(tracker.hasBeenNudged()).toBe(false); + expect(tracker.shouldNudgeBeforeAllowingReport(false)).toBe(false); + expect(tracker.shouldNudgeBeforeAllowingReport(true)).toBe(true); + + tracker.markNudged(); + expect(tracker.hasBeenNudged()).toBe(true); + expect(tracker.shouldNudgeBeforeAllowingReport(true)).toBe(false); + + tracker.markValidationAttempt(); + expect(tracker.hasValidationAttempt()).toBe(true); + expect(tracker.shouldNudgeBeforeAllowingReport(true)).toBe(false); + }); + + test("resetValidation clears validation state so pre-edit validation doesn't count", () => { + const tracker = new StreamVerificationTracker(); + + // Validate first + tracker.markValidationAttempt(); + expect(tracker.hasValidationAttempt()).toBe(true); + + // Then an edit happens — validation should be reset + tracker.resetValidation(); + expect(tracker.hasValidationAttempt()).toBe(false); + + // Guard should now nudge because validation was pre-edit + expect(tracker.shouldNudgeBeforeAllowingReport(true)).toBe(true); + }); + + test("post-edit validation still counts after reset", () => { + const tracker = new StreamVerificationTracker(); + + // Validate, then edit resets it + tracker.markValidationAttempt(); + tracker.resetValidation(); + expect(tracker.hasValidationAttempt()).toBe(false); + + // Validate again (post-edit) — should count + tracker.markValidationAttempt(); + expect(tracker.hasValidationAttempt()).toBe(true); + expect(tracker.shouldNudgeBeforeAllowingReport(true)).toBe(false); + }); +}); diff --git a/src/node/services/streamGuardrails/StreamVerificationTracker.ts b/src/node/services/streamGuardrails/StreamVerificationTracker.ts new file mode 100644 index 0000000000..937d4fe5b2 --- /dev/null +++ b/src/node/services/streamGuardrails/StreamVerificationTracker.ts @@ -0,0 +1,36 @@ +/** + * Tracks whether a stream attempted validation commands before completion. + * + * Validation state is reset whenever a file edit is recorded, so only + * post-edit validation counts towards the pre-completion guard. + */ +export class StreamVerificationTracker { + private validationAttempted = false; + private nudgedBeforeReport = false; + + markValidationAttempt(): void { + this.validationAttempted = true; + } + + /** Reset validation state — called when new edits are recorded so that + * pre-edit validation doesn't satisfy the post-edit verification guard. */ + resetValidation(): void { + this.validationAttempted = false; + } + + hasValidationAttempt(): boolean { + return this.validationAttempted; + } + + hasBeenNudged(): boolean { + return this.nudgedBeforeReport; + } + + markNudged(): void { + this.nudgedBeforeReport = true; + } + + shouldNudgeBeforeAllowingReport(hasEdits: boolean): boolean { + return hasEdits && !this.validationAttempted && !this.nudgedBeforeReport; + } +} diff --git a/src/node/services/tools/agent_report.test.ts b/src/node/services/tools/agent_report.test.ts index 7b4d98388b..1c09ae5274 100644 --- a/src/node/services/tools/agent_report.test.ts +++ b/src/node/services/tools/agent_report.test.ts @@ -4,20 +4,25 @@ import type { ToolExecutionOptions } from "ai"; import { createAgentReportTool } from "./agent_report"; import { TestTempDir, createTestToolConfig } from "./testHelpers"; import type { TaskService } from "@/node/services/taskService"; +import { StreamEditTracker } from "@/node/services/streamGuardrails/StreamEditTracker"; +import { StreamVerificationTracker } from "@/node/services/streamGuardrails/StreamVerificationTracker"; const mockToolCallOptions: ToolExecutionOptions = { toolCallId: "test-call-id", messages: [], }; +function createTaskService(hasActiveDescendants: boolean): TaskService { + return { + hasActiveDescendantAgentTasksForWorkspace: mock(() => hasActiveDescendants), + } as unknown as TaskService; +} describe("agent_report tool", () => { it("throws when the task has active descendants", async () => { using tempDir = new TestTempDir("test-agent-report-tool"); const baseConfig = createTestToolConfig(tempDir.path, { workspaceId: "task-workspace" }); - const taskService = { - hasActiveDescendantAgentTasksForWorkspace: mock(() => true), - } as unknown as TaskService; + const taskService = createTaskService(true); const tool = createAgentReportTool({ ...baseConfig, taskService }); @@ -40,9 +45,7 @@ describe("agent_report tool", () => { using tempDir = new TestTempDir("test-agent-report-tool-ok"); const baseConfig = createTestToolConfig(tempDir.path, { workspaceId: "task-workspace" }); - const taskService = { - hasActiveDescendantAgentTasksForWorkspace: mock(() => false), - } as unknown as TaskService; + const taskService = createTaskService(false); const tool = createAgentReportTool({ ...baseConfig, taskService }); @@ -55,4 +58,123 @@ describe("agent_report tool", () => { message: "Report submitted successfully.", }); }); + + it("allows report when trackers are present but no edits occurred", async () => { + using tempDir = new TestTempDir("test-agent-report-no-edits"); + const baseConfig = createTestToolConfig(tempDir.path, { workspaceId: "task-workspace" }); + + const editTracker = new StreamEditTracker(); + const verificationTracker = new StreamVerificationTracker(); + const tool = createAgentReportTool({ + ...baseConfig, + taskService: createTaskService(false), + editTracker, + verificationTracker, + }); + + const result = (await Promise.resolve( + tool.execute!({ reportMarkdown: "done", title: "t" }, mockToolCallOptions) + )) as unknown; + + expect(result).toEqual({ + success: true, + message: "Report submitted successfully.", + }); + }); + + it("rejects first report when edits occurred but no validation was attempted", async () => { + using tempDir = new TestTempDir("test-agent-report-missing-validation"); + const baseConfig = createTestToolConfig(tempDir.path, { workspaceId: "task-workspace" }); + + const editTracker = new StreamEditTracker(); + editTracker.recordEdit("/tmp/file.ts"); + const verificationTracker = new StreamVerificationTracker(); + + const tool = createAgentReportTool({ + ...baseConfig, + taskService: createTaskService(false), + editTracker, + verificationTracker, + }); + + let caught: unknown = null; + try { + await Promise.resolve( + tool.execute!({ reportMarkdown: "done", title: "t" }, mockToolCallOptions) + ); + } catch (error: unknown) { + caught = error; + } + + expect(caught).toBeInstanceOf(Error); + if (caught instanceof Error) { + expect(caught.message).toMatch(/no validation commands detected/i); + } + expect(verificationTracker.hasBeenNudged()).toBe(true); + }); + + it("allows a second report without validation as an explicit escape hatch", async () => { + using tempDir = new TestTempDir("test-agent-report-escape-hatch"); + const baseConfig = createTestToolConfig(tempDir.path, { workspaceId: "task-workspace" }); + + const editTracker = new StreamEditTracker(); + editTracker.recordEdit("/tmp/file.ts"); + const verificationTracker = new StreamVerificationTracker(); + + const tool = createAgentReportTool({ + ...baseConfig, + taskService: createTaskService(false), + editTracker, + verificationTracker, + }); + + let firstError: unknown = null; + try { + await Promise.resolve( + tool.execute!({ reportMarkdown: "done", title: "t" }, mockToolCallOptions) + ); + } catch (error: unknown) { + firstError = error; + } + + expect(firstError).toBeInstanceOf(Error); + if (firstError instanceof Error) { + expect(firstError.message).toMatch(/no validation commands detected/i); + } + + const secondResult: unknown = await Promise.resolve( + tool.execute!({ reportMarkdown: "done", title: "t" }, mockToolCallOptions) + ); + + expect(secondResult).toEqual({ + success: true, + message: "Report submitted successfully.", + }); + }); + + it("allows report after validation attempt", async () => { + using tempDir = new TestTempDir("test-agent-report-validated"); + const baseConfig = createTestToolConfig(tempDir.path, { workspaceId: "task-workspace" }); + + const editTracker = new StreamEditTracker(); + editTracker.recordEdit("/tmp/file.ts"); + const verificationTracker = new StreamVerificationTracker(); + verificationTracker.markValidationAttempt(); + + const tool = createAgentReportTool({ + ...baseConfig, + taskService: createTaskService(false), + editTracker, + verificationTracker, + }); + + const result: unknown = await Promise.resolve( + tool.execute!({ reportMarkdown: "done", title: "t" }, mockToolCallOptions) + ); + + expect(result).toEqual({ + success: true, + message: "Report submitted successfully.", + }); + }); }); diff --git a/src/node/services/tools/agent_report.ts b/src/node/services/tools/agent_report.ts index 99f949a1cb..e762c55a32 100644 --- a/src/node/services/tools/agent_report.ts +++ b/src/node/services/tools/agent_report.ts @@ -20,6 +20,20 @@ export const createAgentReportTool: ToolFactory = (config: ToolConfiguration) => ); } + // Guard: if edits were made but no validation was attempted, nudge the agent to verify. + const hasEdits = config.editTracker?.hasAnyEdits() ?? false; + const hasValidated = config.verificationTracker?.hasValidationAttempt() ?? false; + if (hasEdits && !hasValidated) { + if (!config.verificationTracker?.hasBeenNudged()) { + config.verificationTracker?.markNudged(); + throw new Error( + "agent_report rejected: no validation commands detected after file edits. " + + "Run the most relevant check (tests, typecheck, lint) and then call agent_report again. " + + "If validation is not applicable, call agent_report again to confirm." + ); + } + } + // Intentionally no side-effects. The backend orchestrator consumes the tool-call args // via persisted history/partial state once the tool call completes successfully. // The stream continues after this so the SDK can record usage, while StreamManager diff --git a/src/node/services/tools/bash.test.ts b/src/node/services/tools/bash.test.ts index bf1d13f0d6..df5d24a1a4 100644 --- a/src/node/services/tools/bash.test.ts +++ b/src/node/services/tools/bash.test.ts @@ -1,6 +1,6 @@ import { describe, it, expect } from "bun:test"; import { LocalRuntime } from "@/node/runtime/LocalRuntime"; -import { createBashTool } from "./bash"; +import { createBashTool, looksLikeValidationCommand } from "./bash"; import type { BashOutputEvent } from "@/common/types/stream"; import type { BashToolArgs, BashToolResult } from "@/common/types/tools"; import { BASH_MAX_TOTAL_BYTES } from "@/common/constants/toolLimits"; @@ -19,6 +19,7 @@ function isForegroundSuccess( } import { BackgroundProcessManager } from "@/node/services/backgroundProcessManager"; +import { StreamVerificationTracker } from "@/node/services/streamGuardrails/StreamVerificationTracker"; // Mock ToolCallOptions for testing const mockToolCallOptions: ToolExecutionOptions = { @@ -43,6 +44,36 @@ function createTestBashTool() { }; } +describe("looksLikeValidationCommand", () => { + it("matches common validation command patterns", () => { + expect(looksLikeValidationCommand("make test")).toBe(true); + expect(looksLikeValidationCommand("make typecheck")).toBe(true); + expect(looksLikeValidationCommand("bun test")).toBe(true); + expect(looksLikeValidationCommand("bun run lint")).toBe(true); + expect(looksLikeValidationCommand("vitest --run")).toBe(true); + expect(looksLikeValidationCommand("run_and_report typecheck make typecheck")).toBe(true); + expect(looksLikeValidationCommand("npm run test\npnpm run lint")).toBe(true); + // Validation commands after shell operators (monorepo/subdirectory workflows) + expect(looksLikeValidationCommand("cd packages/app && make test")).toBe(true); + expect(looksLikeValidationCommand("cd packages/app && bun test")).toBe(true); + expect(looksLikeValidationCommand("source .env; make typecheck")).toBe(true); + expect(looksLikeValidationCommand("run_and_report unit cd packages/app && bun test")).toBe( + true + ); + }); + + it("does not match non-validation commands", () => { + expect(looksLikeValidationCommand("make build")).toBe(false); + expect(looksLikeValidationCommand("bun install")).toBe(false); + expect(looksLikeValidationCommand("echo test")).toBe(false); + expect(looksLikeValidationCommand("cargo build")).toBe(false); + // run_and_report wrapping a non-validation command should NOT trigger + expect(looksLikeValidationCommand("run_and_report install bun install")).toBe(false); + // run_and_report with validation text appearing as arguments, not the actual command + expect(looksLikeValidationCommand("run_and_report note echo make test")).toBe(false); + }); +}); + describe("bash tool", () => { it("should execute a simple command successfully", async () => { using testEnv = createTestBashTool(); @@ -1891,3 +1922,43 @@ describe("bash tool - background execution", () => { tempDir[Symbol.dispose](); }); }); + +describe("bash tool - verification tracker", () => { + it("should mark verification for foreground validation commands", async () => { + const tempDir = new TestTempDir("test-bash-verify"); + const config = createTestToolConfig(tempDir.path); + const tracker = new StreamVerificationTracker(); + config.verificationTracker = tracker; + const tool = createBashTool(config); + + await tool.execute!( + { script: "make test", timeout_secs: 5, run_in_background: false, display_name: "test" }, + mockToolCallOptions + ); + + expect(tracker.hasValidationAttempt()).toBe(true); + tempDir[Symbol.dispose](); + }); + + it("should not mark verification for background validation commands", async () => { + const manager = new BackgroundProcessManager("/tmp/mux-test-verify-bg"); + const tempDir = new TestTempDir("test-bash-verify-bg"); + const config = createTestToolConfig(tempDir.path); + const tracker = new StreamVerificationTracker(); + config.verificationTracker = tracker; + config.backgroundProcessManager = manager; + const tool = createBashTool(config); + + await tool.execute!( + { script: "make test", timeout_secs: 5, run_in_background: true, display_name: "test-bg" }, + mockToolCallOptions + ); + + // Background commands haven't produced results yet, so they shouldn't + // count as "validation attempted" + expect(tracker.hasValidationAttempt()).toBe(false); + + await manager.terminateAll(); + tempDir[Symbol.dispose](); + }); +}); diff --git a/src/node/services/tools/bash.ts b/src/node/services/tools/bash.ts index b51b70c5ad..f8f94b9b61 100644 --- a/src/node/services/tools/bash.ts +++ b/src/node/services/tools/bash.ts @@ -522,6 +522,33 @@ function validateScript(script: string, config: ToolConfiguration): BashToolResu return null; // Valid } +// Patterns for commands that are inherently validation (tests, typecheck, lint). +// `run_and_report` is a generic wrapper (`run_and_report `), so we +// match it only when the wrapped command itself is a validation command. +const VALIDATION_COMMAND_RE = + /(?:make\s+(?:test|typecheck|lint|static-check|fmt-check)|bun\s+(?:test|run\s+(?:test|typecheck|lint))|npm\s+(?:test|run\s+(?:test|typecheck|lint))|pnpm\s+(?:test|run\s+(?:test|typecheck|lint))|yarn\s+(?:test|run\s+(?:test|typecheck|lint))|tsc|eslint|vitest|pytest|cargo\s+(?:test|check|clippy))\b/; + +// Match validation commands at line start, after shell operators (&&, ||, ;, |), +// or after run_and_report wrappers, to handle monorepo/subdirectory workflows +// like `cd packages/app && make test`. +const CMD_PREFIX = String.raw`(?:^|\n|&&|\|\||[;|])\s*`; +const VALIDATION_PATTERNS: RegExp[] = [ + // run_and_report — only when the actual command + // (third word) is a validation command. Chained commands like + // `run_and_report unit cd app && bun test` are caught by the standalone pattern below. + // NOTE: This is a heuristic. Environment prefixes like `env CI=1 bun test` or + // `bash -c "make test"` won't match the run_and_report rule, but may be caught by + // the standalone pattern if chained with &&/;. The agent_report escape hatch (second + // attempt always passes) covers any remaining false negatives. + new RegExp(`${CMD_PREFIX}run_and_report\\s+\\S+\\s+${VALIDATION_COMMAND_RE.source}`), + // Validation commands at line start or after shell operators + new RegExp(`${CMD_PREFIX}${VALIDATION_COMMAND_RE.source}`), +]; + +export function looksLikeValidationCommand(script: string): boolean { + return VALIDATION_PATTERNS.some((pattern) => pattern.test(script)); +} + /** * Rewrite cmd.exe-style null-device redirects (e.g. `>nul`, `2>nul`) into `/dev/null`. * @@ -861,6 +888,13 @@ export const createBashTool: ToolFactory = (config: ToolConfiguration) => { const validationError = validateScript(script, config); if (validationError) return validationError; + // Mark validation attempts for the pre-completion verification guard. + // Only count foreground commands — background processes haven't produced + // results yet, so they don't count as "validation attempted". + if (!run_in_background && looksLikeValidationCommand(script)) { + config.verificationTracker?.markValidationAttempt(); + } + // Warn when the model appears to be reading files via bash output (cat/rg/grep). // Reading files via bash output is fragile (may be truncated or auto-filtered); // file_read supports paging and avoids silent context loss. diff --git a/src/node/services/tools/file_edit_insert.ts b/src/node/services/tools/file_edit_insert.ts index f0a15783d9..5bcc276aff 100644 --- a/src/node/services/tools/file_edit_insert.ts +++ b/src/node/services/tools/file_edit_insert.ts @@ -14,6 +14,8 @@ import { fileExists } from "@/node/utils/runtime/fileExists"; import { writeFileString } from "@/node/utils/runtime/helpers"; import { RuntimeError } from "@/node/runtime/Runtime"; import { getErrorMessage } from "@/common/utils/errors"; +import { attachModelOnlyToolNotifications } from "@/common/utils/tools/internalToolResultFields"; +import { DOOM_LOOP_EDIT_THRESHOLD } from "@/node/services/streamGuardrails/StreamEditTracker"; const READ_AND_RETRY_NOTE = `${EDIT_FAILED_NOTE_PREFIX} ${NOTE_READ_FILE_RETRY}`; @@ -97,8 +99,23 @@ export const createFileEditInsertTool: ToolFactory = (config: ToolConfiguration) } } + // Track new-file creation for doom-loop detection and verification guard. + // Reset verification state so pre-edit validation doesn't count. + let doomLoopNudge: string | undefined; + if (!config.planFileOnly && config.editTracker) { + const editCount = config.editTracker.recordEdit(resolvedPath); + config.verificationTracker?.resetValidation(); + if (config.editTracker.shouldNudge(resolvedPath, DOOM_LOOP_EDIT_THRESHOLD)) { + config.editTracker.markNudged(resolvedPath); + doomLoopNudge = + `Potential doom loop: you have edited ${resolvedPath} ${editCount} times this stream. ` + + `Step back and reconsider:\n- Re-read the latest error/output carefully.\n- Verify your assumptions about the problem.\n` + + `- Consider a fundamentally different approach (not a small variation of what you've been trying).`; + } + } + const diff = generateDiff(resolvedPath, "", content); - return { + const baseResult: FileEditInsertToolResult = { success: true, diff: FILE_EDIT_DIFF_OMITTED_MESSAGE, ui_only: { @@ -108,6 +125,12 @@ export const createFileEditInsertTool: ToolFactory = (config: ToolConfiguration) }, ...(pathWarning && { warning: pathWarning }), }; + if (doomLoopNudge) { + return attachModelOnlyToolNotifications(baseResult, [ + doomLoopNudge, + ]) as FileEditInsertToolResult; + } + return baseResult; } return executeFileEditOperation({ diff --git a/src/node/services/tools/file_edit_operation.test.ts b/src/node/services/tools/file_edit_operation.test.ts index d5d66b7fe0..f3da90c34e 100644 --- a/src/node/services/tools/file_edit_operation.test.ts +++ b/src/node/services/tools/file_edit_operation.test.ts @@ -4,6 +4,11 @@ import * as path from "path"; import { executeFileEditOperation } from "./file_edit_operation"; import type { Runtime } from "@/node/runtime/Runtime"; import { LocalRuntime } from "@/node/runtime/LocalRuntime"; +import { MODEL_ONLY_TOOL_NOTIFICATIONS_FIELD } from "@/common/utils/tools/internalToolResultFields"; +import { + DOOM_LOOP_EDIT_THRESHOLD, + StreamEditTracker, +} from "@/node/services/streamGuardrails/StreamEditTracker"; import { getTestDeps, TestTempDir } from "./testHelpers"; @@ -282,3 +287,81 @@ describe("executeFileEditOperation plan mode enforcement", () => { expect(resolvePathCalls).toContain("/home/user/.mux/sessions/ws/plan.md"); }); }); + +describe("executeFileEditOperation doom-loop guard", () => { + test("attaches a model-only notification when edit threshold is reached", async () => { + using tempDir = new TestTempDir("doom-loop-guard-test"); + + const filePath = path.join(tempDir.path, "main.ts"); + await fs.writeFile(filePath, "const x = 0;\n"); + + const runtime = new LocalRuntime(tempDir.path); + const editTracker = new StreamEditTracker(); + + let thresholdResult: unknown; + for (let i = 1; i <= DOOM_LOOP_EDIT_THRESHOLD; i += 1) { + thresholdResult = await executeFileEditOperation({ + config: { + cwd: tempDir.path, + runtime, + runtimeTempDir: tempDir.path, + editTracker, + }, + filePath, + operation: () => ({ success: true, newContent: `const x = ${i};\n`, metadata: {} }), + }); + } + + expect(thresholdResult).toBeDefined(); + const resultRecord = thresholdResult as Record; + + expect(resultRecord.success).toBe(true); + expect(Array.isArray(resultRecord[MODEL_ONLY_TOOL_NOTIFICATIONS_FIELD])).toBe(true); + + const notifications = resultRecord[MODEL_ONLY_TOOL_NOTIFICATIONS_FIELD] as string[]; + expect(notifications[0]).toContain("Potential doom loop"); + expect(notifications[0]).toContain(filePath); + }); + + test("nudges at most once per file for a stream", async () => { + using tempDir = new TestTempDir("doom-loop-guard-once-test"); + + const filePath = path.join(tempDir.path, "main.ts"); + await fs.writeFile(filePath, "const x = 0;\n"); + + const runtime = new LocalRuntime(tempDir.path); + const editTracker = new StreamEditTracker(); + + for (let i = 1; i <= DOOM_LOOP_EDIT_THRESHOLD; i += 1) { + await executeFileEditOperation({ + config: { + cwd: tempDir.path, + runtime, + runtimeTempDir: tempDir.path, + editTracker, + }, + filePath, + operation: () => ({ success: true, newContent: `const x = ${i};\n`, metadata: {} }), + }); + } + + const postThresholdResult = await executeFileEditOperation({ + config: { + cwd: tempDir.path, + runtime, + runtimeTempDir: tempDir.path, + editTracker, + }, + filePath, + operation: () => ({ + success: true, + newContent: `const x = ${DOOM_LOOP_EDIT_THRESHOLD + 1};\n`, + metadata: {}, + }), + }); + + const resultRecord = postThresholdResult as unknown as Record; + expect(resultRecord.success).toBe(true); + expect(resultRecord[MODEL_ONLY_TOOL_NOTIFICATIONS_FIELD]).toBeUndefined(); + }); +}); diff --git a/src/node/services/tools/file_edit_operation.ts b/src/node/services/tools/file_edit_operation.ts index 50a334c029..4e33745235 100644 --- a/src/node/services/tools/file_edit_operation.ts +++ b/src/node/services/tools/file_edit_operation.ts @@ -13,6 +13,8 @@ import { import { RuntimeError } from "@/node/runtime/Runtime"; import { readFileString, writeFileString } from "@/node/utils/runtime/helpers"; import { getErrorMessage } from "@/common/utils/errors"; +import { attachModelOnlyToolNotifications } from "@/common/utils/tools/internalToolResultFields"; +import { DOOM_LOOP_EDIT_THRESHOLD } from "@/node/services/streamGuardrails/StreamEditTracker"; type FileEditOperationResult = | { @@ -131,6 +133,8 @@ export async function executeFileEditOperation({ throw err; } + let doomLoopNudge: string | undefined; + // Record file state for post-compaction attachment tracking if (config.recordFileState) { try { @@ -144,9 +148,20 @@ export async function executeFileEditOperation({ } } + // Track repeated edits to detect potential doom loops in exec mode. + // Reset verification state so pre-edit validation doesn't count. + if (!config.planFileOnly && config.editTracker) { + const editCount = config.editTracker.recordEdit(resolvedPath); + config.verificationTracker?.resetValidation(); + if (config.editTracker.shouldNudge(resolvedPath, DOOM_LOOP_EDIT_THRESHOLD)) { + config.editTracker.markNudged(resolvedPath); + doomLoopNudge = `Potential doom loop: you have edited ${resolvedPath} ${editCount} times this stream. Step back and reconsider:\n- Re-read the latest error/output carefully.\n- Verify your assumptions about the problem.\n- Consider a fundamentally different approach (not a small variation of what you've been trying).`; + } + } + const diff = generateDiff(resolvedPath, originalContent, operationResult.newContent); - return { + const baseResult: FileEditDiffSuccessBase & TMetadata = { success: true, diff: FILE_EDIT_DIFF_OMITTED_MESSAGE, ui_only: { @@ -155,8 +170,14 @@ export async function executeFileEditOperation({ }, }, ...operationResult.metadata, - ...(pathWarning && { warning: pathWarning }), + ...(pathWarning ? { warning: pathWarning } : {}), }; + + if (doomLoopNudge) { + return attachModelOnlyToolNotifications(baseResult, [doomLoopNudge]) as typeof baseResult; + } + + return baseResult; } catch (error) { if (error && typeof error === "object" && "code" in error) { const nodeError = error as { code?: string };