web-infra-dev
diff --git a/‎packages/midscene/package.json
+3 b/‎packages/midscene/package.json
+3
diff --git a/‎packages/midscene/src/ai-model/automation/index.ts ‎packages/midscene/src/ai-model/automation.ts
+20-38 b/‎packages/midscene/src/ai-model/automation/index.ts ‎packages/midscene/src/ai-model/automation.ts
+20-38
diff --git a/‎packages/midscene/src/ai-model/common.ts
+4-17 b/‎packages/midscene/src/ai-model/common.ts
+4-17
@@ -33,14 +33,17 @@
     "test:ai": "AITEST=true npm run test",
     "computer": "TEST_COMPUTER=true npm run test:ai -- tests/ai/evaluate/computer.test.ts",
     "evaluate": "npm run test:ai -- tests/ai/evaluate/inspect.test.ts",
+    "prompt": "npm run test:ai -- tests/ai/inspect2.test.ts",
     "evaluate:update": "UPDATE_AI_DATA=true npm run test:ai -- tests/ai/evaluate/inspect.test.ts",
     "prepublishOnly": "npm run build"
   },
   "dependencies": {
     "@anthropic-ai/sdk": "0.33.1",
     "@azure/identity": "4.5.0",
+    "@langchain/core": "0.3.26",
     "@midscene/shared": "workspace:*",
     "dirty-json": "0.9.2",
+    "langchain": "0.3.8",
     "openai": "4.57.1",
     "optional": "0.1.4",
     "socks-proxy-agent": "8.0.4"
 
@@ -1,13 +1,13 @@
 import assert from 'node:assert';
 import type { AIUsageInfo, PlanningAIResponse, UIContext } from '@/types';
+import { PromptTemplate } from '@langchain/core/prompts';
+import { AIActionType, type AIArgs, callAiFn } from './common';
 import {
-  AIActionType,
-  type AIArgs,
-  callAiFn,
-  transformUserMessages,
-} from '../common';
-import { systemPromptToTaskPlanning } from '../prompt/planning';
-import { describeUserPage } from '../prompt/util';
+  automationUserPrompt,
+  systemPromptToTaskPlanning,
+  taskBackgroundContext,
+} from './prompt/planning';
+import { describeUserPage } from './prompt/util';
 
 export async function plan(
   userPrompt: string,
@@ -23,25 +23,21 @@ export async function plan(
   const { description: pageDescription, elementByPosition } =
     await describeUserPage(context);
 
-  const systemPrompt = systemPromptToTaskPlanning();
+  const systemPrompt = await systemPromptToTaskPlanning();
+  const userInstructionPrompt = await automationUserPrompt.format({
+    pageDescription,
+    userPrompt,
+    taskBackgroundContext: taskBackgroundContext(
+      opts.originalPrompt,
+      opts.whatHaveDone,
+    ),
+  });
 
-  let taskBackgroundContext = '';
-  if (opts.originalPrompt && opts.whatHaveDone) {
-    taskBackgroundContext = `For your information, this is a task that some important person handed to you. Here is the original task description and what have been done after the previous actions:
-=====================================
-Original task description:
-${opts.originalPrompt}
-=====================================
-What have been done:
-${opts.whatHaveDone}
-=====================================
-`;
-  }
   const msgs: AIArgs = [
     { role: 'system', content: systemPrompt },
     {
       role: 'user',
-      content: transformUserMessages([
+      content: [
         {
           type: 'image_url',
           image_url: {
@@ -51,28 +47,14 @@ ${opts.whatHaveDone}
         },
         {
           type: 'text',
-          text: `
-pageDescription:\n 
-${pageDescription}
-\n
-Here is the instruction:
-=====================================
-${userPrompt}
-=====================================
-
-${taskBackgroundContext}
-`.trim(),
+          text: userInstructionPrompt,
         },
-      ]),
+      ],
     },
   ];
 
   const call = callAI || callAiFn;
-  const { content, usage } = await call({
-    msgs,
-    AIActionType: AIActionType.PLAN,
-  });
-
+  const { content, usage } = await call(msgs, AIActionType.PLAN);
   const planFromAI = content;
 
   const actions = planFromAI?.actions || [];
 
@@ -21,11 +21,10 @@ export enum AIActionType {
   PLAN = 3,
 }
 
-export async function callAiFn<T>(options: {
-  msgs: AIArgs;
-  AIActionType: AIActionType;
-}): Promise<{ content: T; usage?: AIUsageInfo }> {
-  const { msgs, AIActionType: AIActionTypeValue } = options;
+export async function callAiFn<T>(
+  msgs: AIArgs,
+  AIActionTypeValue: AIActionType,
+): Promise<{ content: T; usage?: AIUsageInfo }> {
   assert(
     checkAIConfig(),
     'Cannot find config for AI model service. You should set it before using. https://midscenejs.com/model-provider.html',
@@ -37,15 +36,3 @@ export async function callAiFn<T>(options: {
   );
   return { content, usage };
 }
-
-export function transformUserMessages(msgs: ChatCompletionContentPart[]) {
-  const textOnly = Boolean(getAIConfig(MIDSCENE_MODEL_TEXT_ONLY));
-  if (!textOnly) return msgs;
-
-  return msgs.reduce((res, msg) => {
-    if (msg.type === 'text') {
-      res += msg.text;
-    }
-    return res;
-  }, '');
-}