Skip to content

Commit 691eb6e

Browse files
authored
feat(ai-model): support Image positioning and integrate langchain (#230)
* feat: add point img logic * feat: migrate prompt to langchain * chore: delete useless log * chore: optimize test case * chore: fix lint error * chore: delete httpAgent logic * chore: delete useless fn * chore: fix some comment * chore: fix ci error * chore: delete useless fn * chore: update prompt * chore: delete useless language
1 parent 6af7d21 commit 691eb6e

File tree

24 files changed

+866
-782
lines changed

24 files changed

+866
-782
lines changed

packages/midscene/package.json

+3
Original file line numberDiff line numberDiff line change
@@ -33,14 +33,17 @@
3333
"test:ai": "AITEST=true npm run test",
3434
"computer": "TEST_COMPUTER=true npm run test:ai -- tests/ai/evaluate/computer.test.ts",
3535
"evaluate": "npm run test:ai -- tests/ai/evaluate/inspect.test.ts",
36+
"prompt": "npm run test:ai -- tests/ai/inspect2.test.ts",
3637
"evaluate:update": "UPDATE_AI_DATA=true npm run test:ai -- tests/ai/evaluate/inspect.test.ts",
3738
"prepublishOnly": "npm run build"
3839
},
3940
"dependencies": {
4041
"@anthropic-ai/sdk": "0.33.1",
4142
"@azure/identity": "4.5.0",
43+
"@langchain/core": "0.3.26",
4244
"@midscene/shared": "workspace:*",
4345
"dirty-json": "0.9.2",
46+
"langchain": "0.3.8",
4447
"openai": "4.57.1",
4548
"optional": "0.1.4",
4649
"socks-proxy-agent": "8.0.4"

packages/midscene/src/ai-model/automation/index.ts → packages/midscene/src/ai-model/automation.ts

+20-38
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
import assert from 'node:assert';
22
import type { AIUsageInfo, PlanningAIResponse, UIContext } from '@/types';
3+
import { PromptTemplate } from '@langchain/core/prompts';
4+
import { AIActionType, type AIArgs, callAiFn } from './common';
35
import {
4-
AIActionType,
5-
type AIArgs,
6-
callAiFn,
7-
transformUserMessages,
8-
} from '../common';
9-
import { systemPromptToTaskPlanning } from '../prompt/planning';
10-
import { describeUserPage } from '../prompt/util';
6+
automationUserPrompt,
7+
systemPromptToTaskPlanning,
8+
taskBackgroundContext,
9+
} from './prompt/planning';
10+
import { describeUserPage } from './prompt/util';
1111

1212
export async function plan(
1313
userPrompt: string,
@@ -23,25 +23,21 @@ export async function plan(
2323
const { description: pageDescription, elementByPosition } =
2424
await describeUserPage(context);
2525

26-
const systemPrompt = systemPromptToTaskPlanning();
26+
const systemPrompt = await systemPromptToTaskPlanning();
27+
const userInstructionPrompt = await automationUserPrompt.format({
28+
pageDescription,
29+
userPrompt,
30+
taskBackgroundContext: taskBackgroundContext(
31+
opts.originalPrompt,
32+
opts.whatHaveDone,
33+
),
34+
});
2735

28-
let taskBackgroundContext = '';
29-
if (opts.originalPrompt && opts.whatHaveDone) {
30-
taskBackgroundContext = `For your information, this is a task that some important person handed to you. Here is the original task description and what have been done after the previous actions:
31-
=====================================
32-
Original task description:
33-
${opts.originalPrompt}
34-
=====================================
35-
What have been done:
36-
${opts.whatHaveDone}
37-
=====================================
38-
`;
39-
}
4036
const msgs: AIArgs = [
4137
{ role: 'system', content: systemPrompt },
4238
{
4339
role: 'user',
44-
content: transformUserMessages([
40+
content: [
4541
{
4642
type: 'image_url',
4743
image_url: {
@@ -51,28 +47,14 @@ ${opts.whatHaveDone}
5147
},
5248
{
5349
type: 'text',
54-
text: `
55-
pageDescription:\n
56-
${pageDescription}
57-
\n
58-
Here is the instruction:
59-
=====================================
60-
${userPrompt}
61-
=====================================
62-
63-
${taskBackgroundContext}
64-
`.trim(),
50+
text: userInstructionPrompt,
6551
},
66-
]),
52+
],
6753
},
6854
];
6955

7056
const call = callAI || callAiFn;
71-
const { content, usage } = await call({
72-
msgs,
73-
AIActionType: AIActionType.PLAN,
74-
});
75-
57+
const { content, usage } = await call(msgs, AIActionType.PLAN);
7658
const planFromAI = content;
7759

7860
const actions = planFromAI?.actions || [];

packages/midscene/src/ai-model/common.ts

+4-17
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,10 @@ export enum AIActionType {
2121
PLAN = 3,
2222
}
2323

24-
export async function callAiFn<T>(options: {
25-
msgs: AIArgs;
26-
AIActionType: AIActionType;
27-
}): Promise<{ content: T; usage?: AIUsageInfo }> {
28-
const { msgs, AIActionType: AIActionTypeValue } = options;
24+
export async function callAiFn<T>(
25+
msgs: AIArgs,
26+
AIActionTypeValue: AIActionType,
27+
): Promise<{ content: T; usage?: AIUsageInfo }> {
2928
assert(
3029
checkAIConfig(),
3130
'Cannot find config for AI model service. You should set it before using. https://midscenejs.com/model-provider.html',
@@ -37,15 +36,3 @@ export async function callAiFn<T>(options: {
3736
);
3837
return { content, usage };
3938
}
40-
41-
export function transformUserMessages(msgs: ChatCompletionContentPart[]) {
42-
const textOnly = Boolean(getAIConfig(MIDSCENE_MODEL_TEXT_ONLY));
43-
if (!textOnly) return msgs;
44-
45-
return msgs.reduce((res, msg) => {
46-
if (msg.type === 'text') {
47-
res += msg.text;
48-
}
49-
return res;
50-
}, '');
51-
}

0 commit comments

Comments
 (0)