web-infra-dev · yuyutaotao · Jan 24, 2025 · Jan 24, 2025 · Jan 24, 2025 · Jan 24, 2025
diff --git a/.github/workflows/ai.yml b/.github/workflows/ai.yml
@@ -124,8 +124,10 @@ jobs:
         if-no-files-found: ignore
 
     - name: Check if script failed
-      if: steps.test-ai.outcome == 'failure'
+      if: steps.test-ai.outcome == 'failure' || steps.e2e-tests.outcome == 'failure' || steps.e2e-tests-cache.outcome == 'failure' || steps.e2e-tests-report.outcome == 'failure'
       run: exit 1
 
 
 
+
+
diff --git a/packages/midscene/package.json b/packages/midscene/package.json
@@ -34,8 +34,9 @@
     "computer": "TEST_COMPUTER=true npm run test:ai -- tests/ai/evaluate/computer.test.ts",
     "evaluate": "npm run test:ai -- tests/ai/evaluate/inspect.test.ts",
     "evaluate:assertion": "npm run test:ai -- tests/ai/evaluate/assertion.test.ts",
-    "prompt": "npm run test:ai -- tests/ai/parse-action.test.ts",
+    "evaluate:plan": "npm run test:ai -- tests/ai/evaluate/plan/planning.test.ts",
     "evaluate:update": "UPDATE_AI_DATA=true npm run test:ai -- tests/ai/evaluate/inspect.test.ts",
+    "prompt": "npm run test:ai -- tests/ai/parse-action.test.ts",
     "prepublishOnly": "npm run build"
   },
   "dependencies": {

diff --git a/packages/midscene/tests/ai/evaluate/assertion.test.ts b/packages/midscene/tests/ai/evaluate/assertion.test.ts
@@ -9,8 +9,8 @@ import {
   repeatFile,
 } from './test-suite/util';
 import 'dotenv/config';
+import { repeatTime } from '../util';
 
-const repeatTime = 2;
 const testSources = [
   // 'todo',
   'online_order',

diff --git a/packages/midscene/tests/ai/evaluate/inspect.test.ts b/packages/midscene/tests/ai/evaluate/inspect.test.ts
@@ -14,8 +14,8 @@ import {
   runTestCases,
 } from './test-suite/util';
 import 'dotenv/config';
+import { repeatTime } from '../util';
 
-const repeatTime = 2;
 const relocateAfterPlanning = false;
 const failCaseThreshold = process.env.CI ? 1 : 0;
 const testSources = [

diff --git a/packages/midscene/tests/ai/evaluate/plan/__snapshots__/planning-input.test.ts.snap b/packages/midscene/tests/ai/evaluate/plan/__snapshots__/planning-input.test.ts.snap
@@ -0,0 +1,89 @@
+// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html
+
+exports[`automation - planning input > input value 1`] = `
+[
+  {
+    "locate": {
+      "id": "fbc2d002",
+      "prompt": "the input field with placeholder 'What needs to be done?'",
+    },
+    "param": {
+      "value": "learning english",
+    },
+    "thought": undefined,
+    "type": "Input",
+  },
+]
+`;
+
+exports[`automation - planning input > input value 2`] = `
+[
+  {
+    "locate": {
+      "id": "fbc2d002",
+      "prompt": "the input field labeled 'What needs to be done?'",
+    },
+    "param": {
+      "value": "learning english",
+    },
+    "thought": undefined,
+    "type": "Input",
+  },
+  {
+    "locate": null,
+    "param": {
+      "value": "Enter",
+    },
+    "thought": undefined,
+    "type": "KeyboardPress",
+  },
+]
+`;
+
+exports[`automation - planning input > input value Add, delete, correct and check 1`] = `
+[
+  {
+    "locate": {
+      "id": "fbc2d002",
+      "prompt": "the task input box with the content 'Learn English'",
+    },
+    "param": {
+      "value": "Learn English tomorrow",
+    },
+    "thought": undefined,
+    "type": "Input",
+  },
+]
+`;
+
+exports[`automation - planning input > input value Add, delete, correct and check 2`] = `
+[
+  {
+    "locate": {
+      "id": "fbc2d002",
+      "prompt": "the input box containing 'Learn English'",
+    },
+    "param": {
+      "value": "Learn Skiing",
+    },
+    "thought": undefined,
+    "type": "Input",
+  },
+]
+`;
+
+exports[`automation - planning input > input value Add, delete, correct and check 3`] = `
+[
+  {
+    "locate": {
+      "id": "fbc2d002",
+      "prompt": "the task input box containing 'Learn English'",
+    },
+    "param": {
+      "value": "Learn",
+    },
+    "thought": undefined,
+    "type": "Input",
+  },
+]
+`;
diff --git a/packages/midscene/tests/ai/evaluate/plan/__snapshots__/planning.test.ts.snap b/packages/midscene/tests/ai/evaluate/plan/__snapshots__/planning.test.ts.snap
@@ -0,0 +1,13 @@
+// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html
+
+exports[`automation - planning > basic run 1`] = `
+{
+  "timeMs": 3500,
+}
+`;
+
+exports[`automation - planning > basic run 2`] = `
+{
+  "value": "Enter",
+}
+`;
diff --git a/packages/midscene/tests/ai/evaluate/plan/planning-input.test.ts b/packages/midscene/tests/ai/evaluate/plan/planning-input.test.ts
@@ -1,7 +1,7 @@
 import { plan } from '@/ai-model';
 /* eslint-disable max-lines-per-function */
 import { describe, expect, it, vi } from 'vitest';
-import { makePlanResultStable } from '../../util';
+import { makePlanResultStable, repeatTime } from '../../util';
 import { getPageDataOfTestName, repeat } from './../test-suite/util';
 
 vi.setConfig({
@@ -10,7 +10,7 @@ vi.setConfig({
 });
 
 describe('automation - planning input', () => {
-  repeat(5, () =>
+  repeat(repeatTime, () =>
     it('input value', async () => {
       const { context } = await getPageDataOfTestName('todo');
       const instructions = [
@@ -26,7 +26,7 @@ describe('automation - planning input', () => {
     }),
   );
 
-  repeat(5, () =>
+  repeat(repeatTime, () =>
     it('input value Add, delete, correct and check', async () => {
       const { context } = await getPageDataOfTestName('todo-input-with-value');
       const instructions = [

diff --git a/packages/midscene/tests/ai/evaluate/plan/planning.test.ts b/packages/midscene/tests/ai/evaluate/plan/planning.test.ts
@@ -48,7 +48,7 @@ describe('automation - planning', () => {
   it('scroll some element', async () => {
     const { context } = await getPageDataOfTestName('todo');
     const { actions } = await plan(
-      'Scroll left the status filters (with a button named "complete")',
+      'Scroll left the status filters (with a button named "completed")',
       {
         context,
       },

diff --git a/packages/midscene/tests/ai/extract/__snapshots__/extract.test.ts.snap b/packages/midscene/tests/ai/extract/__snapshots__/extract.test.ts.snap
@@ -1,6 +1,6 @@
 // Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html
 
-exports[`assert openAI > online order 1`] = `
+exports[`extract > online order 1`] = `
 {
   "data": [
     {
@@ -17,7 +17,7 @@ exports[`assert openAI > online order 1`] = `
 }
 `;
 
-exports[`assert openAI > todo 1`] = `
+exports[`extract > todo 1`] = `
 {
   "data": [
     "Learn Python",
@@ -29,7 +29,7 @@ exports[`assert openAI > todo 1`] = `
 }
 `;
 
-exports[`assert openAI > todo obj 1`] = `
+exports[`extract > todo obj 1`] = `
 {
   "data": [
     {

diff --git a/packages/midscene/tests/ai/plan/plan-to-target.test.ts b/packages/midscene/tests/ai/plan/plan-to-target.test.ts
@@ -1,58 +1,66 @@
 import path from 'node:path';
 import { vlmPlanning } from '@/ai-model/ui-tars-planning';
 import { savePositionImg } from '@midscene/shared/img';
-import { expect, test } from 'vitest';
+import { describe, expect, it } from 'vitest';
 import { getPageTestData } from '../evaluate/test-suite/util';
 
-test('inspect with quick answer', async () => {
-  const { context } = await getPageTestData(
-    path.join(__dirname, '../evaluate/test-data/todo'),
-  );
+// vlmPlanning is imported from the ui-tars-planning module, so this suite is
+// gated on the MIDSCENE_USE_VLM_UI_TARS=1 environment switch.
+const isUiTars = process.env.MIDSCENE_USE_VLM_UI_TARS === '1';
 
-  const { width, height } = context.size;
-  const startTime = Date.now();
-  const { realActions } = await vlmPlanning({
-    userInstruction: '删除第二条任务',
-    conversationHistory: [
-      {
-        role: 'user',
-        content: [
-          {
-            type: 'image_url',
-            image_url: {
-              url: context.originalScreenshotBase64,
+// `describe` (not `test`) must wrap the nested `it`: vitest cannot collect a
+// test case that is registered from inside another running test.
+describe.skipIf(!isUiTars)('only run in ui-tars', () => {
+  it('plan to target', async () => {
+    const { context } = await getPageTestData(
+      path.join(__dirname, '../evaluate/test-data/todo'),
+    );
+
+    const { width, height } = context.size;
+    const startTime = Date.now();
+    const { realActions } = await vlmPlanning({
+      userInstruction: '删除第二条任务',
+      conversationHistory: [
+        {
+          role: 'user',
+          content: [
+            {
+              type: 'image_url',
+              image_url: {
+                url: context.originalScreenshotBase64,
+              },
             },
-          },
-        ],
+          ],
+        },
+      ],
+      size: {
+        width,
+        height,
       },
-    ],
-    size: {
-      width,
-      height,
-    },
-  });
+    });
 
-  const endTime = Date.now();
-  const cost = (endTime - startTime) / 1000;
-  const start_box =
-    'start_box' in realActions[0].action_inputs
-      ? realActions[0].action_inputs.start_box
-      : '[]';
-  const box = JSON.parse(start_box);
-  console.log('plan to target content:', {
-    box,
-    size: {
-      width,
-      height,
-    },
-    cost: `${cost}s`,
-  });
-  // expect(box).toEqual([0.397, 0.218, 0.397, 0.218]);
-  expect(true).toBe(true);
-  await savePositionImg({
-    inputImgBase64: context.originalScreenshotBase64,
-    rect: { x: box[0] * width, y: box[1] * height },
-    outputPath: path.join(__dirname, 'output.png'),
+    const endTime = Date.now();
+    const cost = (endTime - startTime) / 1000;
+    // Fall back to an empty JSON array when the first action has no start_box.
+    const start_box =
+      'start_box' in realActions[0].action_inputs
+        ? realActions[0].action_inputs.start_box
+        : '[]';
+    const box = JSON.parse(start_box);
+    console.log('plan to target content:', {
+      box,
+      size: {
+        width,
+        height,
+      },
+      cost: `${cost}s`,
+    });
+    // expect(box).toEqual([0.397, 0.218, 0.397, 0.218]);
+    expect(true).toBe(true);
+    await savePositionImg({
+      inputImgBase64: context.originalScreenshotBase64,
+      rect: { x: box[0] * width, y: box[1] * height },
+      outputPath: path.join(__dirname, 'output.png'),
+    });
   });
-  //   expect(cost).toBeLessThan(100);
 });
diff --git a/packages/midscene/tests/ai/util.ts b/packages/midscene/tests/ai/util.ts
@@ -1,15 +1,18 @@
 import type { PlanningAction } from '@/types';
 
+export const repeatTime = 1;
 export function makePlanResultStable(plans: PlanningAction[]) {
   return plans.map((plan) => {
     // Removing thinking makes the results stable for snapshot testing
     plan.thought = undefined;
     if (plan.param?.prompt) {
       plan.param.prompt = '';
     }
-    if (plan.quickAnswer) {
-      plan.quickAnswer.reason = '';
-      plan.quickAnswer.text = '';
+    if ('quickAnswer' in plan && plan.quickAnswer) {
+      plan.quickAnswer = {
+        reason: '',
+        text: '',
+      };
     }
     return plan;
   });

diff --git a/packages/web-integration/package.json b/packages/web-integration/package.json
@@ -107,6 +107,7 @@
     "test": "vitest --run",
     "test:u": "vitest --run -u",
     "test:ai": "AI_TEST_TYPE=web npm run test",
+    "test:ai:bridge": "BRIDGE_MODE=true npm run test --inspect packages/web-integration/tests/ai/bridge/agent.test.ts",
     "test:ai:cache": "MIDSCENE_CACHE=true AI_TEST_TYPE=web npm run test",
     "test:ai:all": "npm run test:ai:web && npm run test:ai:native",
     "test:ai:native": "MIDSCENE_CACHE=true AI_TEST_TYPE=native npm run test",

diff --git a/packages/web-integration/src/common/tasks.ts b/packages/web-integration/src/common/tasks.ts
@@ -349,9 +349,13 @@ export class PageTaskExecutor {
               } else if (scrollToEventName === 'untilLeft') {
                 await this.page.scrollUntilLeft(startingPoint);
               } else if (scrollToEventName === 'once' || !scrollToEventName) {
-                if (taskParam.direction === 'down' || !taskParam.direction) {
+                if (
+                  taskParam?.direction === 'down' ||
+                  !taskParam ||
+                  !taskParam.direction
+                ) {
                   await this.page.scrollDown(
-                    taskParam.distance || undefined,
+                    taskParam?.distance || undefined,
                     startingPoint,
                   );
                 } else if (taskParam.direction === 'up') {

diff --git a/packages/web-integration/tests/ai/bridge/agent.test.ts b/packages/web-integration/tests/ai/bridge/agent.test.ts
@@ -8,7 +8,8 @@ vi.setConfig({
   testTimeout: 60 * 1000,
 });
 const sleep = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms));
-describe.skipIf(process.env.CI)(
+
+describe.skipIf(!process.env.BRIDGE_MODE)(
   'fully functional agent in server(cli) side',
   () => {
     it('basic', async () => {

diff --git a/packages/web-integration/tests/ai/web/playwright/ai-auto-todo.spec.ts b/packages/web-integration/tests/ai/web/playwright/ai-auto-todo.spec.ts
@@ -13,11 +13,7 @@ test('ai todo', async ({ ai, aiQuery }) => {
   }
 
   await ai('Enter "Happy Birthday" in the task box');
-  await ai('Enter "Learn" in the task box');
-
-  await ai(
-    'Add "JS today" to base on the existing content(important) of the task box, then press enter',
-  );
+  await ai('Enter "Learn JS today"in the task box, then press Enter to create');
 
   await ai(
     'Enter "Learn Rust tomorrow" in the task box, then press Enter to create',
@@ -27,6 +23,7 @@ test('ai todo', async ({ ai, aiQuery }) => {
   );
 
   const allTaskList = await aiQuery<string[]>('string[], tasks in the list');
+  console.log('allTaskList', allTaskList);
   expect(allTaskList.length).toBe(3);
   expect(allTaskList).toContain('Learn JS today');
   expect(allTaskList).toContain('Learn Rust tomorrow');