Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 27 additions & 5 deletions src/McpContext.ts
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,10 @@ export class McpContext implements Context {
return page.getDefaultNavigationTimeout();
}

/**
 * Looks up a node of the current text snapshot by its uid.
 *
 * @param uid - Identifier that was assigned to the node when the snapshot
 * was created.
 * @returns The snapshot node stored under `uid`, or `undefined` when there is
 * no snapshot or the snapshot has no node with that uid.
 */
getAXNodeByUid(uid: string) {
  const snapshot = this.#textSnapshot;
  return snapshot?.idToNode.get(uid);
}

async getElementByUid(uid: string): Promise<ElementHandle<Element>> {
if (!this.#textSnapshot?.idToNode.size) {
throw new Error(
Expand Down Expand Up @@ -326,19 +330,37 @@ export class McpContext implements Context {
// will be used for the tree serialization and mapping ids back to nodes.
let idCounter = 0;
const idToNode = new Map<string, TextSnapshotNode>();
const assignIds = (node: SerializedAXNode): TextSnapshotNode => {
const assignIds = async (
node: SerializedAXNode,
): Promise<TextSnapshotNode> => {
const nodeWithId: TextSnapshotNode = {
...node,
id: `${snapshotId}_${idCounter++}`,
children: node.children
? node.children.map(child => assignIds(child))
: [],
children: [],
};

// The AXNode for an option doesn't contain its `value`.
// Therefore, set text content of the option as value.
if (node.role === 'option') {
const handle = await node.elementHandle();
if (handle) {
const textContentHandle = await handle.getProperty('textContent');
const optionText = await textContentHandle.jsonValue();
if (optionText) {
nodeWithId.value = optionText.toString();
}
}
}

nodeWithId.children = node.children
? await Promise.all(node.children.map(child => assignIds(child)))
: [];

idToNode.set(nodeWithId.id, nodeWithId);
return nodeWithId;
};

const rootNodeWithId = assignIds(rootNode);
const rootNodeWithId = await assignIds(rootNode);
this.#textSnapshot = {
root: rootNodeWithId,
snapshotId: String(snapshotId),
Expand Down
2 changes: 2 additions & 0 deletions src/tools/ToolDefinition.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import type {Dialog, ElementHandle, Page} from 'puppeteer-core';
import z from 'zod';

import type {TextSnapshotNode} from '../McpContext.js';
import type {TraceResult} from '../trace-processing/parse.js';

import type {ToolCategories} from './categories.js';
Expand Down Expand Up @@ -68,6 +69,7 @@ export type Context = Readonly<{
closePage(pageIdx: number): Promise<void>;
setSelectedPageIdx(idx: number): void;
getElementByUid(uid: string): Promise<ElementHandle<Element>>;
getAXNodeByUid(uid: string): TextSnapshotNode | undefined;
setNetworkConditions(conditions: string | null): void;
setCpuThrottlingRate(rate: number): void;
saveTemporaryFile(
Expand Down
70 changes: 52 additions & 18 deletions src/tools/input.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
import type {ElementHandle} from 'puppeteer-core';
import z from 'zod';

import type {McpContext} from '../McpContext.js';

import {ToolCategories} from './categories.js';
import {defineTool} from './ToolDefinition.js';

Expand Down Expand Up @@ -78,6 +80,40 @@ export const hover = defineTool({
},
});

/**
 * Resolves the DOM `value` of the option inside a combobox whose visible text
 * equals `text`.
 *
 * The AXNode for an option doesn't contain its `value`; the snapshot stores
 * the option's text content there instead. Callers therefore identify an
 * option by its text, and the real value has to be read back from the
 * option's element.
 *
 * @param comboboxNode - Snapshot node of the combobox whose children are
 * searched.
 * @param text - Visible text of the requested option.
 * @returns The `value` property of the matching option element.
 * @throws Error if no option with the given text could be resolved.
 */
async function findOptionValueByText(
  comboboxNode: NonNullable<ReturnType<McpContext['getAXNodeByUid']>>,
  text: string,
): Promise<string> {
  for (const child of comboboxNode.children ?? []) {
    if (child.role !== 'option' || child.name !== text || !child.value) {
      continue;
    }
    const optionHandle = await child.elementHandle();
    if (!optionHandle) {
      // The element backing this node is gone; keep looking at the siblings.
      continue;
    }
    try {
      const valueHandle = await optionHandle.getProperty('value');
      try {
        const optionValue = await valueHandle.jsonValue();
        // An empty string is a legitimate option value (e.g. a placeholder
        // option), so test the type rather than truthiness.
        if (typeof optionValue === 'string') {
          return optionValue;
        }
      } finally {
        void valueHandle.dispose();
      }
    } finally {
      // Handles obtained for the lookup must be released after use.
      void optionHandle.dispose();
    }
  }
  throw new Error(`Could not find option with text "${text}"`);
}

/**
 * Fills the form element identified by `uid`.
 *
 * For a combobox, `value` is interpreted as the visible text of one of its
 * options and is translated to that option's real value before filling. Any
 * other element is filled with `value` as given.
 *
 * @param uid - Snapshot uid of the element to fill.
 * @param value - Text to type, or the visible text of the option to select.
 * @param context - Context used to resolve the uid to an element handle and
 * to its snapshot node.
 * @throws Error if the element is a combobox and none of its options has the
 * requested text.
 */
async function fillFormElement(
  uid: string,
  value: string,
  context: McpContext,
) {
  const handle = await context.getElementByUid(uid);
  try {
    const aXNode = context.getAXNodeByUid(uid);
    const valueToFill =
      aXNode && aXNode.role === 'combobox' && aXNode.children
        ? await findOptionValueByText(aXNode, value)
        : value;
    await handle.asLocator().fill(valueToFill);
  } finally {
    void handle.dispose();
  }
}

export const fill = defineTool({
name: 'fill',
description: `Type text into a input, text area or select an option from a <select> element.`,
Expand All @@ -94,16 +130,15 @@ export const fill = defineTool({
value: z.string().describe('The value to fill in'),
},
handler: async (request, response, context) => {
const handle = await context.getElementByUid(request.params.uid);
try {
await context.waitForEventsAfterAction(async () => {
await handle.asLocator().fill(request.params.value);
});
response.appendResponseLine(`Successfully filled out the element`);
response.setIncludeSnapshot(true);
} finally {
void handle.dispose();
}
await context.waitForEventsAfterAction(async () => {
await fillFormElement(
request.params.uid,
request.params.value,
context as McpContext,
);
});
response.appendResponseLine(`Successfully filled out the element`);
response.setIncludeSnapshot(true);
},
});

Expand Down Expand Up @@ -155,14 +190,13 @@ export const fillForm = defineTool({
},
handler: async (request, response, context) => {
for (const element of request.params.elements) {
const handle = await context.getElementByUid(element.uid);
try {
await context.waitForEventsAfterAction(async () => {
await handle.asLocator().fill(element.value);
});
} finally {
void handle.dispose();
}
await context.waitForEventsAfterAction(async () => {
await fillFormElement(
element.uid,
element.value,
context as McpContext,
);
});
}
response.appendResponseLine(`Successfully filled out the form`);
response.setIncludeSnapshot(true);
Expand Down
29 changes: 29 additions & 0 deletions tests/tools/input.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,35 @@ describe('input', () => {
assert.ok(await page.$('text/test'));
});
});

// Checks that `fill` accepts the visible text of an option ("two") and ends
// up selecting the option whose DOM value is "v2".
it('fills out a select by text', async () => {
  const markup = `<!DOCTYPE html><select><option value="v1">one</option><option value="v2">two</option></select>`;

  await withBrowser(async (response, context) => {
    const page = context.getSelectedPage();
    await page.setContent(markup);
    await context.createTextSnapshot();

    // '1_1' is presumably the uid given to the <select> in the first
    // snapshot — confirm against the snapshot id scheme.
    const request = {params: {uid: '1_1', value: 'two'}};
    await fill.handler(request, response, context);

    const expectedLine = 'Successfully filled out the element';
    assert.strictEqual(response.responseLines[0], expectedLine);
    assert.ok(response.includeSnapshot);

    // Read the value back from the page to verify the actual selection.
    const selectedValue = await page.evaluate(() => {
      const select = document.querySelector('select')!;
      return select.value;
    });
    assert.strictEqual(selectedValue, 'v2');
  });
});
});

describe('drags', () => {
Expand Down