diff --git a/src/McpContext.ts b/src/McpContext.ts index d1037935..5b29ce09 100644 --- a/src/McpContext.ts +++ b/src/McpContext.ts @@ -272,6 +272,10 @@ export class McpContext implements Context { return page.getDefaultNavigationTimeout(); } + getAXNodeByUid(uid: string) { + return this.#textSnapshot?.idToNode.get(uid); + } + async getElementByUid(uid: string): Promise> { if (!this.#textSnapshot?.idToNode.size) { throw new Error( @@ -326,19 +330,37 @@ export class McpContext implements Context { // will be used for the tree serialization and mapping ids back to nodes. let idCounter = 0; const idToNode = new Map(); - const assignIds = (node: SerializedAXNode): TextSnapshotNode => { + const assignIds = async ( + node: SerializedAXNode, + ): Promise => { const nodeWithId: TextSnapshotNode = { ...node, id: `${snapshotId}_${idCounter++}`, - children: node.children - ? node.children.map(child => assignIds(child)) - : [], + children: [], }; + + // The AXNode for an option doesn't contain its `value`. + // Therefore, set text content of the option as value. + if (node.role === 'option') { + const handle = await node.elementHandle(); + if (handle) { + const textContentHandle = await handle.getProperty('textContent'); + const optionText = await textContentHandle.jsonValue(); + if (optionText) { + nodeWithId.value = optionText.toString(); + } + } + } + + nodeWithId.children = node.children + ? await Promise.all(node.children.map(child => assignIds(child))) + : []; + idToNode.set(nodeWithId.id, nodeWithId); return nodeWithId; }; - const rootNodeWithId = assignIds(rootNode); + const rootNodeWithId = await assignIds(rootNode); this.#textSnapshot = { root: rootNodeWithId, snapshotId: String(snapshotId), diff --git a/src/tools/ToolDefinition.ts b/src/tools/ToolDefinition.ts index fe2fae7b..56fdb53a 100644 --- a/src/tools/ToolDefinition.ts +++ b/src/tools/ToolDefinition.ts @@ -7,6 +7,7 @@ import type {Dialog, ElementHandle, Page} from 'puppeteer-core'; import z from 'zod'; +import type {TextSnapshotNode} from '../McpContext.js'; import type {TraceResult} from '../trace-processing/parse.js'; import type {ToolCategories} from './categories.js'; @@ -68,6 +69,7 @@ export type Context = Readonly<{ closePage(pageIdx: number): Promise; setSelectedPageIdx(idx: number): void; getElementByUid(uid: string): Promise>; + getAXNodeByUid(uid: string): TextSnapshotNode | undefined; setNetworkConditions(conditions: string | null): void; setCpuThrottlingRate(rate: number): void; saveTemporaryFile( diff --git a/src/tools/input.ts b/src/tools/input.ts index eda04e80..1d595d0f 100644 --- a/src/tools/input.ts +++ b/src/tools/input.ts @@ -7,6 +7,8 @@ import type {ElementHandle} from 'puppeteer-core'; import z from 'zod'; +import type {McpContext} from '../McpContext.js'; + import {ToolCategories} from './categories.js'; import {defineTool} from './ToolDefinition.js'; @@ -78,6 +80,40 @@ export const hover = defineTool({ }, }); +async function fillFormElement( + uid: string, + value: string, + context: McpContext, +) { + const handle = await context.getElementByUid(uid); + try { + // The AXNode for an option doesn't contain its `value`. We set text content of the option as value. + // If the form is a combobox, we need to find the correct option by its text value. + // To do that, loop through the children while checking which child's text matches the requested value (requested value is actually the text content). + // When the correct option is found, use the element handle to get the real value. + const aXNode = context.getAXNodeByUid(uid); + if (aXNode && aXNode.role === 'combobox' && aXNode.children) { + for (const child of aXNode.children) { + if (child.role === 'option' && child.name === value && child.value) { + const childHandle = await child.elementHandle(); + if (childHandle) { + const childValueHandle = await childHandle.getProperty('value'); + const childValue = await childValueHandle.jsonValue(); + if (childValue) { + await handle.asLocator().fill(childValue.toString()); + } + break; + } + } + } + } else { + await handle.asLocator().fill(value); + } + } finally { + void handle.dispose(); + } +} + export const fill = defineTool({ name: 'fill', description: `Type text into a input, text area or select an option from a `, + ); + await context.createTextSnapshot(); + await fill.handler( + { + params: { + uid: '1_1', + value: 'two', + }, + }, + response, + context, + ); + assert.strictEqual( + response.responseLines[0], + 'Successfully filled out the element', + ); + assert.ok(response.includeSnapshot); + const selectedValue = await page.evaluate( + () => document.querySelector('select')!.value, + ); + assert.strictEqual(selectedValue, 'v2'); + }); + }); }); describe('drags', () => {