Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/swift-jokes-write.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@browserbasehq/stagehand": patch
---

Improved handling for OS-level dropdowns
12 changes: 12 additions & 0 deletions evals/evals.config.json
Original file line number Diff line number Diff line change
Expand Up @@ -382,6 +382,18 @@
{
"name": "shadow_dom",
"categories": ["act"]
},
{
"name": "os_dropdown",
"categories": ["act"]
},
{
"name": "custom_dropdown",
"categories": ["act"]
},
{
"name": "hidden_input_dropdown",
"categories": ["act"]
}
]
}
60 changes: 60 additions & 0 deletions evals/tasks/custom_dropdown.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
import { EvalFunction } from "@/types/evals";

/**
 * Eval: a custom 'dropdown' that is not a <select> element must be expanded
 * with a click.
 *
 * This checks that we do not incorrectly attempt the selectOptionFromDropdown
 * method (defined in actHandlerUtils.ts) on such an element. This kind of
 * dropdown has to be clicked open before its options can be interacted with.
 *
 * Flow: navigate to the eval site, ask Stagehand to click the
 * 'Select a Country' dropdown, then call `page.extract()` with no arguments
 * and look for the option text "Canada" in the returned `page_text`.
 *
 * @returns `_success: true` when `page_text` contains "Canada"; otherwise
 *   `_success: false` with a `message`. Every result carries `debugUrl`,
 *   `sessionUrl` and the collected logs. `stagehand.close()` always runs.
 */
export const custom_dropdown: EvalFunction = async ({
  debugUrl,
  sessionUrl,
  stagehand,
  logger,
}) => {
  try {
    const page = stagehand.page;
    await page.goto(
      "https://browserbase.github.io/stagehand-eval-sites/sites/expand-dropdown/",
    );

    await page.act("click the 'Select a Country' dropdown");

    // We expect stagehand to click the dropdown to expand it, so the available
    // options should now be contained in the full a11y tree. To verify, grab
    // the full tree text and make sure it contains 'Canada'.
    const extraction = await page.extract();
    const fullTree = extraction.page_text;

    if (fullTree.includes("Canada")) {
      return {
        _success: true,
        debugUrl,
        sessionUrl,
        logs: logger.getLogs(),
      };
    }
    return {
      _success: false,
      message: "unable to expand the dropdown",
      debugUrl,
      sessionUrl,
      logs: logger.getLogs(),
    };
  } catch (error) {
    // Anything can be thrown, not just Error instances: prefer a `message`
    // property when one exists, otherwise report the thrown value itself.
    const reason = String(
      (error as { message?: unknown } | null | undefined)?.message ?? error,
    );
    return {
      _success: false,
      // This eval only expands the dropdown; it never selects an option.
      message: `error attempting to expand the dropdown: ${reason}`,
      debugUrl,
      sessionUrl,
      logs: logger.getLogs(),
    };
  } finally {
    await stagehand.close();
  }
};
60 changes: 60 additions & 0 deletions evals/tasks/hidden_input_dropdown.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
import { EvalFunction } from "@/types/evals";

/**
 * Eval: a hidden-input 'dropdown' must be expanded with a click.
 *
 * This checks that we do not incorrectly attempt the selectOptionFromDropdown
 * method (defined in actHandlerUtils.ts) on a hidden input 'dropdown'. This
 * kind of dropdown has to be clicked open before its options can be
 * interacted with.
 *
 * Flow: navigate to the eval site, ask Stagehand to expand the
 * 'Favourite Colour' dropdown, then call `page.extract()` with no arguments
 * and look for the option text "Green" in the returned `page_text`.
 *
 * @returns `_success: true` when `page_text` contains "Green"; otherwise
 *   `_success: false` with a `message`. Every result carries `debugUrl`,
 *   `sessionUrl` and the collected logs. `stagehand.close()` always runs.
 */
export const hidden_input_dropdown: EvalFunction = async ({
  debugUrl,
  sessionUrl,
  stagehand,
  logger,
}) => {
  try {
    const page = stagehand.page;
    await page.goto(
      "https://browserbase.github.io/stagehand-eval-sites/sites/hidden-input-dropdown/",
    );

    await page.act("click to expand the 'Favourite Colour' dropdown");

    // We expect stagehand to click the dropdown to expand it, so the available
    // options should now be contained in the full a11y tree. To verify, grab
    // the full tree text and make sure it contains 'Green'.
    const extraction = await page.extract();
    const fullTree = extraction.page_text;

    if (fullTree.includes("Green")) {
      return {
        _success: true,
        debugUrl,
        sessionUrl,
        logs: logger.getLogs(),
      };
    }
    return {
      _success: false,
      message: "unable to expand the dropdown",
      debugUrl,
      sessionUrl,
      logs: logger.getLogs(),
    };
  } catch (error) {
    // Anything can be thrown, not just Error instances: prefer a `message`
    // property when one exists, otherwise report the thrown value itself.
    const reason = String(
      (error as { message?: unknown } | null | undefined)?.message ?? error,
    );
    return {
      _success: false,
      message: `error attempting click to expand the dropdown: ${reason}`,
      debugUrl,
      sessionUrl,
      logs: logger.getLogs(),
    };
  } finally {
    await stagehand.close();
  }
};
55 changes: 55 additions & 0 deletions evals/tasks/os_dropdown.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import { EvalFunction } from "@/types/evals";

/**
 * Eval: selecting an option from an OS level dropdown.
 *
 * Flow: navigate to the eval site, ask Stagehand to choose
 * 'Smog Check Technician' from the 'License Type' dropdown, then read the
 * text of the checked <option> of the target <select> and compare it with
 * the expected label.
 *
 * @returns `_success: true` when the checked option's text (ignoring
 *   surrounding whitespace) is "Smog Check Technician"; otherwise
 *   `_success: false` with a `message`. Every result carries `debugUrl`,
 *   `sessionUrl` and the collected logs. `stagehand.close()` always runs.
 */
export const os_dropdown: EvalFunction = async ({
  debugUrl,
  sessionUrl,
  stagehand,
  logger,
}) => {
  const expectedOption = "Smog Check Technician";

  try {
    const page = stagehand.page;
    await page.goto(
      "https://browserbase.github.io/stagehand-eval-sites/sites/nested-dropdown/",
    );

    await page.act(
      "choose 'Smog Check Technician' from the 'License Type' dropdown",
    );
    const selectedOption = await page
      .locator(
        "xpath=/html/body/form/div[1]/div[3]/article/div[2]/div[1]/select[2] >> option:checked",
      )
      .textContent();

    // textContent() may be null and can carry whitespace that comes purely
    // from markup formatting, so compare the trimmed text.
    if (selectedOption?.trim() === expectedOption) {
      return {
        _success: true,
        debugUrl,
        sessionUrl,
        logs: logger.getLogs(),
      };
    }
    return {
      _success: false,
      message: "incorrect option selected from the dropdown",
      debugUrl,
      sessionUrl,
      logs: logger.getLogs(),
    };
  } catch (error) {
    // Anything can be thrown, not just Error instances: prefer a `message`
    // property when one exists, otherwise report the thrown value itself.
    const reason = String(
      (error as { message?: unknown } | null | undefined)?.message ?? error,
    );
    return {
      _success: false,
      message: `error attempting to select an option from the dropdown: ${reason}`,
      debugUrl,
      sessionUrl,
      logs: logger.getLogs(),
    };
  } finally {
    await stagehand.close();
  }
};
8 changes: 8 additions & 0 deletions lib/a11y/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,14 @@ async function cleanStructuralNodes(
if (tagName) node.role = tagName;
}

if (
node.role === "combobox" &&
node.encodedId !== undefined &&
tagNameMap[node.encodedId] === "select"
) {
node.role = "select";
}

// 5. drop redundant StaticText children
const pruned = removeRedundantStaticTextChildren(node, cleanedChildren);
if (!pruned.length && (node.role === "generic" || node.role === "none")) {
Expand Down
21 changes: 21 additions & 0 deletions lib/handlers/handlerUtils/actHandlerUtils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ export const methodHandlerMap: Record<
click: clickElement,
nextChunk: scrollToNextChunk,
prevChunk: scrollToPreviousChunk,
selectOptionFromDropdown: selectOption,
};

export async function scrollToNextChunk(ctx: MethodHandlerContext) {
Expand Down Expand Up @@ -349,6 +350,26 @@ export async function pressKey(ctx: MethodHandlerContext) {
}
}

/**
 * Handler for the `selectOptionFromDropdown` act method: selects an option on
 * the target element through `locator.selectOption`.
 *
 * @param ctx - Method handler context. `args[0]` is stringified and used as
 *   the option to select (an empty string is used when it is missing);
 *   `locator` is the element to operate on; `xpath` is only used for logging.
 * @throws PlaywrightCommandException when `locator.selectOption` fails. The
 *   failure is logged first (level 0, category "action") with the error
 *   message, stack trace and xpath.
 */
export async function selectOption(ctx: MethodHandlerContext) {
  const { locator, xpath, args, logger } = ctx;
  try {
    const text = args[0]?.toString() ?? "";
    // Bounded wait so a missing or non-matching option fails fast.
    await locator.selectOption(text, { timeout: 5000 });
  } catch (e) {
    // Anything can be thrown, not just Error instances: prefer `message` and
    // `stack` when they exist, otherwise report the thrown value itself.
    const thrown = e as
      | { message?: unknown; stack?: unknown }
      | null
      | undefined;
    const message = String(thrown?.message ?? e);
    const trace = typeof thrown?.stack === "string" ? thrown.stack : "";

    logger({
      category: "action",
      message: "error selecting option",
      level: 0,
      auxiliary: {
        error: { value: message, type: "string" },
        trace: { value: trace, type: "string" },
        xpath: { value: xpath, type: "string" },
      },
    });
    throw new PlaywrightCommandException(message);
  }
}

export async function clickElement(ctx: MethodHandlerContext) {
const {
locator,
Expand Down
4 changes: 3 additions & 1 deletion lib/prompt.ts
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,9 @@ export function buildActObservePrompt(
ONLY return one action. If multiple actions are relevant, return the most relevant one.
If the user is asking to scroll to a position on the page, e.g., 'halfway' or 0.75, etc, you must return the argument formatted as the correct percentage, e.g., '50%' or '75%', etc.
If the user is asking to scroll to the next chunk/previous chunk, choose the nextChunk/prevChunk method. No arguments are required here.
If the action implies a key press, e.g., 'press enter', 'press a', 'press space', etc., always choose the press method with the appropriate key as argument — e.g. 'a', 'Enter', 'Space'. Do not choose a click action on an on-screen keyboard. Capitalize the first character like 'Enter', 'Tab', 'Escape' only for special keys.`;
If the action implies a key press, e.g., 'press enter', 'press a', 'press space', etc., always choose the press method with the appropriate key as argument — e.g. 'a', 'Enter', 'Space'. Do not choose a click action on an on-screen keyboard. Capitalize the first character like 'Enter', 'Tab', 'Escape' only for special keys.
If the action implies choosing an option from a dropdown, AND the corresponding element is a 'select' element, choose the selectOptionFromDropdown method. The argument should be the text of the option to select.
If the action implies choosing an option from a dropdown, and the corresponding element is NOT a 'select' element, choose the click method.`;

// Add variable names (not values) to the instruction if any
if (variables && Object.keys(variables).length > 0) {
Expand Down
1 change: 1 addition & 0 deletions types/act.ts
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ export enum SupportedPlaywrightAction {
SCROLL = "scrollTo",
NEXT_CHUNK = "nextChunk",
PREV_CHUNK = "prevChunk",
SELECT_OPTION_FROM_DROPDOWN = "selectOptionFromDropdown",
}

/**
Expand Down
Loading