Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 13 additions & 2 deletions src/android.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,11 @@ interface UiAutomatorXmlNode {
bounds?: string;
hint?: string;
focused?: string;
clickable?: string;
focusable?: string;
enabled?: string;
selected?: string;
package?: string;
"content-desc"?: string;
"resource-id"?: string;
}
Expand Down Expand Up @@ -298,9 +303,15 @@ export class AndroidRobot implements Robot {
}
}

if (node.text || node["content-desc"] || node.hint) {
// Include elements with text/labels OR clickable/focusable elements (like icons, buttons)
// Despite its name, hasTextOrLabel is also truthy when only "resource-id" is set,
// so a node without any visible text, content-desc or hint can still qualify.
const hasTextOrLabel = node.text || node["content-desc"] || node.hint || node["resource-id"];
// NOTE(review): String.prototype.includes is a substring test, so the "View" check matches every
// class name that contains "View" (e.g. "TextView"), not only icon-like widgets. The "ImageView"
// and "ImageButton" checks are already implied by the "View" and "Button" checks respectively.
// Confirm that matching this broadly is intended.
const isInteractive = node.clickable === "true" || node.focusable === "true" ||
(node.class && (node.class.includes("Button") || node.class.includes("ImageView") ||
node.class.includes("ImageButton") || node.class.includes("View")));

if (hasTextOrLabel || isInteractive) {
const element: ScreenElement = {
type: node.class || "text",
type: node.class || "element",
text: node.text,
label: node["content-desc"] || node.hint || "",
rect: this.getScreenElementRect(node),
Expand Down
61 changes: 61 additions & 0 deletions src/server.ts
Original file line number Diff line number Diff line change
Expand Up @@ -529,5 +529,66 @@ export const createMcpServer = (): McpServer => {
}
);

tool(
	"mobile_tap_element",
	"Find an element on screen by query and tap it. This combines list_elements and tap functionality.",
	{
		device: z.string().describe("The device identifier to use. Use mobile_list_available_devices to find which devices are available to you."),
		query: z.string().describe("Search query to find the element (matches against text, label, name, value, or identifier)")
	},
	/**
	 * Looks up on-screen elements whose text, label, name, value or identifier contains
	 * `query` (case-insensitive substring match) and taps the centre of the single match.
	 *
	 * @param device Identifier of the device whose screen is searched.
	 * @param query Case-insensitive substring to look for; must contain a non-whitespace character.
	 * @returns A message naming the tapped element and the coordinates that were tapped.
	 * @throws ActionableError when the query is blank, when nothing matches, or when more
	 *         than one element matches (the message then lists every candidate as JSON).
	 */
	async ({ device, query }) => {
		const robot = getRobotFromDevice(device);

		// An empty string is a substring of every string, so a blank query would "match"
		// every labelled element on screen. Reject it before querying the device.
		if (query.trim() === "") {
			throw new ActionableError("Query must not be empty. Provide part of the element's text, label, name, value, or identifier.");
		}

		const elements = await robot.getElementsOnScreen();
		type OnScreenElement = (typeof elements)[number];

		// Lower-case the query once instead of once per field of every element.
		const needle = query.toLowerCase();

		// First non-empty human-readable field, used in messages.
		const displayName = (element: OnScreenElement) =>
			element.text || element.label || element.name || element.value || element.identifier;

		// Centre point of the element's bounding rectangle.
		const centerOf = (element: OnScreenElement) => ({
			x: element.rect.x + (element.rect.width / 2),
			y: element.rect.y + (element.rect.height / 2)
		});

		// True when any non-blank searchable field contains the query.
		const matchesQuery = (element: OnScreenElement): boolean =>
			[element.text, element.label, element.name, element.value, element.identifier]
				.filter((field): field is string => typeof field === "string" && field.trim() !== "")
				.some(field => field.toLowerCase().includes(needle));

		const matchingElements = elements.filter(matchesQuery);

		if (matchingElements.length === 0) {
			throw new ActionableError(`No element found matching query: "${query}". Available elements: ${elements.map(displayName).filter(t => t).join(", ")}`);
		}

		if (matchingElements.length > 1) {
			// Ambiguous query: report every candidate with its tap point so the caller can refine.
			const matchingElementsJson = matchingElements.map(element => ({
				type: element.type,
				text: element.text,
				label: element.label,
				name: element.name,
				value: element.value,
				identifier: element.identifier,
				coordinates: centerOf(element),
				rect: element.rect
			}));

			throw new ActionableError(`Multiple elements found matching query: "${query}". Found ${matchingElements.length} matches:\n${JSON.stringify(matchingElementsJson, null, 2)}`);
		}

		const matchingElement = matchingElements[0];
		const { x: centerX, y: centerY } = centerOf(matchingElement);

		// Tap the element
		await robot.tap(centerX, centerY);

		return `Tapped element "${displayName(matchingElement)}" at coordinates: ${centerX}, ${centerY}`;
	}
);

return server;
};