diff --git a/src/server.ts b/src/server.ts index 29a34ed..60612b6 100644 --- a/src/server.ts +++ b/src/server.ts @@ -1,6 +1,6 @@ #!/usr/bin/env node - -import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'; +import pkg from '../package.json' with { type: 'json' }; +import { McpServer, ResourceTemplate } from '@modelcontextprotocol/sdk/server/mcp.js'; import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js'; import type { ToolDefinition } from './types/tool'; import type { ToolCallback } from '@modelcontextprotocol/sdk/server/mcp'; @@ -55,11 +55,10 @@ import { } from './tools/device.tool'; import { executeScriptTool, executeScriptToolDefinition } from './tools/execute-script.tool'; import { attachBrowserTool, attachBrowserToolDefinition } from './tools/attach-browser.tool'; +import { launchChromeTool, launchChromeToolDefinition } from './tools/launch-chrome.tool'; import { emulateDeviceTool, emulateDeviceToolDefinition } from './tools/emulate-device.tool'; -import pkg from '../package.json' with { type: 'json' }; -import { ResourceTemplate } from '@modelcontextprotocol/sdk/server/mcp.js'; import { withRecording } from './recording/step-recorder'; -import { buildSessionsIndex, buildCurrentSessionSteps, buildSessionStepsById } from './recording/resources'; +import { buildCurrentSessionSteps, buildSessionsIndex, buildSessionStepsById } from './recording/resources'; // IMPORTANT: Redirect all console output to stderr to avoid messing with MCP protocol (Chrome writes to console) const _originalConsoleLog = console.log; @@ -97,6 +96,7 @@ const registerTool = (definition: ToolDefinition, callback: ToolCallback) => registerTool(startBrowserToolDefinition, withRecording('start_browser', startBrowserTool)); registerTool(startAppToolDefinition, withRecording('start_app_session', startAppTool)); registerTool(closeSessionToolDefinition, closeSessionTool); +registerTool(launchChromeToolDefinition, withRecording('launch_chrome', launchChromeTool)); registerTool(attachBrowserToolDefinition, withRecording('attach_browser', attachBrowserTool)); registerTool(emulateDeviceToolDefinition, emulateDeviceTool); registerTool(navigateToolDefinition, withRecording('navigate', navigateTool)); @@ -159,7 +159,11 @@ server.registerResource( async () => { const payload = buildCurrentSessionSteps(); return { - contents: [{ uri: 'wdio://session/current/steps', mimeType: 'application/json', text: payload?.stepsJson ?? '{"error":"No active session"}' }], + contents: [{ + uri: 'wdio://session/current/steps', + mimeType: 'application/json', + text: payload?.stepsJson ?? '{"error":"No active session"}' + }], }; }, ); @@ -171,7 +175,11 @@ server.registerResource( async () => { const payload = buildCurrentSessionSteps(); return { - contents: [{ uri: 'wdio://session/current/code', mimeType: 'text/plain', text: payload?.generatedJs ?? '// No active session' }], + contents: [{ + uri: 'wdio://session/current/code', + mimeType: 'text/plain', + text: payload?.generatedJs ?? '// No active session' + }], }; }, ); @@ -183,7 +191,11 @@ server.registerResource( async (uri, { sessionId }) => { const payload = buildSessionStepsById(sessionId as string); return { - contents: [{ uri: uri.href, mimeType: 'application/json', text: payload?.stepsJson ?? `{"error":"Session not found: ${sessionId}"}` }], + contents: [{ + uri: uri.href, + mimeType: 'application/json', + text: payload?.stepsJson ?? `{"error":"Session not found: ${sessionId}"}` + }], }; }, ); @@ -195,7 +207,11 @@ server.registerResource( async (uri, { sessionId }) => { const payload = buildSessionStepsById(sessionId as string); return { - contents: [{ uri: uri.href, mimeType: 'text/plain', text: payload?.generatedJs ?? `// Session not found: ${sessionId}` }], + contents: [{ + uri: uri.href, + mimeType: 'text/plain', + text: payload?.generatedJs ?? `// Session not found: ${sessionId}` + }], }; }, ); diff --git a/src/tools/attach-browser.tool.ts b/src/tools/attach-browser.tool.ts index e299871..e576b05 100644 --- a/src/tools/attach-browser.tool.ts +++ b/src/tools/attach-browser.tool.ts @@ -9,67 +9,106 @@ export const attachBrowserToolDefinition: ToolDefinition = { name: 'attach_browser', description: `Attach to a Chrome instance already running with --remote-debugging-port. -Start Chrome first (quit any running Chrome instance before launching): - - macOS — with real profile (preserves extensions, cookies, logins): - pkill -x "Google Chrome" && sleep 1 - /Applications/Google Chrome.app/Contents/MacOS/Google Chrome --remote-debugging-port=9222 --user-data-dir="$HOME/Library/Application Support/Google/Chrome" --profile-directory=Default & - - macOS — with fresh profile (lightweight, no extensions): - pkill -x "Google Chrome" && sleep 1 - /Applications/Google Chrome.app/Contents/MacOS/Google Chrome --remote-debugging-port=9222 --user-data-dir=/tmp/chrome-debug & - - Linux — with real profile: - google-chrome --remote-debugging-port=9222 --user-data-dir="$HOME/.config/google-chrome" --profile-directory=Default & - - Linux — with fresh profile: - google-chrome --remote-debugging-port=9222 --user-data-dir=/tmp/chrome-debug & - -Verify Chrome is ready: curl http://localhost:9222/json/version - -Then call attach_browser() to hand control to the AI. All other tools (navigate, click, get_visible_elements, etc.) will work on the attached session. Use close_session() to detach without closing Chrome.`, +Use launch_chrome() first to prepare and launch Chrome with remote debugging enabled.`, inputSchema: { port: z.number().default(9222).describe('Chrome remote debugging port (default: 9222)'), host: z.string().default('localhost').describe('Host where Chrome is running (default: localhost)'), - userDataDir: z.string().default('/tmp/chrome-debug').describe('Chrome user data directory — must match the --user-data-dir used when launching Chrome. Use your real profile path (e.g. "$HOME/Library/Application Support/Google/Chrome") to preserve extensions and logins, or /tmp/chrome-debug for a fresh profile (default: /tmp/chrome-debug)'), navigationUrl: z.string().optional().describe('URL to navigate to immediately after attaching'), }, }; -async function getActiveTabUrl(host: string, port: number): Promise { +type TabSnapshot = { activeTabUrl: string | undefined; allTabUrls: string[] }; + +// ChromeDriver injects a BiDi-CDP Mapper page when creating a session. If the previous session +// was detached without proper cleanup, this target remains and causes "unexpected alert open" on +// the next attach attempt. Close any stale mappers before creating a new session. +// Returns the active tab URL (first real page tab) and all page tab URLs — Chrome lists the +// active/focused tab first in /json. +async function closeStaleMappers(host: string, port: number): Promise { try { const res = await fetch(`http://${host}:${port}/json`); - const tabs = await res.json() as { type: string; url: string }[]; - const page = tabs.find((t) => t.type === 'page' && t.url && !t.url.startsWith('devtools://')); - return page?.url ?? null; + const targets = await res.json() as { id: string; title: string; type: string; url: string }[]; + const mappers = targets.filter((t) => t.title?.includes('BiDi')); + await Promise.all(mappers.map((t) => fetch(`http://${host}:${port}/json/close/${t.id}`))); + const pages = targets.filter((t) => t.type === 'page' && !t.title?.includes('BiDi')); + return { activeTabUrl: pages[0]?.url, allTabUrls: pages.map((t) => t.url) }; } catch { - return null; + return { activeTabUrl: undefined, allTabUrls: [] }; + } +} + +// After CDP session init, Chrome blanks the first tab it takes over. This restores any tabs +// that became about:blank and then switches focus to the originally active tab. +async function restoreAndSwitchToActiveTab( + browser: WebdriverIO.Browser, + activeTabUrl: string, + allTabUrls: string[], +): Promise { + const handles = await browser.getWindowHandles(); + const currentUrls: string[] = []; + for (const handle of handles) { + await browser.switchToWindow(handle); + currentUrls.push(await browser.getUrl()); + } + + // Restore blank tabs that had a known URL before attaching. + const missingUrls = allTabUrls.filter((u) => !currentUrls.includes(u)); + let missingIdx = 0; + for (let i = 0; i < handles.length; i++) { + if (currentUrls[i] === 'about:blank' && missingIdx < missingUrls.length) { + await browser.switchToWindow(handles[i]); + await browser.url(missingUrls[missingIdx]); + currentUrls[i] = missingUrls[missingIdx++]; + } + } + + // Switch to the originally active tab. + for (let i = 0; i < handles.length; i++) { + if (currentUrls[i] === activeTabUrl) { + await browser.switchToWindow(handles[i]); + break; + } + } +} + +async function waitForCDP(host: string, port: number, timeoutMs = 10000): Promise { + const deadline = Date.now() + timeoutMs; + while (Date.now() < deadline) { + try { + const res = await fetch(`http://${host}:${port}/json/version`); + if (res.ok) return; + } catch { + // not ready yet + } + await new Promise((r) => setTimeout(r, 300)); } + throw new Error(`Chrome did not expose CDP on ${host}:${port} within ${timeoutMs}ms`); } export const attachBrowserTool: ToolCallback = async ({ port = 9222, host = 'localhost', - userDataDir = '/tmp/chrome-debug', navigationUrl, }: { port?: number; host?: string; - userDataDir?: string; navigationUrl?: string; }): Promise => { try { const state = (getBrowser as any).__state; - // Capture the active tab URL before WebDriver blanks it - const activeUrl = navigationUrl ?? await getActiveTabUrl(host, port); + await waitForCDP(host, port); + const { activeTabUrl, allTabUrls } = await closeStaleMappers(host, port); const browser = await remote({ + connectionRetryTimeout: 30000, + connectionRetryCount: 3, capabilities: { browserName: 'chrome', + unhandledPromptBehavior: 'dismiss', + webSocketUrl: false, 'goog:chromeOptions': { debuggerAddress: `${host}:${port}`, - args: [`--user-data-dir=${userDataDir}`], }, }, }); @@ -90,14 +129,15 @@ export const attachBrowserTool: ToolCallback = async ({ browserName: 'chrome', 'goog:chromeOptions': { debuggerAddress: `${host}:${port}`, - args: [`--user-data-dir=${userDataDir}`], }, }, steps: [], }); - if (activeUrl) { - await browser.url(activeUrl); + if (navigationUrl) { + await browser.url(navigationUrl); + } else if (activeTabUrl) { + await restoreAndSwitchToActiveTab(browser, activeTabUrl, allTabUrls); } const title = await browser.getTitle(); diff --git a/src/tools/launch-chrome.tool.ts b/src/tools/launch-chrome.tool.ts new file mode 100644 index 0000000..d03d42f --- /dev/null +++ b/src/tools/launch-chrome.tool.ts @@ -0,0 +1,147 @@ +import { spawn } from 'node:child_process'; +import { copyFileSync, cpSync, existsSync, mkdirSync, rmSync, writeFileSync } from 'node:fs'; +import { homedir, platform, tmpdir } from 'node:os'; +import { join } from 'node:path'; +import type { ToolCallback } from '@modelcontextprotocol/sdk/server/mcp'; +import type { CallToolResult } from '@modelcontextprotocol/sdk/types'; +import type { ToolDefinition } from '../types/tool'; +import { z } from 'zod'; + +const USER_DATA_DIR = join(tmpdir(), 'chrome-debug'); + +export const launchChromeToolDefinition: ToolDefinition = { + name: 'launch_chrome', + description: `Prepares and launches Chrome with remote debugging enabled so attach_browser() can connect. + +Two modes: + + newInstance (default): Opens a Chrome window alongside your existing one using a separate + profile dir. Your current Chrome session is untouched. + + freshSession: Launches Chrome with an empty profile (no cookies, no logins). + +Use copyProfileFiles: true to carry over your cookies and logins into the debug session. +Note: changes made during the session won't sync back to your main profile. + +After this tool succeeds, call attach_browser() to connect.`, + inputSchema: { + port: z.number().default(9222).describe('Remote debugging port (default: 9222)'), + mode: z.enum(['newInstance', 'freshSession']).default('newInstance').describe( + 'newInstance: open alongside existing Chrome | freshSession: clean profile' + ), + copyProfileFiles: z.boolean().default(false).describe( + 'Copy your Default Chrome profile (cookies, logins) into the debug session.' + ), + }, +}; + +function isMac(): boolean { + return platform() === 'darwin'; +} + +function chromeExec(): string { + if (isMac()) return '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome'; + if (platform() === 'win32') { + const candidates = [ + join('C:', 'Program Files', 'Google', 'Chrome', 'Application', 'chrome.exe'), + join('C:', 'Program Files (x86)', 'Google', 'Chrome', 'Application', 'chrome.exe'), + ]; + return candidates.find((p) => existsSync(p)) ?? candidates[0]; + } + return 'google-chrome'; +} + +function defaultProfileDir(): string { + const home = homedir(); + if (isMac()) return join(home, 'Library', 'Application Support', 'Google', 'Chrome'); + if (platform() === 'win32') return join(home, 'AppData', 'Local', 'Google', 'Chrome', 'User Data'); + return join(home, '.config', 'google-chrome'); +} + +function copyProfile(): void { + const srcDir = defaultProfileDir(); + rmSync(USER_DATA_DIR, { recursive: true, force: true }); + mkdirSync(USER_DATA_DIR, { recursive: true }); + copyFileSync(join(srcDir, 'Local State'), join(USER_DATA_DIR, 'Local State')); + cpSync(join(srcDir, 'Default'), join(USER_DATA_DIR, 'Default'), { recursive: true }); + + // Remove singleton/lock files from the source Chrome instance. + for (const f of ['SingletonLock', 'SingletonCookie', 'SingletonSocket']) { + rmSync(join(USER_DATA_DIR, f), { force: true }); + } + + // Remove session files — they reference the original profile's state and trigger + // "Something went wrong when opening your profile" when Chrome opens the copy. + for (const f of ['Current Session', 'Current Tabs', 'Last Session', 'Last Tabs']) { + rmSync(join(USER_DATA_DIR, 'Default', f), { force: true }); + } + + // First Run sentinel tells Chrome this is a fresh start — suppresses first-run dialogs. + writeFileSync(join(USER_DATA_DIR, 'First Run'), ''); +} + +function launchChrome(port: number): void { + spawn(chromeExec(), [ + `--remote-debugging-port=${port}`, + `--user-data-dir=${USER_DATA_DIR}`, + '--profile-directory=Default', + '--no-first-run', + '--disable-session-crashed-bubble', + ], { detached: true, stdio: 'ignore' }).unref(); +} + +async function waitForCDP(port: number, timeoutMs = 15000): Promise { + const deadline = Date.now() + timeoutMs; + while (Date.now() < deadline) { + try { + const res = await fetch(`http://localhost:${port}/json/version`); + if (res.ok) return; + } catch { + // not ready yet + } + await new Promise((r) => setTimeout(r, 300)); + } + throw new Error(`Chrome did not expose CDP on port ${port} within ${timeoutMs}ms`); +} + +export const launchChromeTool: ToolCallback = async ({ + port = 9222, + mode = 'newInstance', + copyProfileFiles = false, +}: { + port?: number; + mode?: 'newInstance' | 'freshSession'; + copyProfileFiles?: boolean; +}): Promise => { + const warnings: string[] = []; + const notes: string[] = []; + + try { + if (copyProfileFiles) { + warnings.push('⚠️ Cookies and logins were copied at this moment. Changes during this session won\'t sync back to your main profile.'); + copyProfile(); + } else { + notes.push(mode === 'newInstance' + ? 'No profile copied — this instance starts with no cookies or logins.' + : 'Fresh profile — no existing cookies or logins.'); + rmSync(USER_DATA_DIR, { recursive: true, force: true }); + mkdirSync(USER_DATA_DIR, { recursive: true }); + } + + launchChrome(port); + await waitForCDP(port); + + const lines = [ + `Chrome launched on port ${port} (mode: ${mode}).`, + ...warnings, + ...notes, + ]; + + return { content: [{ type: 'text', text: lines.join('\n') }] }; + } catch (e) { + return { + isError: true, + content: [{ type: 'text', text: `Error launching Chrome: ${e}` }], + }; + } +}; \ No newline at end of file diff --git a/tests/tools/attach-browser-tool.test.ts b/tests/tools/attach-browser-tool.test.ts index 5783a9f..71f63a7 100644 --- a/tests/tools/attach-browser-tool.test.ts +++ b/tests/tools/attach-browser-tool.test.ts @@ -1,7 +1,8 @@ import { beforeEach, describe, expect, it, vi } from 'vitest'; -// Stub fetch so getActiveTabUrl doesn't make real network requests +// Stub fetch so getActiveTabUrl / closeStaleMappers / waitForCDP don't make real network requests vi.stubGlobal('fetch', vi.fn().mockResolvedValue({ + ok: true, json: vi.fn().mockResolvedValue([{ type: 'page', url: 'https://example.com' }]), })); @@ -11,6 +12,8 @@ const mockBrowser = vi.hoisted(() => ({ getTitle: vi.fn().mockResolvedValue('My App'), url: vi.fn().mockResolvedValue(undefined), getUrl: vi.fn().mockResolvedValue('https://example.com'), + getWindowHandles: vi.fn().mockResolvedValue(['handle-1']), + switchToWindow: vi.fn().mockResolvedValue(undefined), })); vi.mock('webdriverio', () => ({ @@ -38,7 +41,8 @@ import { getBrowser } from '../../src/tools/browser.tool'; import { attachBrowserTool } from '../../src/tools/attach-browser.tool'; type ToolFn = (args: Record) => Promise<{ content: { text: string }[] }>; -const callTool = attachBrowserTool as unknown as ToolFn; +const callTool = (args: Record = {}) => + (attachBrowserTool as unknown as ToolFn)(args); const mockRemote = remote as ReturnType; @@ -50,39 +54,37 @@ beforeEach(() => { state.sessionHistory.clear(); state.currentSession = null; mockRemote.mockResolvedValue(mockBrowser); + vi.stubGlobal('fetch', vi.fn().mockResolvedValue({ + ok: true, + json: vi.fn().mockResolvedValue([{ type: 'page', url: 'https://example.com' }]), + })); }); describe('attach_browser', () => { it('calls remote() with debuggerAddress using default port 9222', async () => { - await callTool({}); + await callTool(); expect(mockRemote).toHaveBeenCalledWith({ + connectionRetryTimeout: 30000, + connectionRetryCount: 3, capabilities: expect.objectContaining({ browserName: 'chrome', - 'goog:chromeOptions': { debuggerAddress: 'localhost:9222', args: ['--user-data-dir=/tmp/chrome-debug'] }, + unhandledPromptBehavior: 'dismiss', + 'goog:chromeOptions': { debuggerAddress: 'localhost:9222' }, }), }); }); it('uses provided host and port', async () => { await callTool({ host: '192.168.1.1', port: 9333 }); - expect(mockRemote).toHaveBeenCalledWith({ + expect(mockRemote).toHaveBeenCalledWith(expect.objectContaining({ capabilities: expect.objectContaining({ 'goog:chromeOptions': expect.objectContaining({ debuggerAddress: '192.168.1.1:9333' }), }), - }); - }); - - it('uses provided userDataDir', async () => { - await callTool({ userDataDir: '/custom/profile' }); - expect(mockRemote).toHaveBeenCalledWith({ - capabilities: expect.objectContaining({ - 'goog:chromeOptions': expect.objectContaining({ args: ['--user-data-dir=/custom/profile'] }), - }), - }); + })); }); it('registers session in state with isAttached: true', async () => { - await callTool({}); + await callTool(); const state = (getBrowser as any).__state; expect(state.currentSession).toBe('attached-session-id'); expect(state.sessionMetadata.get('attached-session-id')).toMatchObject({ @@ -92,7 +94,7 @@ describe('attach_browser', () => { }); it('returns session id, title and url', async () => { - const result = await callTool({}); + const result = await callTool(); const text = result.content[0].text; expect(text).toContain('attached-session-id'); expect(text).toContain('My App'); @@ -102,17 +104,80 @@ describe('attach_browser', () => { it('navigates to navigationUrl if provided', async () => { await callTool({ navigationUrl: 'https://app.example.com' }); expect(mockBrowser.url).toHaveBeenCalledWith('https://app.example.com'); + expect(mockBrowser.switchToWindow).not.toHaveBeenCalled(); + }); + + it('switches to the active tab (from /json) when no navigationUrl', async () => { + vi.stubGlobal('fetch', vi.fn().mockResolvedValue({ + ok: true, + json: vi.fn().mockResolvedValue([ + { type: 'page', url: 'https://active.example.com', title: 'Active', id: 't1' }, + { type: 'page', url: 'https://other.example.com', title: 'Other', id: 't2' }, + ]), + })); + mockBrowser.getWindowHandles.mockResolvedValue(['handle-1', 'handle-2']); + mockBrowser.getUrl + .mockResolvedValueOnce('https://other.example.com') // handle-1 + .mockResolvedValueOnce('https://active.example.com') // handle-2 — match + .mockResolvedValue('https://active.example.com'); // subsequent calls + + await callTool(); + + expect(mockBrowser.switchToWindow).toHaveBeenCalledWith('handle-1'); + expect(mockBrowser.switchToWindow).toHaveBeenCalledWith('handle-2'); + expect(mockBrowser.url).not.toHaveBeenCalled(); + }); + + it('restores single blanked tab when remote() blanks it during session init', async () => { + vi.stubGlobal('fetch', vi.fn().mockResolvedValue({ + ok: true, + json: vi.fn().mockResolvedValue([ + { type: 'page', url: 'https://active.example.com', title: 'Active', id: 't1' }, + ]), + })); + mockBrowser.getWindowHandles.mockResolvedValue(['handle-1']); + mockBrowser.getUrl + .mockResolvedValueOnce('about:blank') // scan: handle-1 blanked + .mockResolvedValue('https://active.example.com'); // final getUrl for result + + await callTool(); + + expect(mockBrowser.url).toHaveBeenCalledWith('https://active.example.com'); + expect(mockBrowser.switchToWindow).toHaveBeenCalledWith('handle-1'); + }); + + it('restores blanked first tab and switches to it when multiple tabs exist', async () => { + vi.stubGlobal('fetch', vi.fn().mockResolvedValue({ + ok: true, + json: vi.fn().mockResolvedValue([ + { type: 'page', url: 'https://active.example.com', title: 'Active', id: 't1' }, + { type: 'page', url: 'https://other.example.com', title: 'Other', id: 't2' }, + ]), + })); + mockBrowser.getWindowHandles.mockResolvedValue(['handle-1', 'handle-2']); + mockBrowser.getUrl + .mockResolvedValueOnce('about:blank') // scan: handle-1 blanked by CDP + .mockResolvedValueOnce('https://other.example.com') // scan: handle-2 intact + .mockResolvedValue('https://active.example.com'); // final getUrl for result + + await callTool(); + + // Restores handle-1 to the active URL + expect(mockBrowser.url).toHaveBeenCalledWith('https://active.example.com'); + // Switches to handle-1 (originally active tab, now restored) + const calls = mockBrowser.switchToWindow.mock.calls.map((c: unknown[]) => c[0]); + expect(calls[calls.length - 1]).toBe('handle-1'); }); it('initialises sessionHistory with constructed caps and empty steps', async () => { - await callTool({ host: 'myhost', port: 9333, userDataDir: '/my/profile' }); + await callTool({ host: 'myhost', port: 9333 }); const state = (getBrowser as any).__state; const history = state.sessionHistory.get('attached-session-id'); expect(history).toBeDefined(); expect(history.steps).toEqual([]); expect(history.capabilities).toMatchObject({ browserName: 'chrome', - 'goog:chromeOptions': { debuggerAddress: 'myhost:9333', args: ['--user-data-dir=/my/profile'] }, + 'goog:chromeOptions': { debuggerAddress: 'myhost:9333' }, }); });