Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
749 changes: 745 additions & 4 deletions packages/test/src/test/ai-provider/WebBrowserProvider.test.ts

Large diffs are not rendered by default.

37 changes: 35 additions & 2 deletions providers/chrome-ai/src/ai/WebBrowserProvider.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,15 @@ import type {
} from "@workglow/ai/worker";
import { AiProvider } from "@workglow/ai/worker";
import {
CONSERVATIVE_PROBED_CAPABILITIES,
inferWebBrowserCapabilities,
webBrowserWorkerRunFnSpecs,
} from "./common/WebBrowser_Capabilities";
import {
probeWebBrowserCapabilities,
type WebBrowserProbeFactory,
type WebBrowserProbedCapabilities,
} from "./common/WebBrowser_CapabilityProbe";
import { WEB_BROWSER } from "./common/WebBrowser_Constants";
import type { WebBrowserModelConfig } from "./common/WebBrowser_ModelSchema";
import { deleteChromeSession } from "./common/WebBrowser_Sessions";
Expand All @@ -32,6 +38,15 @@ export class WebBrowserProvider extends AiProvider<WebBrowserModelConfig> {
readonly isLocal = true;
readonly supportsBrowser = true;

/**
 * Result of {@link probeWebBrowserCapabilities}. Until the probe resolves
 * we report the conservative subset (no `json-mode`, no `tool-use`) so we
 * never advertise a capability a downstream task can't fulfil. Callers
 * that need the final answer should await {@link ready}.
 */
private probedCaps: WebBrowserProbedCapabilities = CONSERVATIVE_PROBED_CAPABILITIES;
/**
 * Promise created in the constructor from the capability probe. It settles
 * after the probe result has been written to `probedCaps`, and is the value
 * handed out by `ready()`.
 */
private readonly probeReady: Promise<void>;

constructor(
promiseRunFns?: readonly AiProviderRunFnRegistration<
// eslint-disable-next-line @typescript-eslint/no-explicit-any
Expand All @@ -44,13 +59,31 @@ export class WebBrowserProvider extends AiProvider<WebBrowserModelConfig> {
string,
// eslint-disable-next-line @typescript-eslint/no-explicit-any
AiProviderPreviewRunFn<any, any, WebBrowserModelConfig>
>
>,
/**
* Test seam: injectable probe factory. Production callers leave this
* undefined so the probe resolves against the real `LanguageModel`
* global.
*/
probeFactory?: WebBrowserProbeFactory
) {
super(promiseRunFns, previewTasks);
this.probeReady = probeWebBrowserCapabilities(probeFactory).then((result) => {
this.probedCaps = result;
});
}

/**
 * Resolves once the capability probe has completed. After this point
 * {@link inferCapabilities} reflects what the browser actually supports.
 * Before this point it returns the conservative subset.
 *
 * Every call returns the same promise (the one stored in `probeReady`), so
 * it is safe to await repeatedly.
 *
 * @returns A promise that settles when the probe result has been recorded.
 */
ready(): Promise<void> {
return this.probeReady;
}

override inferCapabilities(model: ModelRecord): readonly Capability[] {
return inferWebBrowserCapabilities(model);
return inferWebBrowserCapabilities(model, this.probedCaps);
}

protected override workerRunFnSpecs(): readonly { serves: readonly Capability[] }[] {
Expand Down
67 changes: 57 additions & 10 deletions providers/chrome-ai/src/ai/common/WebBrowser_Capabilities.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,12 @@
*/

import type { Capability, ModelRecord } from "@workglow/ai/worker";
import {
CONSERVATIVE_PROBED_CAPABILITIES,
probeWebBrowserCapabilities,
type WebBrowserProbeFactory,
type WebBrowserProbedCapabilities,
} from "./WebBrowser_CapabilityProbe";
import { WEB_BROWSER_CAPABILITY_SETS } from "./WebBrowser_CapabilitySets";

/**
 * One run-fn spec per entry of `WEB_BROWSER_CAPABILITY_SETS`: each capability
 * set is wrapped as `{ serves }`, preserving the order of the source list.
 */
export const WEB_BROWSER_RUN_FN_SPECS = WEB_BROWSER_CAPABILITY_SETS.map((serves) => ({ serves }));
Expand All @@ -17,15 +23,41 @@ export function webBrowserWorkerRunFnSpecs(): readonly {

/**
 * The subset of {@link ModelRecord} fields that capability inference accepts:
 * the model id, the provider config, and any explicitly declared capabilities.
 */
type CapabilityHints = Pick<ModelRecord, "model_id" | "provider_config" | "capabilities">;

/**
 * Capabilities the `chrome-prompt`/`gemini-nano` family advertises
 * *unconditionally*. `json-mode` and `tool-use` are gated separately because
 * the `responseConstraint` and `tools` options on `LanguageModel.create` /
 * `prompt` aren't universally supported across Chrome builds and channels.
 *
 * Order matters: `inferWebBrowserCapabilities` splices `json-mode` in at
 * index 1 and `tool-use` directly after it, so `text.generation` must stay
 * the first entry for the resulting list order to remain stable.
 */
const PROMPT_BASE_CAPABILITIES = [
"text.generation",
"text.rewriter",
"text.summary",
"model.info",
"model.search",
] as const satisfies readonly Capability[];

/**
* Heuristic capability inference for Chrome Built-in AI {@link ModelRecord}.
*
* Chrome's Built-in AI exposes a fixed set of feature APIs identified by
* model_id (e.g. `chrome-prompt`, `chrome-summarizer`, `chrome-rewriter`,
* `chrome-translator`, `chrome-language-detector`). Declared capabilities
* win; fallback maps the canonical chrome-* prefixes.
*
* `json-mode` and `tool-use` are conditional on browser support — pass a
* known {@link WebBrowserProbedCapabilities} result via `probed` to gate
* them. Defaults to `{jsonMode: true, toolUse: true}` for back-compat with
* callers that haven't adopted the probe yet; new callers should pass
* either the probe result or `CONSERVATIVE_PROBED_CAPABILITIES`.
*
* For an async variant that drives the probe automatically see
* {@link inferWebBrowserCapabilitiesAsync}.
*/
export function inferWebBrowserCapabilities(model: CapabilityHints): readonly Capability[] {
export function inferWebBrowserCapabilities(
model: CapabilityHints,
probed: WebBrowserProbedCapabilities = { jsonMode: true, toolUse: true }
): readonly Capability[] {
const declared = (model.capabilities as readonly Capability[] | undefined) ?? [];
if (declared.length > 0) return declared;

Expand All @@ -37,15 +69,14 @@ export function inferWebBrowserCapabilities(model: CapabilityHints): readonly Ca
const baseName = id.toLowerCase();

if (/prompt|gemini[-_]?nano/.test(baseName)) {
return [
"text.generation",
"json-mode",
"tool-use",
"text.rewriter",
"text.summary",
"model.info",
"model.search",
];
const caps: Capability[] = [...PROMPT_BASE_CAPABILITIES];
if (probed.jsonMode) caps.splice(1, 0, "json-mode");
if (probed.toolUse) {
// Insert tool-use after json-mode (if present) for stable test ordering.
const insertAt = probed.jsonMode ? 2 : 1;
caps.splice(insertAt, 0, "tool-use");
}
return caps;
}
if (/summariz/.test(baseName)) {
return ["text.summary", "model.info", "model.search"];
Expand All @@ -62,3 +93,19 @@ export function inferWebBrowserCapabilities(model: CapabilityHints): readonly Ca

return ["model.search", "model.info"];
}

/**
 * Async counterpart of {@link inferWebBrowserCapabilities}: waits for the
 * capability probe first, then infers with the probed result so that
 * `json-mode` and `tool-use` are only reported when the probe found them.
 *
 * @param model Model hints (id, provider config, declared capabilities).
 * @param factory Optional probe factory, forwarded unchanged to
 * `probeWebBrowserCapabilities`.
 * @returns The capability list inferred for `model` under the probed flags.
 */
export async function inferWebBrowserCapabilitiesAsync(
  model: CapabilityHints,
  factory?: WebBrowserProbeFactory
): Promise<readonly Capability[]> {
  return inferWebBrowserCapabilities(model, await probeWebBrowserCapabilities(factory));
}

export { CONSERVATIVE_PROBED_CAPABILITIES };
152 changes: 152 additions & 0 deletions providers/chrome-ai/src/ai/common/WebBrowser_CapabilityProbe.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
/**
* @license
* Copyright 2026 Steven Roussey <sroussey@gmail.com>
* SPDX-License-Identifier: Apache-2.0
*/

import { getApi } from "./WebBrowser_ChromeHelpers";

/**
 * Result of probing Chrome Built-in AI's `LanguageModel` for the optional
 * capabilities our run-fns rely on. Chrome's surface evolves and some flags
 * (json-mode via `responseConstraint`, tool-use via `tools`) are not
 * universally available — feature detection is the only reliable signal.
 */
export interface WebBrowserProbedCapabilities {
/** `true` when a smoke-test `create({ responseConstraint })` call resolved. */
readonly jsonMode: boolean;
/** `true` when a smoke-test `create({ tools })` call resolved. */
readonly toolUse: boolean;
}

/**
 * Minimal subset of the `LanguageModel` static surface the probe touches.
 * Declared here so tests can pass a fake factory without depending on the
 * `@types/dom-chromium-ai` ambient globals.
 */
export interface WebBrowserProbeFactory {
/**
 * Creates a session. The probe calls this once with a `responseConstraint`
 * option and once with a `tools` option; a rejection is read as "option not
 * supported". The only thing the probe does with the resolved session is
 * call `destroy()` on it.
 */
create(options?: unknown): Promise<{ destroy(): unknown }>;
/**
 * Optional. When present the probe awaits it once; both the resolved value
 * and any rejection are ignored.
 */
params?(): Promise<unknown>;
}

/**
 * Default conservative probe result. Used when the `LanguageModel` global
 * is absent (e.g. running outside Chrome) so callers can still proceed —
 * just without `json-mode` / `tool-use` capabilities exposed.
 *
 * The object is frozen, so the single shared instance cannot be mutated by
 * any of its consumers.
 */
export const CONSERVATIVE_PROBED_CAPABILITIES: WebBrowserProbedCapabilities = Object.freeze({
jsonMode: false,
toolUse: false,
});

/**
 * Module-level coalescing slot so concurrent `probeWebBrowserCapabilities()`
 * callers share the same in-flight promise. Because the slot is never cleared
 * on settle, it also serves later callers the already-resolved result.
 * Cleared via {@link _resetProbeCache} for tests only.
 */
let probePromise: Promise<WebBrowserProbedCapabilities> | undefined;

/**
 * Feature-detect `json-mode` and `tool-use` support on the `LanguageModel`
 * API of the running browser.
 *
 * The first call starts the probe and stores its promise at module level;
 * every later call (concurrent or not) gets that same promise back, so the
 * create/destroy smoke tests run at most once per page load. As a
 * consequence, the `factory` argument is only honoured by the call that
 * actually starts the probe.
 *
 * The probe leans conservative: a rejected `factory.create` counts as "not
 * supported" instead of propagating, because surfacing transient failures
 * into capability inference would flip declared capabilities mid-session.
 * Sessions opened by the smoke tests are destroyed right away so no model
 * stays loaded just for feature detection.
 *
 * @param factory Optional injected factory for tests. Defaults to the real
 * `LanguageModel` global when present.
 * @returns The shared promise of the probed capability flags.
 */
export function probeWebBrowserCapabilities(
  factory?: WebBrowserProbeFactory
): Promise<WebBrowserProbedCapabilities> {
  if (probePromise !== undefined) {
    return probePromise;
  }

  const runProbe = async (): Promise<WebBrowserProbedCapabilities> => {
    let api: WebBrowserProbeFactory | undefined = factory;
    if (!api) {
      // No injected factory: look the real global up through getApi, which
      // raises a consistent error when `LanguageModel` is missing. A missing
      // API simply means "no optional capabilities".
      try {
        // The ambient `LanguageModel` global may be undefined outside Chrome.
        // eslint-disable-next-line @typescript-eslint/no-explicit-any
        const globalLanguageModel = (globalThis as any).LanguageModel as
          | WebBrowserProbeFactory
          | undefined;
        api = getApi("LanguageModel", globalLanguageModel);
      } catch {
        return CONSERVATIVE_PROBED_CAPABILITIES;
      }
    }

    // `LanguageModel.params()` would be the cheapest signal, but today it only
    // reports topK/temperature, not json/tool flags. It is called purely as a
    // forward-compat hook; the smoke tests below remain the real gates.
    if (typeof api.params === "function") {
      try {
        await api.params();
      } catch {
        // Non-fatal — params() restricted to extensions and may reject on web.
      }
    }

    const jsonModeOptions = {
      responseConstraint: { type: "object" },
    };
    const toolUseOptions = {
      tools: [
        {
          name: "_probe",
          description: "",
          inputSchema: { type: "object" },
          execute: async (): Promise<string> => "",
        },
      ],
    };

    // Sequential on purpose: the tool-use session is only requested after the
    // json-mode smoke test has finished.
    const jsonMode = await probeOption(api, jsonModeOptions);
    const toolUse = await probeOption(api, toolUseOptions);

    return { jsonMode, toolUse };
  };

  probePromise = runProbe();
  return probePromise;
}

/**
 * Issue a smoke-test `factory.create(options)`. Any rejection means the
 * option is unsupported in this Chrome build. On success we immediately
 * `destroy()` the session — its only purpose was to confirm acceptance.
 *
 * Teardown is strictly best-effort: neither a synchronous throw from
 * `destroy()` nor a rejected promise returned by it changes the outcome or
 * escapes as an unhandled rejection.
 *
 * @param factory Factory whose `create` is being smoke-tested.
 * @param options Options object passed verbatim to `factory.create`.
 * @returns `true` when `create` resolved, `false` when it threw or rejected.
 */
async function probeOption(
  factory: WebBrowserProbeFactory,
  options: Record<string, unknown>
): Promise<boolean> {
  let session: { destroy(): unknown };
  try {
    session = await factory.create(options);
  } catch {
    return false;
  }

  try {
    // `destroy()` is typed as returning `unknown`, so an implementation may
    // hand back a promise. Attach a no-op rejection handler without awaiting:
    // the probe result is not delayed by teardown, and an async teardown
    // failure cannot surface as an unhandled rejection.
    void Promise.resolve(session.destroy()).catch(() => undefined);
  } catch {
    // best-effort: destroy failures don't affect the probe outcome
  }
  return true;
}

/**
 * @internal Test-only escape hatch to clear the coalescing cache between
 * test cases. Production code never resets the probe — the result is
 * stable for the lifetime of the page.
 *
 * Only the module-level slot is cleared; a probe that is already in flight
 * is not cancelled, and the next `probeWebBrowserCapabilities()` call starts
 * a fresh one.
 */
export function _resetProbeCache(): void {
probePromise = undefined;
}
14 changes: 14 additions & 0 deletions providers/chrome-ai/src/ai/common/WebBrowser_Sessions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,20 @@
/**
 * State kept per entry of the `chromeSessions` map: the `LanguageModel`
 * session itself plus the metadata used to decide whether it can be reused.
 */
export interface ChromeChatSessionState {
/** The cached Chrome `LanguageModel` session. */
readonly session: LanguageModel;
/**
 * NOTE(review): presumably the number of messages already exchanged on this
 * session — confirm against the code that updates this state.
 */
readonly messageCount: number;
/**
 * Stable fingerprint of the `outputSchema` the session was created for
 * (StructuredGeneration runs). Reuse requires an exact match — a schema
 * change forces a session rebuild because Chrome bakes the constraint
 * into the session's response handling state.
 */
readonly schemaFingerprint?: string;
/**
 * Stable fingerprint of the *sorted* tool name list the session was
 * created with (ToolCalling runs). Tool-set changes invalidate the
 * cached session because Chrome's tools are bound at `create()` time
 * and can't be hot-swapped per turn.
 */
readonly toolsFingerprint?: string;
}

const chromeSessions = new Map<string, ChromeChatSessionState>();
Expand Down
Loading
Loading