diff --git a/packages/test/src/test/ai-provider/WebBrowserProvider.test.ts b/packages/test/src/test/ai-provider/WebBrowserProvider.test.ts
index e32b69891..1a371b76f 100644
--- a/packages/test/src/test/ai-provider/WebBrowserProvider.test.ts
+++ b/packages/test/src/test/ai-provider/WebBrowserProvider.test.ts
@@ -4,7 +4,13 @@
  * SPDX-License-Identifier: Apache-2.0
  */
 
-import type { ChatMessage, ModelRecord } from "@workglow/ai";
+import type {
+  ChatMessage,
+  ModelRecord,
+  StructuredGenerationTaskInput,
+  ToolCallingTaskInput,
+  ToolDefinition,
+} from "@workglow/ai";
 import { _testOnly } from "@workglow/chrome-ai/ai";
 import { afterEach, describe, expect, it, vi } from "vitest";
 
@@ -13,10 +19,21 @@ const {
   WEB_BROWSER_RUN_FN_SPECS,
   WEB_BROWSER_RUN_FNS,
   WebBrowser_TextGeneration_Unified,
+  WebBrowser_StructuredGeneration,
+  WebBrowser_ToolCalling,
   sessions,
   chatHistory,
+  probe,
 } = _testOnly;
 
+/**
+ * Test-time helpers: the chrome-ai run-fns we test take strongly-typed task
+ * inputs requiring a `model` field that's irrelevant to provider-level tests
+ * (the dispatcher fills it in upstream). These casts are unchecked at runtime.
+ */
+const asSGI = (v: unknown): StructuredGenerationTaskInput => v as StructuredGenerationTaskInput;
+const asTCI = (v: unknown): ToolCallingTaskInput => v as ToolCallingTaskInput;
+
 function model(model_id: string, capabilities: readonly string[] = []): ModelRecord {
   return {
     model_id,
@@ -33,15 +50,47 @@ function model(model_id: string, capabilities: readonly string[] = []): ModelRec
 // Capability inference + parity
 // --------------------------------------------------------------------------
 
+/**
+ * Probe factory whose `create()` always resolves to a handle with a mocked
+ * `destroy`, and whose `params()` resolves to `{}`. Used to drive
+ * `WebBrowserProvider` past its conservative pre-probe state in tests.
+ */
+// eslint-disable-next-line @typescript-eslint/no-explicit-any
+function makeAcceptingProbeFactory(): any {
+  const destroy = vi.fn();
+  return {
+    create: vi.fn().mockResolvedValue({ destroy }),
+    params: vi.fn().mockResolvedValue({}),
+  };
+}
+
 describe("WebBrowserProvider.inferCapabilities", () => {
-  const provider = new WebBrowserProvider(WEB_BROWSER_RUN_FNS);
+  // Reset the module-level probe cache so each `new WebBrowserProvider`
+  // can drive a fresh probe with its injected factory.
+  afterEach(() => {
+    probe._resetProbeCache();
+  });
 
-  it("trusts declared capabilities", () => {
+  it("trusts declared capabilities (probe-independent)", () => {
+    const provider = new WebBrowserProvider(WEB_BROWSER_RUN_FNS);
     const caps = provider.inferCapabilities(model("anything", ["text.translation"]));
     expect(caps).toEqual(["text.translation"]);
   });
 
-  it("infers text-gen + json-mode + tool-use for chrome-prompt / gemini-nano", () => {
+  it("conservative pre-probe: drops json-mode and tool-use for chrome-prompt", () => {
+    // Probe is async — until it resolves, the provider must NOT advertise
+    // json-mode or tool-use, since the underlying API might not support them.
+    const provider = new WebBrowserProvider(WEB_BROWSER_RUN_FNS);
+    const caps = provider.inferCapabilities(model("chrome-prompt"));
+    expect(caps).toContain("text.generation");
+    expect(caps).not.toContain("json-mode");
+    expect(caps).not.toContain("tool-use");
+  });
+
+  it("post-probe: adds json-mode + tool-use when supported", async () => {
+    const factory = makeAcceptingProbeFactory();
+    const provider = new WebBrowserProvider(WEB_BROWSER_RUN_FNS, undefined, factory);
+    await provider.ready();
     const caps = provider.inferCapabilities(model("chrome-prompt"));
     expect(caps).toContain("text.generation");
     expect(caps).toContain("json-mode");
@@ -50,27 +99,32 @@ describe("WebBrowserProvider.inferCapabilities", () => {
   });
 
   it("infers text.summary for summarizer model", () => {
+    const provider = new WebBrowserProvider(WEB_BROWSER_RUN_FNS);
     const caps = provider.inferCapabilities(model("chrome-summarizer"));
     expect(caps).toContain("text.summary");
     expect(caps).not.toContain("text.generation");
   });
 
   it("infers text.rewriter for rewriter model", () => {
+    const provider = new WebBrowserProvider(WEB_BROWSER_RUN_FNS);
     const caps = provider.inferCapabilities(model("chrome-rewriter"));
     expect(caps).toContain("text.rewriter");
   });
 
   it("infers text.translation for translator model", () => {
+    const provider = new WebBrowserProvider(WEB_BROWSER_RUN_FNS);
     const caps = provider.inferCapabilities(model("chrome-translator"));
     expect(caps).toContain("text.translation");
   });
 
   it("infers text.language-detection for language-detector model", () => {
+    const provider = new WebBrowserProvider(WEB_BROWSER_RUN_FNS);
     const caps = provider.inferCapabilities(model("chrome-language-detector"));
     expect(caps).toContain("text.language-detection");
   });
 
   it("returns baseline meta-ops for unknown ids", () => {
+    const provider = new WebBrowserProvider(WEB_BROWSER_RUN_FNS);
     const caps = provider.inferCapabilities(model("unknown-id"));
     expect(caps).toEqual(["model.search", "model.info"]);
   });
@@ -351,3 +405,690 @@ describe("WebBrowser_ChatHistory helpers", () => {
     expect(chatHistory.buildInitialPromptsFromHistory([])).toEqual([]);
   });
 });
+
+// --------------------------------------------------------------------------
+// Capability probe
+// --------------------------------------------------------------------------
+
+/**
+ * Fake factory whose `create()` rejects selectively: with `jsonModeOk: false`
+ * it throws when `responseConstraint` is among the options, with
+ * `toolUseOk: false` when `tools` is. `create` is a `vi.fn`, so call counts
+ * can be asserted; `destroyCount()` reports how many handles were destroyed.
+ */
+// eslint-disable-next-line @typescript-eslint/no-explicit-any
+function makeProbeFactory(opts: { jsonModeOk: boolean; toolUseOk: boolean }): any {
+  let destroys = 0;
+  const create = vi.fn(async (options?: Record<string, unknown>) => {
+    if (options && "responseConstraint" in options && !opts.jsonModeOk) {
+      throw new Error("responseConstraint not supported");
+    }
+    if (options && "tools" in options && !opts.toolUseOk) {
+      throw new Error("tools not supported");
+    }
+    return {
+      destroy: (): void => {
+        destroys += 1;
+      },
+    };
+  });
+  return { create, params: vi.fn(), destroyCount: () => destroys };
+}
+
+describe("probeWebBrowserCapabilities", () => {
+  // Each test injects its own factory; clear the cached coalesced promise
+  // so they don't share results.
+  afterEach(() => {
+    probe._resetProbeCache();
+  });
+
+  it("both true when factory accepts both responseConstraint and tools", async () => {
+    const f = makeProbeFactory({ jsonModeOk: true, toolUseOk: true });
+    const result = await probe.probeWebBrowserCapabilities(f);
+    expect(result).toEqual({ jsonMode: true, toolUse: true });
+  });
+
+  it("jsonMode false when factory rejects responseConstraint", async () => {
+    const f = makeProbeFactory({ jsonModeOk: false, toolUseOk: true });
+    const result = await probe.probeWebBrowserCapabilities(f);
+    expect(result).toEqual({ jsonMode: false, toolUse: true });
+  });
+
+  it("toolUse false when factory rejects tools", async () => {
+    const f = makeProbeFactory({ jsonModeOk: true, toolUseOk: false });
+    const result = await probe.probeWebBrowserCapabilities(f);
+    expect(result).toEqual({ jsonMode: true, toolUse: false });
+  });
+
+  it("both false when factory rejects both", async () => {
+    const f = makeProbeFactory({ jsonModeOk: false, toolUseOk: false });
+    const result = await probe.probeWebBrowserCapabilities(f);
+    expect(result).toEqual({ jsonMode: false, toolUse: false });
+  });
+
+  it("coalesces concurrent calls into a single probe", async () => {
+    const f = makeProbeFactory({ jsonModeOk: true, toolUseOk: true });
+    // Fire N concurrent probes through the public surface. They should all
+    // share the same in-flight promise and trigger at most the same set of
+    // create() calls a single probe would (one per feature, not N).
+    const results = await Promise.all(
+      Array.from({ length: 5 }, () => probe.probeWebBrowserCapabilities(f))
+    );
+    expect(results.every((r) => r.jsonMode && r.toolUse)).toBe(true);
+    // The probe issues exactly two create() calls — one for json-mode, one
+    // for tool-use. Concurrent callers must coalesce, not multiply.
+    expect(f.create).toHaveBeenCalledTimes(2);
+  });
+
+  it("provider.ready() reflects the probe result", async () => {
+    const f = makeProbeFactory({ jsonModeOk: true, toolUseOk: false });
+    const provider = new WebBrowserProvider(WEB_BROWSER_RUN_FNS, undefined, f);
+    // Pre-ready: conservative subset for chrome-prompt.
+    const preCaps = provider.inferCapabilities(model("chrome-prompt"));
+    expect(preCaps).not.toContain("json-mode");
+    expect(preCaps).not.toContain("tool-use");
+    await provider.ready();
+    // Post-ready: json-mode appears, tool-use stays gated.
+    const postCaps = provider.inferCapabilities(model("chrome-prompt"));
+    expect(postCaps).toContain("json-mode");
+    expect(postCaps).not.toContain("tool-use");
+  });
+});
+
+// --------------------------------------------------------------------------
+// StructuredGeneration session cache (H1)
+// --------------------------------------------------------------------------
+
+/**
+ * Install `impl` as the `LanguageModel` global (for the run-fn's `getApi` /
+ * `ensureAvailable`). The returned teardown restores or deletes the global.
+ */
+function installLanguageModelGlobal(impl: unknown): () => void {
+  const prior = (globalThis as Record<string, unknown>).LanguageModel;
+  (globalThis as Record<string, unknown>).LanguageModel = impl;
+  return () => {
+    if (prior === undefined) {
+      delete (globalThis as Record<string, unknown>).LanguageModel;
+    } else {
+      (globalThis as Record<string, unknown>).LanguageModel = prior;
+    }
+  };
+}
+
+/**
+ * Fake `LanguageModel` factory + session that streams one chunk and closes.
+ * `text` is the full payload — or a thunk evaluated per `promptStreaming()`
+ * call — delivered as a single snapshot, which suffices for the parse
+ * pipeline because Chrome's stream surface emits progressive snapshots.
+ */
+// eslint-disable-next-line @typescript-eslint/no-explicit-any
+function makeFakeLanguageModel(text: string | (() => string)): any {
+  let destroyed = 0;
+  const factory = {
+    availability: vi.fn().mockResolvedValue("available"),
+    create: vi.fn(async () => ({
+      promptStreaming: (_p: string, _o?: unknown) => {
+        const value = typeof text === "function" ? text() : text;
+        return new ReadableStream<string>({
+          start(controller) {
+            controller.enqueue(value);
+            controller.close();
+          },
+        });
+      },
+      destroy: () => {
+        destroyed += 1;
+      },
+    })),
+  };
+  return { factory, destroyed: () => destroyed };
+}
+
+describe("WebBrowser_StructuredGeneration session cache", () => {
+  const schema = {
+    type: "object",
+    properties: { x: { type: "number" } },
+    required: ["x"],
+    additionalProperties: false,
+  } as const;
+  const sid = "sg-test-1";
+
+  afterEach(() => {
+    sessions.deleteChromeSession(sid);
+  });
+
+  it("first call with sessionId seeds the cache", async () => {
+    const { factory } = makeFakeLanguageModel('{"x":1}');
+    const restore = installLanguageModelGlobal(factory);
+    try {
+      const emit = vi.fn();
+      await WebBrowser_StructuredGeneration(
+        asSGI({ prompt: "p", outputSchema: schema }),
+        undefined,
+        new AbortController().signal,
+        emit,
+        schema,
+        sid
+      );
+      expect(sessions.getChromeSession(sid)).toBeDefined();
+      expect(sessions.getChromeSession(sid)?.schemaFingerprint).toBeDefined();
+      expect(factory.create).toHaveBeenCalledTimes(1);
+    } finally {
+      restore();
+    }
+  });
+
+  it("second call with the same schema reuses the cached session", async () => {
+    const { factory } = makeFakeLanguageModel('{"x":1}');
+    const restore = installLanguageModelGlobal(factory);
+    try {
+      const emit = vi.fn();
+      await WebBrowser_StructuredGeneration(
+        asSGI({ prompt: "p1", outputSchema: schema }),
+        undefined,
+        new AbortController().signal,
+        emit,
+        schema,
+        sid
+      );
+      await WebBrowser_StructuredGeneration(
+        asSGI({ prompt: "p2", outputSchema: schema }),
+        undefined,
+        new AbortController().signal,
+        emit,
+        schema,
+        sid
+      );
+      // Only ONE create() — the second call reused the cached session.
+      expect(factory.create).toHaveBeenCalledTimes(1);
+    } finally {
+      restore();
+    }
+  });
+
+  it("mismatched schema fingerprint forces rebuild", async () => {
+    const { factory } = makeFakeLanguageModel('{"x":1}');
+    const restore = installLanguageModelGlobal(factory);
+    try {
+      const emit = vi.fn();
+      await WebBrowser_StructuredGeneration(
+        asSGI({ prompt: "p1", outputSchema: schema }),
+        undefined,
+        new AbortController().signal,
+        emit,
+        schema,
+        sid
+      );
+      const otherSchema = {
+        type: "object",
+        properties: { x: { type: "number" }, y: { type: "string" } },
+        required: ["x"],
+        additionalProperties: false,
+      } as const;
+      await WebBrowser_StructuredGeneration(
+        asSGI({ prompt: "p2", outputSchema: otherSchema }),
+        undefined,
+        new AbortController().signal,
+        emit,
+        // streaming text is a valid `{x:1}` which satisfies otherSchema too
+        otherSchema,
+        sid
+      );
+      // Two creates — schema fingerprint mismatch invalidated the cache.
+      expect(factory.create).toHaveBeenCalledTimes(2);
+    } finally {
+      restore();
+    }
+  });
+});
+
+describe("WebBrowser_StructuredGeneration cache poisoning", () => {
+  const schema = {
+    type: "object",
+    properties: { x: { type: "number" } },
+    required: ["x"],
+    additionalProperties: false,
+  } as const;
+  const sid = "sg-poison-1";
+
+  afterEach(() => {
+    sessions.deleteChromeSession(sid);
+  });
+
+  it("drops the cache entry when a follow-up turn throws on parse failure", async () => {
+    // First call seeds a cache with parseable output; second call streams
+    // garbage so JSON.parse and parsePartialJson both fail. The run-fn
+    // must throw and clear the cache entry so the next attempt rebuilds.
+    let seq = 0;
+    const seqText = (): string => {
+      seq += 1;
+      return seq === 1 ? '{"x":1}' : "not json {";
+    };
+    const { factory } = makeFakeLanguageModel(seqText);
+    const restore = installLanguageModelGlobal(factory);
+    try {
+      const emit = vi.fn();
+      await WebBrowser_StructuredGeneration(
+        asSGI({ prompt: "p1", outputSchema: schema }),
+        undefined,
+        new AbortController().signal,
+        emit,
+        schema,
+        sid
+      );
+      expect(sessions.getChromeSession(sid)).toBeDefined();
+      await expect(
+        WebBrowser_StructuredGeneration(
+          asSGI({ prompt: "p2", outputSchema: schema }),
+          undefined,
+          new AbortController().signal,
+          emit,
+          schema,
+          sid
+        )
+      ).rejects.toThrow(/unparseable|validation/i);
+      // Entry is dropped after the failed turn.
+      expect(sessions.getChromeSession(sid)).toBeUndefined();
+    } finally {
+      restore();
+    }
+  });
+});
+
+// --------------------------------------------------------------------------
+// StructuredGeneration final-JSON validation (H4)
+// --------------------------------------------------------------------------
+
+describe("WebBrowser_StructuredGeneration validation", () => {
+  const schema = {
+    type: "object",
+    properties: { x: { type: "number" } },
+    required: ["x"],
+    additionalProperties: false,
+  } as const;
+
+  it("emits finish on valid JSON that satisfies the schema", async () => {
+    const { factory } = makeFakeLanguageModel('{"x":1}');
+    const restore = installLanguageModelGlobal(factory);
+    try {
+      const events: unknown[] = [];
+      const emit = (e: unknown): void => {
+        events.push(e);
+      };
+      await WebBrowser_StructuredGeneration(
+        asSGI({ prompt: "p", outputSchema: schema }),
+        undefined,
+        new AbortController().signal,
+        emit,
+        schema
+      );
+      const finish = events.find((e) => (e as { type?: string }).type === "finish") as
+        | { data: { object: { x: number } } }
+        | undefined;
+      expect(finish).toBeDefined();
+      expect(finish?.data.object).toEqual({ x: 1 });
+    } finally {
+      restore();
+    }
+  });
+
+  it("throws PermanentJobError on unparseable JSON, no finish emitted", async () => {
+    const { factory } = makeFakeLanguageModel("definitely not json");
+    const restore = installLanguageModelGlobal(factory);
+    try {
+      const events: unknown[] = [];
+      const emit = (e: unknown): void => {
+        events.push(e);
+      };
+      await expect(
+        WebBrowser_StructuredGeneration(
+          asSGI({ prompt: "p", outputSchema: schema }),
+          undefined,
+          new AbortController().signal,
+          emit,
+          schema
+        )
+      ).rejects.toThrow(/unparseable/i);
+      expect(events.some((e) => (e as { type?: string }).type === "finish")).toBe(false);
+    } finally {
+      restore();
+    }
+  });
+
+  it("throws PermanentJobError when parsed object fails schema validation", async () => {
+    // Parses fine but `x` is a string, not a number — fails the schema.
+    const { factory } = makeFakeLanguageModel('{"x":"oops"}');
+    const restore = installLanguageModelGlobal(factory);
+    try {
+      const events: unknown[] = [];
+      const emit = (e: unknown): void => {
+        events.push(e);
+      };
+      await expect(
+        WebBrowser_StructuredGeneration(
+          asSGI({ prompt: "p", outputSchema: schema }),
+          undefined,
+          new AbortController().signal,
+          emit,
+          schema
+        )
+      ).rejects.toThrow(/schema validation/i);
+      expect(events.some((e) => (e as { type?: string }).type === "finish")).toBe(false);
+    } finally {
+      restore();
+    }
+  });
+});
+
+// --------------------------------------------------------------------------
+// ToolCalling session cache (H2)
+// --------------------------------------------------------------------------
+
+/**
+ * Fake `LanguageModel` for tool-calling tests. The session's
+ * `promptStreaming` awaits each declared tool's `execute` callback in order
+ * (skipping `_probe`) so the run-fn captures the calls, then closes the stream.
+ *
+ * `callsBy[toolName]` supplies the args passed to `execute`; if omitted it
+ * defaults to `{}`.
+ */
+// eslint-disable-next-line @typescript-eslint/no-explicit-any
+function makeFakeToolCallingModel(callsBy: Record<string, unknown> = {}): any {
+  const factory = {
+    availability: vi.fn().mockResolvedValue("available"),
+    create: vi.fn(
+      async (options?: {
+        tools?: Array<{ name: string; execute: (...args: unknown[]) => Promise<string> }>;
+      }) => {
+        const tools = options?.tools ?? [];
+        return {
+          promptStreaming: () =>
+            new ReadableStream<string>({
+              async start(controller) {
+                for (const t of tools) {
+                  if (t.name === "_probe") continue; // probe tool ignored here
+                  const args = callsBy[t.name] ?? {};
+                  await t.execute(args);
+                }
+                controller.close();
+              },
+            }),
+          destroy: vi.fn(),
+        };
+      }
+    ),
+  };
+  return { factory };
+}
+
+describe("WebBrowser_ToolCalling session cache", () => {
+  const sid = "tc-test-1";
+  const toolA: ToolDefinition = {
+    name: "tool_a",
+    description: "tool a",
+    inputSchema: { type: "object", properties: {}, additionalProperties: true },
+  };
+  const toolB: ToolDefinition = {
+    name: "tool_b",
+    description: "tool b",
+    inputSchema: { type: "object", properties: {}, additionalProperties: true },
+  };
+
+  afterEach(() => {
+    sessions.deleteChromeSession(sid);
+  });
+
+  it("reuses cache when sessionId + messages + tool set match", async () => {
+    const { factory } = makeFakeToolCallingModel();
+    const restore = installLanguageModelGlobal(factory);
+    try {
+      const emit = vi.fn();
+      const messages: ChatMessage[] = [
+        { role: "user", content: [{ type: "text", text: "do it" }] },
+      ];
+      await WebBrowser_ToolCalling(
+        asTCI({ prompt: "", tools: [toolA, toolB], messages }),
+        undefined,
+        new AbortController().signal,
+        emit,
+        undefined,
+        sid
+      );
+      const messages2: ChatMessage[] = [
+        ...messages,
+        { role: "assistant", content: [{ type: "text", text: "ok" }] },
+        { role: "user", content: [{ type: "text", text: "again" }] },
+      ];
+      await WebBrowser_ToolCalling(
+        asTCI({ prompt: "", tools: [toolA, toolB], messages: messages2 }),
+        undefined,
+        new AbortController().signal,
+        emit,
+        undefined,
+        sid
+      );
+      // Same tool set, same conversation thread → cache reuse, one create().
+      expect(factory.create).toHaveBeenCalledTimes(1);
+    } finally {
+      restore();
+    }
+  });
+
+  it("rebuilds when the tool set changes", async () => {
+    const { factory } = makeFakeToolCallingModel();
+    const restore = installLanguageModelGlobal(factory);
+    try {
+      const emit = vi.fn();
+      const messages: ChatMessage[] = [
+        { role: "user", content: [{ type: "text", text: "do it" }] },
+      ];
+      await WebBrowser_ToolCalling(
+        asTCI({ prompt: "", tools: [toolA], messages }),
+        undefined,
+        new AbortController().signal,
+        emit,
+        undefined,
+        sid
+      );
+      const messages2: ChatMessage[] = [
+        ...messages,
+        { role: "assistant", content: [{ type: "text", text: "ok" }] },
+        { role: "user", content: [{ type: "text", text: "again" }] },
+      ];
+      await WebBrowser_ToolCalling(
+        asTCI({ prompt: "", tools: [toolA, toolB], messages: messages2 }),
+        undefined,
+        new AbortController().signal,
+        emit,
+        undefined,
+        sid
+      );
+      // Different fingerprint → cache invalidated, two creates.
+      expect(factory.create).toHaveBeenCalledTimes(2);
+    } finally {
+      restore();
+    }
+  });
+
+  it("drops + destroys the cache entry on prompt failure", async () => {
+    // Sequenced session: first promptStreaming() returns a clean close,
+    // second errors. create() always returns this one handle, so the cached
+    // session reused on the second turn is the same `session` object.
+    let promptCount = 0;
+    const sessionImpl = {
+      promptStreaming: (): ReadableStream<string> =>
+        new ReadableStream<string>({
+          start(controller) {
+            promptCount += 1;
+            if (promptCount === 1) {
+              controller.close();
+            } else {
+              controller.error(new Error("boom"));
+            }
+          },
+        }),
+      destroy: vi.fn(),
+    };
+    const factory = {
+      availability: vi.fn().mockResolvedValue("available"),
+      create: vi.fn(async () => sessionImpl),
+    };
+    const restore = installLanguageModelGlobal(factory);
+    try {
+      const messages: ChatMessage[] = [
+        { role: "user", content: [{ type: "text", text: "do it" }] },
+      ];
+      const emit = vi.fn();
+      // First turn seeds the cache successfully.
+      await WebBrowser_ToolCalling(
+        asTCI({ prompt: "", tools: [toolA], messages }),
+        undefined,
+        new AbortController().signal,
+        emit,
+        undefined,
+        sid
+      );
+      expect(sessions.getChromeSession(sid)).toBeDefined();
+      // Second turn reuses the cached session whose stream now errors.
+      const messages2: ChatMessage[] = [
+        ...messages,
+        { role: "assistant", content: [{ type: "text", text: "ok" }] },
+        { role: "user", content: [{ type: "text", text: "again" }] },
+      ];
+      await expect(
+        WebBrowser_ToolCalling(
+          asTCI({ prompt: "", tools: [toolA], messages: messages2 }),
+          undefined,
+          new AbortController().signal,
+          emit,
+          undefined,
+          sid
+        )
+      ).rejects.toThrow(/boom/);
+      // Cache cleaned up.
+      expect(sessions.getChromeSession(sid)).toBeUndefined();
+    } finally {
+      restore();
+    }
+  });
+});
+
+// --------------------------------------------------------------------------
+// ToolCalling argument validation (H3)
+// --------------------------------------------------------------------------
+
+describe("WebBrowser_ToolCalling argument validation", () => {
+  const strictTool: ToolDefinition = {
+    name: "echo",
+    description: "echo",
+    inputSchema: {
+      type: "object",
+      properties: { text: { type: "string" } },
+      required: ["text"],
+      additionalProperties: false,
+    },
+  };
+
+  it("passes through calls whose args satisfy the inputSchema", async () => {
+    const { factory } = makeFakeToolCallingModel({ echo: { text: "hello" } });
+    const restore = installLanguageModelGlobal(factory);
+    try {
+      const events: Array<{ type: string; port?: string; objectDelta?: unknown }> = [];
+      const emit = (e: unknown): void => {
+        events.push(e as { type: string; port?: string; objectDelta?: unknown });
+      };
+      await WebBrowser_ToolCalling(
+        asTCI({ prompt: "go", tools: [strictTool] }),
+        undefined,
+        new AbortController().signal,
+        emit
+      );
+      const tcEvent = events.find((e) => e.type === "object-delta" && e.port === "toolCalls");
+      expect(tcEvent).toBeDefined();
+      const calls = (tcEvent?.objectDelta as Array<{ name: string; input: unknown }>) ?? [];
+      expect(calls).toHaveLength(1);
+      expect(calls[0]?.input).toEqual({ text: "hello" });
+    } finally {
+      restore();
+    }
+  });
+
+  it("drops calls missing a required field", async () => {
+    // `text` is required but omitted.
+    const { factory } = makeFakeToolCallingModel({ echo: {} });
+    const restore = installLanguageModelGlobal(factory);
+    try {
+      const events: Array<{ type: string; port?: string }> = [];
+      const emit = (e: unknown): void => {
+        events.push(e as { type: string; port?: string });
+      };
+      await WebBrowser_ToolCalling(
+        asTCI({ prompt: "go", tools: [strictTool] }),
+        undefined,
+        new AbortController().signal,
+        emit
+      );
+      // No toolCalls event since the only call was dropped.
+      expect(events.some((e) => e.type === "object-delta" && e.port === "toolCalls")).toBe(false);
+    } finally {
+      restore();
+    }
+  });
+
+  it("drops calls with a wrong-typed field", async () => {
+    // `text` must be string; passing a number fails validation.
+    const { factory } = makeFakeToolCallingModel({ echo: { text: 42 } });
+    const restore = installLanguageModelGlobal(factory);
+    try {
+      const events: Array<{ type: string; port?: string }> = [];
+      const emit = (e: unknown): void => {
+        events.push(e as { type: string; port?: string });
+      };
+      await WebBrowser_ToolCalling(
+        asTCI({ prompt: "go", tools: [strictTool] }),
+        undefined,
+        new AbortController().signal,
+        emit
+      );
+      expect(events.some((e) => e.type === "object-delta" && e.port === "toolCalls")).toBe(false);
+    } finally {
+      restore();
+    }
+  });
+
+  it("falls through to name-check when inputSchema fails to compile", async () => {
+    // A schema that compileSchema may not handle. The malformed-schema tool
+    // should still see its call pass through (no crash). Filtering of
+    // hallucinated names is not exercised by this test.
+    const malformedTool = {
+      name: "loose",
+      description: "loose",
+      // Garbage schema — type is invalid.
+      inputSchema: { type: "not_a_real_type" } as unknown,
+    } as { name: string; description: string; inputSchema: unknown };
+    const { factory } = makeFakeToolCallingModel({ loose: { anything: 1 } });
+    const restore = installLanguageModelGlobal(factory);
+    try {
+      const events: Array<{ type: string; port?: string; objectDelta?: unknown }> = [];
+      const emit = (e: unknown): void => {
+        events.push(e as { type: string; port?: string; objectDelta?: unknown });
+      };
+      await WebBrowser_ToolCalling(
+        asTCI({
+          prompt: "go",
+          tools: [malformedTool as unknown as typeof strictTool],
+        }),
+        undefined,
+        new AbortController().signal,
+        emit
+      );
+      // Either the schema compiled and validation passed (loose schema),
+      // or it failed to compile and the call fell through unchanged.
+      // Either way, no crash, and we see the tool call event.
+      const tcEvent = events.find((e) => e.type === "object-delta" && e.port === "toolCalls");
+      expect(tcEvent).toBeDefined();
+    } finally {
+      restore();
+    }
+  });
+});
diff --git a/providers/chrome-ai/src/ai/WebBrowserProvider.ts b/providers/chrome-ai/src/ai/WebBrowserProvider.ts
index 38f9f17ef..ebb2ca015 100644
--- a/providers/chrome-ai/src/ai/WebBrowserProvider.ts
+++ b/providers/chrome-ai/src/ai/WebBrowserProvider.ts
@@ -12,9 +12,15 @@ import type {
 } from "@workglow/ai/worker";
 import { AiProvider } from "@workglow/ai/worker";
 import {
+  CONSERVATIVE_PROBED_CAPABILITIES,
   inferWebBrowserCapabilities,
   webBrowserWorkerRunFnSpecs,
 } from "./common/WebBrowser_Capabilities";
+import {
+  probeWebBrowserCapabilities,
+  type WebBrowserProbeFactory,
+  type WebBrowserProbedCapabilities,
+} from "./common/WebBrowser_CapabilityProbe";
 import { WEB_BROWSER } from "./common/WebBrowser_Constants";
 import type { WebBrowserModelConfig } from "./common/WebBrowser_ModelSchema";
 import { deleteChromeSession } from "./common/WebBrowser_Sessions";
@@ -32,6 +38,15 @@ export class WebBrowserProvider extends AiProvider<WebBrowserModelConfig> {
   readonly isLocal = true;
   readonly supportsBrowser = true;
 
+  /**
+   * Result of {@link probeWebBrowserCapabilities}. Until the probe resolves
+   * we report the conservative subset (no `json-mode`, no `tool-use`) so we
+   * never advertise a capability a downstream task can't fulfil. Callers
+   * that need the final answer should await {@link ready}.
+   */
+  private probedCaps: WebBrowserProbedCapabilities = CONSERVATIVE_PROBED_CAPABILITIES;
+  private readonly probeReady: Promise<void>;
+
   constructor(
     promiseRunFns?: readonly AiProviderRunFnRegistration<
       // eslint-disable-next-line @typescript-eslint/no-explicit-any
@@ -44,13 +59,31 @@ export class WebBrowserProvider extends AiProvider<WebBrowserModelConfig> {
       string,
       // eslint-disable-next-line @typescript-eslint/no-explicit-any
       AiProviderPreviewRunFn<any, any, WebBrowserModelConfig>
-    >
+    >,
+    /**
+     * Test seam: injectable probe factory. Production callers leave this
+     * undefined so the probe resolves against the real `LanguageModel`
+     * global.
+     */
+    probeFactory?: WebBrowserProbeFactory
   ) {
     super(promiseRunFns, previewTasks);
+    this.probeReady = probeWebBrowserCapabilities(probeFactory).then((result) => {
+      this.probedCaps = result;
+    });
+  }
+
+  /**
+   * Resolves once the capability probe has settled and its result is stored.
+   * Until then {@link inferCapabilities} gates on the conservative subset;
+   * afterwards it gates on the probed `json-mode` / `tool-use` flags.
+   */
+  ready(): Promise<void> {
+    return this.probeReady;
   }
 
   override inferCapabilities(model: ModelRecord): readonly Capability[] {
-    return inferWebBrowserCapabilities(model);
+    return inferWebBrowserCapabilities(model, this.probedCaps);
   }
 
   protected override workerRunFnSpecs(): readonly { serves: readonly Capability[] }[] {
diff --git a/providers/chrome-ai/src/ai/common/WebBrowser_Capabilities.ts b/providers/chrome-ai/src/ai/common/WebBrowser_Capabilities.ts
index c68810297..6000339c5 100644
--- a/providers/chrome-ai/src/ai/common/WebBrowser_Capabilities.ts
+++ b/providers/chrome-ai/src/ai/common/WebBrowser_Capabilities.ts
@@ -5,6 +5,12 @@
  */
 
 import type { Capability, ModelRecord } from "@workglow/ai/worker";
+import {
+  CONSERVATIVE_PROBED_CAPABILITIES,
+  probeWebBrowserCapabilities,
+  type WebBrowserProbeFactory,
+  type WebBrowserProbedCapabilities,
+} from "./WebBrowser_CapabilityProbe";
 import { WEB_BROWSER_CAPABILITY_SETS } from "./WebBrowser_CapabilitySets";
 
 export const WEB_BROWSER_RUN_FN_SPECS = WEB_BROWSER_CAPABILITY_SETS.map((serves) => ({ serves }));
@@ -17,6 +23,20 @@ export function webBrowserWorkerRunFnSpecs(): readonly {
 
 type CapabilityHints = Pick<ModelRecord, "model_id" | "provider_config" | "capabilities">;
 
+/**
+ * Capabilities the `chrome-prompt`/`gemini-nano` family advertises
+ * *unconditionally*. `json-mode` and `tool-use` are gated separately because
+ * `responseConstraint` / `tools` aren't supported on every Chrome build or
+ * channel. Keep `"text.generation"` first: gated entries go in at index 1 / 2.
+ */
+const PROMPT_BASE_CAPABILITIES = [
+  "text.generation",
+  "text.rewriter",
+  "text.summary",
+  "model.info",
+  "model.search",
+] as const satisfies readonly Capability[];
+
 /**
  * Heuristic capability inference for Chrome Built-in AI {@link ModelRecord}.
  *
@@ -24,8 +44,20 @@ type CapabilityHints = Pick<ModelRecord, "model_id" | "provider_config" | "capab
  * model_id (e.g. `chrome-prompt`, `chrome-summarizer`, `chrome-rewriter`,
  * `chrome-translator`, `chrome-language-detector`). Declared capabilities
  * win; fallback maps the canonical chrome-* prefixes.
+ *
+ * `json-mode` and `tool-use` are conditional on browser support — pass a
+ * known {@link WebBrowserProbedCapabilities} result via `probed` to gate
+ * them. Defaults to `{jsonMode: true, toolUse: true}` for back-compat with
+ * callers that haven't adopted the probe yet; new callers should pass
+ * either the probe result or `CONSERVATIVE_PROBED_CAPABILITIES`.
+ *
+ * For an async variant that drives the probe automatically see
+ * {@link inferWebBrowserCapabilitiesAsync}.
  */
-export function inferWebBrowserCapabilities(model: CapabilityHints): readonly Capability[] {
+export function inferWebBrowserCapabilities(
+  model: CapabilityHints,
+  probed: WebBrowserProbedCapabilities = { jsonMode: true, toolUse: true }
+): readonly Capability[] {
   const declared = (model.capabilities as readonly Capability[] | undefined) ?? [];
   if (declared.length > 0) return declared;
 
@@ -37,15 +69,14 @@ export function inferWebBrowserCapabilities(model: CapabilityHints): readonly Ca
   const baseName = id.toLowerCase();
 
   if (/prompt|gemini[-_]?nano/.test(baseName)) {
-    return [
-      "text.generation",
-      "json-mode",
-      "tool-use",
-      "text.rewriter",
-      "text.summary",
-      "model.info",
-      "model.search",
-    ];
+    const caps: Capability[] = [...PROMPT_BASE_CAPABILITIES];
+    if (probed.jsonMode) caps.splice(1, 0, "json-mode");
+    if (probed.toolUse) {
+      // Insert tool-use after json-mode (if present) for stable test ordering.
+      const insertAt = probed.jsonMode ? 2 : 1;
+      caps.splice(insertAt, 0, "tool-use");
+    }
+    return caps;
   }
   if (/summariz/.test(baseName)) {
     return ["text.summary", "model.info", "model.search"];
@@ -62,3 +93,19 @@ export function inferWebBrowserCapabilities(model: CapabilityHints): readonly Ca
 
   return ["model.search", "model.info"];
 }
+
+/**
+ * Async counterpart of {@link inferWebBrowserCapabilities}. Awaits the
+ * module-cached capability probe first, so `json-mode` / `tool-use` are
+ * gated on the probed flags rather than on the optimistic synchronous
+ * default. `factory` is forwarded to the probe as-is; the probe consults
+ * it only when no result is cached yet.
+ */
+export async function inferWebBrowserCapabilitiesAsync(
+  model: CapabilityHints,
+  factory?: WebBrowserProbeFactory
+): Promise<readonly Capability[]> {
+  return inferWebBrowserCapabilities(model, await probeWebBrowserCapabilities(factory));
+}
+
+export { CONSERVATIVE_PROBED_CAPABILITIES };
diff --git a/providers/chrome-ai/src/ai/common/WebBrowser_CapabilityProbe.ts b/providers/chrome-ai/src/ai/common/WebBrowser_CapabilityProbe.ts
new file mode 100644
index 000000000..08dae9be4
--- /dev/null
+++ b/providers/chrome-ai/src/ai/common/WebBrowser_CapabilityProbe.ts
@@ -0,0 +1,152 @@
+/**
+ * @license
+ * Copyright 2026 Steven Roussey <sroussey@gmail.com>
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+import { getApi } from "./WebBrowser_ChromeHelpers";
+
+/**
+ * Result of probing Chrome Built-in AI's `LanguageModel` for the optional
+ * capabilities our run-fns rely on. Chrome's surface evolves and some flags
+ * (json-mode via `responseConstraint`, tool-use via `tools`) are not
+ * universally available — feature detection is the only reliable signal.
+ */
+export interface WebBrowserProbedCapabilities {
+  readonly jsonMode: boolean;
+  readonly toolUse: boolean;
+}
+
+/**
+ * Minimal slice of the `LanguageModel` static surface the probe touches, declared
+ * locally so tests can inject a fake without `@types/dom-chromium-ai` globals.
+ * `create` is the smoke-test target; `params` is optional and its result unused.
+ */
+export interface WebBrowserProbeFactory {
+  create(options?: unknown): Promise<{ destroy(): unknown }>;
+  params?(): Promise<unknown>;
+}
+
+/**
+ * Conservative probe result: neither `json-mode` nor `tool-use`. Returned when
+ * the `LanguageModel` API can't be resolved (e.g. outside Chrome) and used by
+ * `WebBrowserProvider` as its pre-probe default. Frozen, since one instance is shared.
+ */
+export const CONSERVATIVE_PROBED_CAPABILITIES: WebBrowserProbedCapabilities = Object.freeze({
+  jsonMode: false,
+  toolUse: false,
+});
+
+/**
+ * Module-level coalescing slot so concurrent `probeWebBrowserCapabilities()`
+ * callers share the same in-flight promise. Cleared via {@link _resetProbeCache}
+ * for tests only.
+ */
+let probePromise: Promise<WebBrowserProbedCapabilities> | undefined;
+
+/**
+ * Probe the running browser for `json-mode` and `tool-use` support on the
+ * `LanguageModel` API. The result is cached at module level: later and
+ * concurrent calls share one promise, so the create/destroy smoke tests
+ * run once per page load and each throwaway session is destroyed at once.
+ *
+ * The probe is intentionally conservative. A rejected `factory.create`
+ * means "not supported", and any unexpected failure degrades to
+ * {@link CONSERVATIVE_PROBED_CAPABILITIES}, because surfacing transient
+ * failures into capability inference would flip declared capabilities
+ * mid-session.
+ *
+ * @param factory Optional injected factory for tests; defaults to the real
+ *                `LanguageModel` global. Ignored when a probe is already
+ *                cached — call {@link _resetProbeCache} first.
+ * @returns A promise for the probed flags. It never rejects.
+ */
+export function probeWebBrowserCapabilities(
+  factory?: WebBrowserProbeFactory
+): Promise<WebBrowserProbedCapabilities> {
+  if (probePromise) return probePromise;
+
+  const runProbe = async (): Promise<WebBrowserProbedCapabilities> => {
+    let resolvedFactory = factory;
+    if (!resolvedFactory) {
+      // Resolve the real global lazily through `getApi`, which surfaces a
+      // consistent error if `LanguageModel` is missing. We catch that and
+      // treat "missing" as "no optional capabilities".
+      try {
+        // The global may be absent outside Chrome. Reading a missing
+        // property off `globalThis` just yields `undefined`, so neither a
+        // `typeof` guard nor an `any` cast is needed.
+        const ambient = globalThis as { LanguageModel?: WebBrowserProbeFactory };
+        resolvedFactory = getApi("LanguageModel", ambient.LanguageModel);
+      } catch {
+        return CONSERVATIVE_PROBED_CAPABILITIES;
+      }
+    }
+
+    // Forward-compat hook: `LanguageModel.params()` would be the cheapest
+    // signal, but today it only reports topK/temperature, so its result is
+    // ignored and the smoke tests below remain the actual feature gates.
+    if (typeof resolvedFactory.params === "function") {
+      try {
+        await resolvedFactory.params();
+      } catch {
+        // Non-fatal — params() restricted to extensions and may reject on web.
+      }
+    }
+
+    const jsonMode = await probeOption(resolvedFactory, {
+      responseConstraint: { type: "object" },
+    });
+    const toolUse = await probeOption(resolvedFactory, {
+      tools: [
+        {
+          name: "_probe",
+          description: "",
+          inputSchema: { type: "object" },
+          execute: async (): Promise<string> => "",
+        },
+      ],
+    });
+
+    return { jsonMode, toolUse };
+  };
+
+  // Cache a promise that can never reject: a rejected entry would stay
+  // cached for the page lifetime and surface as an unhandled rejection in
+  // callers that start the probe without awaiting it. Unexpected failures
+  // (e.g. a throwing `params` accessor) degrade to the conservative result.
+  probePromise = runProbe().catch(() => CONSERVATIVE_PROBED_CAPABILITIES);
+
+  return probePromise;
+}
+
+/**
+ * Smoke-test one `create()` option: resolve → `true`, reject → `false`; a
+ * created session is destroyed immediately (best-effort). NOTE(review): an
+ * unknown option may be ignored by `create()` instead of rejected — confirm.
+ */
+async function probeOption(
+  factory: WebBrowserProbeFactory,
+  options: Record<string, unknown>
+): Promise<boolean> {
+  let accepted = false;
+  try {
+    const handle = await factory.create(options);
+    accepted = true;
+    handle.destroy();
+  } catch {
+    // Either create() rejected (`accepted` is still false) or destroy()
+    // failed after acceptance, which doesn't affect the probe outcome.
+  }
+  return accepted;
+}
+
+/**
+ * @internal Test-only escape hatch: clears the coalescing slot so the next
+ * `probeWebBrowserCapabilities()` call runs a fresh probe (and honours its
+ * `factory` argument). Production code never resets the probe.
+ */
+export function _resetProbeCache(): void {
+  probePromise = undefined;
+}
diff --git a/providers/chrome-ai/src/ai/common/WebBrowser_Sessions.ts b/providers/chrome-ai/src/ai/common/WebBrowser_Sessions.ts
index 98d2cebbc..026acb29e 100644
--- a/providers/chrome-ai/src/ai/common/WebBrowser_Sessions.ts
+++ b/providers/chrome-ai/src/ai/common/WebBrowser_Sessions.ts
@@ -21,6 +21,20 @@
 export interface ChromeChatSessionState {
   readonly session: LanguageModel;
   readonly messageCount: number;
+  /**
+   * Stable fingerprint of the `outputSchema` the session was created for
+   * (StructuredGeneration runs). Reuse requires an exact match — a schema
+   * change forces a session rebuild because Chrome bakes the constraint
+   * into the session's response handling state.
+   */
+  readonly schemaFingerprint?: string;
+  /**
+   * Stable fingerprint of the *sorted* tool name list the session was
+   * created with (ToolCalling runs). Tool-set changes invalidate the
+   * cached session because Chrome's tools are bound at `create()` time
+   * and can't be hot-swapped per turn.
+   */
+  readonly toolsFingerprint?: string;
 }
 
 const chromeSessions = new Map<string, ChromeChatSessionState>();
diff --git a/providers/chrome-ai/src/ai/common/WebBrowser_StructuredGeneration.ts b/providers/chrome-ai/src/ai/common/WebBrowser_StructuredGeneration.ts
index a7ad55349..e529b824a 100644
--- a/providers/chrome-ai/src/ai/common/WebBrowser_StructuredGeneration.ts
+++ b/providers/chrome-ai/src/ai/common/WebBrowser_StructuredGeneration.ts
@@ -9,10 +9,48 @@ import type {
   StructuredGenerationTaskInput,
   StructuredGenerationTaskOutput,
 } from "@workglow/ai";
+import { PermanentJobError } from "@workglow/job-queue";
+import type { JsonSchema, SchemaNode } from "@workglow/util/schema";
+import { compileSchema } from "@workglow/util/schema";
 import { parsePartialJson } from "@workglow/util/worker";
 
 import { createDownloadMonitor, ensureAvailable, getApi } from "./WebBrowser_ChromeHelpers";
 import type { WebBrowserModelConfig } from "./WebBrowser_ModelSchema";
+import {
+  deleteChromeSession,
+  dropChromeSessionEntry,
+  getChromeSession,
+  setChromeSession,
+} from "./WebBrowser_Sessions";
+
+/**
+ * Stable fingerprint of an `outputSchema` value, used to decide whether a
+ * cached Chrome session can be reused. The schema is canonicalised by
+ * sorting object keys before stringification so that semantically-equal
+ * schemas with differently-ordered properties produce the same fingerprint.
+ *
+ * Implementation note: we intentionally do not hash this. A medium-length
+ * JSON string is fine as a cache key — the cache lives in-memory, scoped
+ * to a session id, and turn-over is low.
+ */
+function schemaFingerprint(schema: object): string {
+  return canonicalStringify(schema);
+}
+
+/**
+ * Recursively sorts object keys so the output is stable regardless of
+ * insertion order; arrays keep their order (meaningful in JSON Schema for
+ * e.g. `oneOf`/`enum`). `undefined` is dropped from objects, `null` elsewhere.
+ */
+function canonicalStringify(value: unknown): string {
+  if (value === null || typeof value !== "object") return JSON.stringify(value) ?? "null";
+  if (Array.isArray(value)) return `[${value.map(canonicalStringify).join(",")}]`;
+  const obj = value as Record<string, unknown>;
+  // Omit `undefined` members so `{ a: undefined }` fingerprints like `{}`.
+  const keys = Object.keys(obj).filter((k) => obj[k] !== undefined);
+  const entries = keys.sort().map((k) => `${JSON.stringify(k)}:${canonicalStringify(obj[k])}`);
+  return `{${entries.join(",")}}`;
+}
 
 /**
  * Streaming run-fn for `["text.generation", "json-mode"]`.
@@ -33,12 +71,31 @@ import type { WebBrowserModelConfig } from "./WebBrowser_ModelSchema";
  * `temperature` is `@deprecated` for non-extension contexts in the current
  * Chrome spec and silently ignored on the open web. Passed through anyway
  * so extension callers still get the knob.
+ *
+ * ## Session reuse
+ *
+ * When `sessionId` is provided we cache the underlying `LanguageModel`
+ * keyed by it, mirroring `WebBrowser_Chat`. Sessions are reused by
+ * `sessionId`; however, if the `outputSchema` changes (detected via
+ * `schemaFingerprint`), we rebuild the Chrome session because
+ * `responseConstraint` state is bound to the schema first used with that
+ * session, and mixing schemas on a reused session is undefined behavior.
+ *
+ * ## Validation
+ *
+ * Chrome's `responseConstraint` is best-effort, not a hard guarantee.
+ * After streaming we validate both that the final accumulated text parses
+ * as JSON *and* that the parsed object satisfies `outputSchema`. Failures
+ * raise {@link PermanentJobError} — `StructuredGenerationTask` runs us
+ * inside a retry loop that catches per-attempt errors, so throwing here
+ * is the correct way to mark this attempt failed without misleading
+ * downstream consumers with a `finish` carrying garbage.
  */
 export const WebBrowser_StructuredGeneration: AiProviderRunFn<
   StructuredGenerationTaskInput,
   StructuredGenerationTaskOutput,
   WebBrowserModelConfig
-> = async (input, _model, signal, emit, outputSchema) => {
+> = async (input, _model, signal, emit, outputSchema, sessionId) => {
   const factory = getApi(
     "LanguageModel",
     typeof LanguageModel !== "undefined" ? LanguageModel : undefined
@@ -47,14 +104,49 @@ export const WebBrowser_StructuredGeneration: AiProviderRunFn<
 
   const schema = (input.outputSchema ?? outputSchema) as object | undefined;
   if (!schema) {
-    throw new Error("WebBrowser_StructuredGeneration: outputSchema is required");
+    throw new PermanentJobError("WebBrowser_StructuredGeneration: outputSchema is required");
+  }
+
+  // Compile validator up-front so a bad schema fails fast (cheap, ahead of
+  // any provider work). Re-thrown as PermanentJobError so the surrounding
+  // retry loop doesn't waste attempts on a malformed schema.
+  let validator: SchemaNode;
+  try {
+    validator = compileSchema(schema as JsonSchema);
+  } catch (err) {
+    const msg = err instanceof Error ? err.message : String(err);
+    throw new PermanentJobError(`WebBrowser_StructuredGeneration: invalid outputSchema — ${msg}`);
   }
 
-  const session = await factory.create({
-    signal,
-    temperature: input.temperature ?? undefined,
-    monitor: createDownloadMonitor(emit),
-  });
+  const fingerprint = schemaFingerprint(schema);
+
+  // Successive StructuredGeneration calls with the same `sessionId` are
+  // meant as independent prompts; the cached session is reused purely to
+  // amortise `LanguageModel.create()`, gated only on schema fingerprint.
+  // NOTE(review): a Chrome session keeps earlier prompt/response turns in
+  // its context, so reused calls are not isolated from the model's point
+  // of view and consume context quota — TODO confirm this is acceptable.
+  // `messageCount` is a plain call counter, kept for `WebBrowser_Chat` parity.
+  let cached = sessionId ? getChromeSession(sessionId) : undefined;
+  if (sessionId !== undefined && cached && cached.schemaFingerprint !== fingerprint) {
+    deleteChromeSession(sessionId);
+    cached = undefined;
+  }
+  const priorMessageCount = cached?.messageCount ?? 0;
+
+  const usedCachedSession = cached !== undefined;
+  let session: LanguageModel;
+  if (cached) {
+    session = cached.session;
+  } else {
+    session = await factory.create({
+      signal,
+      temperature: input.temperature ?? undefined,
+      monitor: createDownloadMonitor(emit),
+    });
+  }
+
+  let cacheWritten = false;
   try {
     const stream = session.promptStreaming(input.prompt, {
       signal,
@@ -87,17 +179,57 @@ export const WebBrowser_StructuredGeneration: AiProviderRunFn<
       reader.releaseLock();
     }
 
+    // Validate the *final* output. `responseConstraint` is best-effort on
+    // Chrome — if the model produces an unparseable continuation or a
+    // shape mismatch, we surface a permanent (per-attempt) error rather
+    // than fabricate a `{}` result that downstream code can't distinguish
+    // from a legitimate empty object.
     let finalObject: Record<string, unknown>;
     try {
       finalObject = JSON.parse(accumulatedJson) as Record<string, unknown>;
     } catch {
-      finalObject = (parsePartialJson(accumulatedJson) ?? {}) as Record<string, unknown>;
+      const partial = parsePartialJson(accumulatedJson);
+      if (partial === undefined) {
+        throw new PermanentJobError("Chrome AI returned unparseable JSON");
+      }
+      finalObject = partial as Record<string, unknown>;
+    }
+
+    const validation = validator.validate(finalObject);
+    if (!validation.valid) {
+      const firstError = validation.errors[0];
+      const detail = firstError?.message ?? "unknown validation error";
+      throw new PermanentJobError(`Chrome AI output failed schema validation: ${detail}`);
+    }
+
+    if (sessionId !== undefined) {
+      // Ownership of `session` transfers to the cache; the provider's
+      // `disposeSession` reclaims it at end of run.
+      setChromeSession(sessionId, {
+        session,
+        messageCount: priorMessageCount + 1,
+        schemaFingerprint: fingerprint,
+      });
+      cacheWritten = true;
     }
     emit({
       type: "finish",
       data: { object: finalObject } as StructuredGenerationTaskOutput,
     });
   } finally {
-    session.destroy();
+    // Mirror WebBrowser_Chat's cache-poison handling. If we threw before
+    // writing the cache entry and we reused a cached session, the cache
+    // entry is now poisoned (partial state); drop it (only if it still
+    // points at our handle, to avoid trampling a replacement) and destroy.
+    if (!cacheWritten) {
+      if (sessionId !== undefined && usedCachedSession) {
+        dropChromeSessionEntry(sessionId, session);
+      }
+      try {
+        session.destroy();
+      } catch {
+        // best-effort
+      }
+    }
   }
 };
diff --git a/providers/chrome-ai/src/ai/common/WebBrowser_ToolCalling.ts b/providers/chrome-ai/src/ai/common/WebBrowser_ToolCalling.ts
index 25975f7b4..83a6d87b8 100644
--- a/providers/chrome-ai/src/ai/common/WebBrowser_ToolCalling.ts
+++ b/providers/chrome-ai/src/ai/common/WebBrowser_ToolCalling.ts
@@ -10,9 +10,13 @@ import type {
   ToolCall,
   ToolCallingTaskInput,
   ToolCallingTaskOutput,
+  ToolDefinition,
 } from "@workglow/ai";
 import { buildToolDescription, filterValidToolCalls } from "@workglow/ai";
 import { uuid4 } from "@workglow/util";
+import type { JsonSchema, SchemaNode } from "@workglow/util/schema";
+import { compileSchema } from "@workglow/util/schema";
+import { getLogger } from "@workglow/util/worker";
 
 import {
   buildInitialPromptsFromHistory,
@@ -26,6 +30,12 @@ import {
   snapshotStreamToTextDeltas,
 } from "./WebBrowser_ChromeHelpers";
 import type { WebBrowserModelConfig } from "./WebBrowser_ModelSchema";
+import {
+  deleteChromeSession,
+  dropChromeSessionEntry,
+  getChromeSession,
+  setChromeSession,
+} from "./WebBrowser_Sessions";
 
 function flattenPrompt(prompt: ToolCallingTaskInput["prompt"]): string {
   if (typeof prompt === "string") return prompt;
@@ -54,24 +64,47 @@ function flattenPrompt(prompt: ToolCallingTaskInput["prompt"]): string {
 function buildToolCallPrompt(input: ToolCallingTaskInput): {
   initialPrompts: LanguageModelCreateOptions["initialPrompts"];
   promptText: string;
+  priorMessageCount: number;
 } {
   const hasMessages = Array.isArray(input.messages) && input.messages.length > 0;
   if (hasMessages) {
     const messages = input.messages as readonly ChatMessage[];
     const lastUserIdx = findLastUserIndex(messages);
     if (lastUserIdx < 0) {
-      return { initialPrompts: [], promptText: flattenPrompt(input.prompt) };
+      return {
+        initialPrompts: [],
+        promptText: flattenPrompt(input.prompt),
+        priorMessageCount: messages.length,
+      };
     }
     return {
       initialPrompts: buildInitialPromptsFromHistory(messages.slice(0, lastUserIdx)),
       promptText: messageText(messages[lastUserIdx]),
+      priorMessageCount: lastUserIdx,
     };
   }
 
   const initialPrompts: LanguageModelCreateOptions["initialPrompts"] = input.systemPrompt
     ? [{ role: "system", content: input.systemPrompt }]
     : [];
-  return { initialPrompts, promptText: flattenPrompt(input.prompt) };
+  return { initialPrompts, promptText: flattenPrompt(input.prompt), priorMessageCount: 0 };
+}
+
+/**
+ * Cache key for the tool set a session was created with: the non-empty
+ * tool names, sorted and comma-joined. Chrome binds tools at `create()`
+ * time and can't hot-swap them per turn, so any change to the set of
+ * names has to invalidate a cached session. Each tool's `inputSchema` is
+ * deliberately left out: a schema-only edit on a same-named tool is
+ * unusual enough that the modest correctness risk is preferable to
+ * fingerprinting full schemas every turn.
+ */
+function toolsFingerprint(tools: readonly ToolDefinition[]): string {
+  const names: string[] = [];
+  for (const { name } of tools) {
+    if (typeof name === "string" && name.length > 0) names.push(name);
+  }
+  return names.sort().join(",");
+}
 
 /**
@@ -96,12 +129,35 @@ function buildToolCallPrompt(input: ToolCallingTaskInput): {
  * `temperature` is `@deprecated` for non-extension contexts in the current
  * Chrome spec and silently ignored on the open web. Passed through anyway
  * so extension callers still get the knob.
+ *
+ * ## Session reuse
+ *
+ * When `sessionId` is provided and `input.messages` is present we *may*
+ * cache the underlying `LanguageModel`. There's a real correctness risk
+ * here: Chrome's tool-calling loop appends tool-result turns to the
+ * session's internal state opaquely. Reusing the cached session across
+ * orchestrator turns would double-feed those results once the orchestrator
+ * also re-supplies them via `messages`. To stay safe:
+ *  - We only cache when the orchestrator is driving via `input.messages`.
+ *  - Cache reuse requires that the tool set hasn't changed.
+ *  - On any error we drop and destroy the cache entry — Chrome's internal
+ *    state may be in the middle of a tool-call cycle.
+ *
+ * ## Argument validation (H3)
+ *
+ * Chrome calls `execute` with `(args)` where `args[0]` is whatever the
+ * model produced. The model can hallucinate fields that don't match the
+ * tool's `inputSchema`. We compile each tool's schema once, validate the
+ * captured arguments before passing them to `filterValidToolCalls`, and
+ * drop+log calls that fail. Tools whose `inputSchema` fails to compile
+ * fall through to name-only validation (same as today's behavior) with
+ * a single warning so a malformed schema doesn't crash the run.
  */
 export const WebBrowser_ToolCalling: AiProviderRunFn<
   ToolCallingTaskInput,
   ToolCallingTaskOutput,
   WebBrowserModelConfig
-> = async (input, _model, signal, emit) => {
+> = async (input, _model, signal, emit, _outputSchema, sessionId) => {
   const factory = getApi(
     "LanguageModel",
     typeof LanguageModel !== "undefined" ? LanguageModel : undefined
@@ -110,6 +166,24 @@ export const WebBrowser_ToolCalling: AiProviderRunFn<
 
   const capturedCalls: ToolCall[] = [];
 
+  // Compile validators once per tool. A bad schema downgrades that tool
+  // to name-only validation rather than failing the whole run — the
+  // existing `filterValidToolCalls` name check is still applied below.
+  const validators = new Map<string, SchemaNode | null>();
+  for (const td of input.tools) {
+    try {
+      validators.set(td.name, compileSchema(td.inputSchema as JsonSchema));
+    } catch (err) {
+      const msg = err instanceof Error ? err.message : String(err);
+      getLogger().warn(
+        `WebBrowser_ToolCalling: tool "${td.name}" has invalid inputSchema, ` +
+          `falling back to name-only validation — ${msg}`,
+        { toolName: td.name }
+      );
+      validators.set(td.name, null);
+    }
+  }
+
   // `toolChoice: "none"` → omit tools entirely so the model can't call any.
   // Specific tool-name choices aren't expressible in Chrome's surface; we
   // pass all tools and let the model decide.
@@ -131,16 +205,40 @@ export const WebBrowser_ToolCalling: AiProviderRunFn<
           },
         }));
 
-  const { initialPrompts, promptText } = buildToolCallPrompt(input);
+  const { initialPrompts, promptText, priorMessageCount } = buildToolCallPrompt(input);
+  const fingerprint = toolsFingerprint(input.tools);
 
-  const session = await factory.create({
-    signal,
-    temperature: input.temperature ?? undefined,
-    tools: chromeTools.length > 0 ? chromeTools : undefined,
-    initialPrompts,
-    monitor: createDownloadMonitor(emit),
-  });
+  // Safety guard: reuse only when the orchestrator drives via `input.messages`;
+  // bare-prompt sessions may carry opaque tool-call state. NOTE(review): a reused
+  // session keeps the `tools` bound at its `create()` — verify this run still captures calls.
+  const cacheable = sessionId !== undefined && Array.isArray(input.messages);
 
+  let cached = cacheable && sessionId ? getChromeSession(sessionId) : undefined;
+  if (
+    cacheable &&
+    sessionId &&
+    cached &&
+    (cached.messageCount !== priorMessageCount || cached.toolsFingerprint !== fingerprint)
+  ) {
+    deleteChromeSession(sessionId);
+    cached = undefined;
+  }
+
+  const usedCachedSession = cached !== undefined;
+  let session: LanguageModel;
+  if (cached) {
+    session = cached.session;
+  } else {
+    session = await factory.create({
+      signal,
+      temperature: input.temperature ?? undefined,
+      tools: chromeTools.length > 0 ? chromeTools : undefined,
+      initialPrompts,
+      monitor: createDownloadMonitor(emit),
+    });
+  }
+
+  let cacheWritten = false;
   try {
     const stream = session.promptStreaming(promptText, { signal });
     // Forward text-delta and snapshot events; swallow the inner `finish`
@@ -154,14 +252,50 @@ export const WebBrowser_ToolCalling: AiProviderRunFn<
       emit(e);
     }
 
+    // Validate each captured call's `input` against its tool's compiled
+    // schema. Calls with no compiled validator (schema compile failed)
+    // skip this step and rely on the name-only check below.
+    const argValidated = capturedCalls.filter((tc) => {
+      const v = validators.get(tc.name);
+      if (!v) return true;
+      const result = v.validate(tc.input);
+      if (result.valid) return true;
+      const firstError = result.errors[0];
+      const detail = firstError?.message ?? "unknown validation error";
+      getLogger().warn(
+        `WebBrowser_ToolCalling: dropping call to "${tc.name}" — args fail inputSchema (${detail})`,
+        { callId: tc.id, toolName: tc.name }
+      );
+      return false;
+    });
+
     // Defence in depth against hallucinated tool names — same shape as
     // OpenAI/Anthropic tool-calling run-fns.
-    const validated = filterValidToolCalls(capturedCalls, input.tools);
+    const validated = filterValidToolCalls(argValidated, input.tools);
     if (validated.length > 0) {
       emit({ type: "object-delta", port: "toolCalls", objectDelta: validated });
     }
+    if (cacheable && sessionId !== undefined) {
+      // Watermark post-turn count: prior history + 1 trailing user turn +
+      // 1 assistant turn. Matches WebBrowser_Chat's convention.
+      setChromeSession(sessionId, {
+        session,
+        messageCount: priorMessageCount + 2,
+        toolsFingerprint: fingerprint,
+      });
+      cacheWritten = true;
+    }
     emit({ type: "finish", data: {} as ToolCallingTaskOutput });
   } finally {
-    session.destroy();
+    if (!cacheWritten) {
+      if (cacheable && sessionId !== undefined && usedCachedSession) {
+        dropChromeSessionEntry(sessionId, session);
+      }
+      try {
+        session.destroy();
+      } catch {
+        // best-effort
+      }
+    }
   }
 };
diff --git a/providers/chrome-ai/src/ai/index.ts b/providers/chrome-ai/src/ai/index.ts
index 04ee3f084..378edc6e0 100644
--- a/providers/chrome-ai/src/ai/index.ts
+++ b/providers/chrome-ai/src/ai/index.ts
@@ -10,7 +10,13 @@ export * from "./common/WebBrowser_Constants";
 export * from "./common/WebBrowser_ModelSchema";
 export * from "./registerWebBrowser";
 
-import { WEB_BROWSER_RUN_FN_SPECS } from "./common/WebBrowser_Capabilities";
+import {
+  CONSERVATIVE_PROBED_CAPABILITIES,
+  inferWebBrowserCapabilities,
+  inferWebBrowserCapabilitiesAsync,
+  WEB_BROWSER_RUN_FN_SPECS,
+} from "./common/WebBrowser_Capabilities";
+import { _resetProbeCache, probeWebBrowserCapabilities } from "./common/WebBrowser_CapabilityProbe";
 import {
   buildInitialPromptsFromHistory,
   findLastUserIndex,
@@ -26,6 +32,8 @@ import {
   getChromeSession,
   setChromeSession,
 } from "./common/WebBrowser_Sessions";
+import { WebBrowser_StructuredGeneration } from "./common/WebBrowser_StructuredGeneration";
+import { WebBrowser_ToolCalling } from "./common/WebBrowser_ToolCalling";
 import { WebBrowserProvider } from "./WebBrowserProvider";
 
 /**
@@ -36,6 +44,8 @@ export const _testOnly = {
   WEB_BROWSER_RUN_FN_SPECS,
   WEB_BROWSER_RUN_FNS,
   WebBrowser_TextGeneration_Unified,
+  WebBrowser_StructuredGeneration,
+  WebBrowser_ToolCalling,
   sessions: {
     getChromeSession,
     setChromeSession,
@@ -47,4 +57,11 @@ export const _testOnly = {
     findLastUserIndex,
     buildInitialPromptsFromHistory,
   },
+  probe: {
+    probeWebBrowserCapabilities,
+    inferWebBrowserCapabilities,
+    inferWebBrowserCapabilitiesAsync,
+    CONSERVATIVE_PROBED_CAPABILITIES,
+    _resetProbeCache,
+  },
 } as const;