diff --git a/CLAUDE.md b/CLAUDE.md
new file mode 100644
index 0000000..a06df30
--- /dev/null
+++ b/CLAUDE.md
@@ -0,0 +1,25 @@
+# CLAUDE.md
+
+This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
+
+## Build Commands
+
+```bash
+cargo build --release          # Production build
+cargo make check-format        # Format check (CI runs this)
+cargo make clippy              # Lint — canonical blocking gate (== CI and bacon)
+cargo make test                # Run all tests (== CI's nextest run)
+```
+
+Always prefer `cargo make <task>` over invoking `cargo clippy`/`cargo test` directly.
+The `Makefile.toml` tasks are the canonical, `--locked` commands that CI
+(`on-push.yml`, `on-merge.yml`'s `ci-flow`) and bacon (`cargo make monitor`) all
+share byte-for-byte, so a clean local run predicts a clean CI run. Plain `cargo
+clippy`/`cargo test` skip `--locked` and can silently pass locally on a resolution
+CI would reject.
+
+- `cargo make clippy` expands to `cargo clippy --all-targets --workspace --locked
+  -- -D warnings` (see the `[tasks.clippy]` comment in `Makefile.toml`).
+- `cargo make test` expands to `cargo nextest run --locked` — CI's `ci-flow` uses
+  nextest, not the built-in test harness, so `cargo test` alone doesn't fully
+  match CI.
diff --git a/Cargo.lock b/Cargo.lock
index 3439c27..cda6a5a 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -897,6 +897,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "5cee35f73844aa3014bb606320a6c1f010249dbdf43342fe54b5a4f6a8ed4b79"
 dependencies = [
  "memchr",
+ "regex-automata",
  "serde_core",
 ]
 
@@ -977,9 +978,8 @@ checksum = "4f4c707c6a209cbe82d10abd08e1ea8995e9ea937d2550646e02798948992be0"
 
 [[package]]
 name = "cersei-agent"
-version = "0.1.9"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a08fc0818d95193cd88b0c48614d0c4e365575c24dd661c77bfd1e7bed2aeb24"
+version = "0.2.6"
+source = "git+https://github.com/wack/cersei.git?branch=trunk#d485d50e31c055c8a2bc7ba5d9ad03b384088c0c"
 dependencies = [
  "anyhow",
  "async-trait",
@@ -1005,9 +1005,8 @@ dependencies = [
 
 [[package]]
 name = "cersei-compression"
-version = "0.1.9"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "aebaa0636dc53483e65754b84c4c3e21f925eb993bb744fcf5e0a5f5af78dff2"
+version = "0.2.6"
+source = "git+https://github.com/wack/cersei.git?branch=trunk#d485d50e31c055c8a2bc7ba5d9ad03b384088c0c"
 dependencies = [
  "anyhow",
  "once_cell",
@@ -1020,9 +1019,8 @@ dependencies = [
 
 [[package]]
 name = "cersei-embeddings"
-version = "0.1.9"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "10381161084bfe0e8667956569fb84c27bbae77ce7464418be7d494d778d83ea"
+version = "0.2.6"
+source = "git+https://github.com/wack/cersei.git?branch=trunk#d485d50e31c055c8a2bc7ba5d9ad03b384088c0c"
 dependencies = [
  "async-trait",
  "futures",
@@ -1037,9 +1035,8 @@ dependencies = [
 
 [[package]]
 name = "cersei-hooks"
-version = "0.1.9"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c7ba452fa659817bd9d156bfb76012db1e7b4742166a3fd306542df285889f0a"
+version = "0.2.6"
+source = "git+https://github.com/wack/cersei.git?branch=trunk#d485d50e31c055c8a2bc7ba5d9ad03b384088c0c"
 dependencies = [
  "async-trait",
  "cersei-types",
@@ -1050,9 +1047,8 @@ dependencies = [
 
 [[package]]
 name = "cersei-lsp"
-version = "0.1.9"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "242d5870423770ca850cb26e2af874bf84d2fac0f370ebc398fd9a82fa33c468"
+version = "0.2.6"
+source = "git+https://github.com/wack/cersei.git?branch=trunk#d485d50e31c055c8a2bc7ba5d9ad03b384088c0c"
 dependencies = [
  "dashmap",
  "serde",
@@ -1065,9 +1061,8 @@ dependencies = [
 
 [[package]]
 name = "cersei-mcp"
-version = "0.1.9"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "82a89c0952ebfdb2001cc915150e9a65fc1400caabd34b8c3d27bd94d8d39fdc"
+version = "0.2.6"
+source = "git+https://github.com/wack/cersei.git?branch=trunk#d485d50e31c055c8a2bc7ba5d9ad03b384088c0c"
 dependencies = [
  "async-trait",
  "cersei-types",
@@ -1080,9 +1075,8 @@ dependencies = [
 
 [[package]]
 name = "cersei-memory"
-version = "0.1.9"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "df45f5f8e75c59ee65f063c961be9278eb051e0b1494b4b3ea6de53c066953bc"
+version = "0.2.6"
+source = "git+https://github.com/wack/cersei.git?branch=trunk#d485d50e31c055c8a2bc7ba5d9ad03b384088c0c"
 dependencies = [
  "async-trait",
  "cersei-types",
@@ -1099,13 +1093,15 @@ dependencies = [
 
 [[package]]
 name = "cersei-provider"
-version = "0.1.9"
+version = "0.2.6"
+source = "git+https://github.com/wack/cersei.git?branch=trunk#d485d50e31c055c8a2bc7ba5d9ad03b384088c0c"
 dependencies = [
  "async-trait",
  "base64 0.22.1",
  "cersei-types",
  "chrono",
  "futures",
+ "gcp_auth",
  "reqwest 0.12.28",
  "reqwest-eventsource",
  "serde",
@@ -1117,9 +1113,8 @@ dependencies = [
 
 [[package]]
 name = "cersei-tools"
-version = "0.1.9"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2b47981f73e04abfca64959ac2a82908c08f3027f03ba080e90de8042a438720"
+version = "0.2.6"
+source = "git+https://github.com/wack/cersei.git?branch=trunk#d485d50e31c055c8a2bc7ba5d9ad03b384088c0c"
 dependencies = [
  "async-trait",
  "base64 0.22.1",
@@ -1131,7 +1126,9 @@ dependencies = [
  "dashmap",
  "dirs",
  "glob",
+ "grep",
  "html2text",
+ "ignore",
  "nix 0.29.0",
  "notify",
  "once_cell",
@@ -1159,11 +1156,11 @@ dependencies = [
 
 [[package]]
 name = "cersei-types"
-version = "0.1.9"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "93b96148387c0a203671ed399209327cbfca47d5fa10425260deeaf3a0595470"
+version = "0.2.6"
+source = "git+https://github.com/wack/cersei.git?branch=trunk#d485d50e31c055c8a2bc7ba5d9ad03b384088c0c"
 dependencies = [
  "anyhow",
+ "base64 0.22.1",
  "chrono",
  "reqwest 0.12.28",
  "serde",
@@ -1409,6 +1406,12 @@ dependencies = [
  "cfg-if",
 ]
 
+[[package]]
+name = "critical-section"
+version = "1.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "790eea4361631c5e7d22598ecd5723ff611904e3344ce8720784c93e3d83d40b"
+
 [[package]]
 name = "crossbeam-channel"
 version = "0.5.15"
@@ -1605,6 +1608,12 @@ dependencies = [
  "parking_lot_core",
 ]
 
+[[package]]
+name = "data-encoding"
+version = "2.11.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a4ae5f15dda3c708c0ade84bfee31ccab44a3da4f88015ed22f63732abe300c8"
+
 [[package]]
 name = "debugid"
 version = "0.8.0"
@@ -1781,12 +1790,33 @@ dependencies = [
  "cfg-if",
 ]
 
+[[package]]
+name = "encoding_rs_io"
+version = "0.1.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1cc3c5651fb62ab8aa3103998dade57efdd028544bd300516baa31840c252a83"
+dependencies = [
+ "encoding_rs",
+]
+
 [[package]]
 name = "entities"
 version = "1.0.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "b5320ae4c3782150d900b79807611a59a99fc9a1d61d686faafc24b93fc8d7ca"
 
+[[package]]
+name = "enum-as-inner"
+version = "0.6.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a1e6a265c649f3f5979b601d26f1d05ada116434c87741c9493cb56218f76cbc"
+dependencies = [
+ "heck 0.5.0",
+ "proc-macro2",
+ "quote",
+ "syn 2.0.118",
+]
+
 [[package]]
 name = "env_home"
 version = "0.1.0"
@@ -2072,6 +2102,33 @@ dependencies = [
  "slab",
 ]
 
+[[package]]
+name = "gcp_auth"
+version = "0.12.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "26d27dbcc645b60b8e7f6e2868a9d7102ece97d1bb49c1288b5321fcc67f7260"
+dependencies = [
+ "async-trait",
+ "base64 0.22.1",
+ "bytes",
+ "chrono",
+ "http 1.4.2",
+ "http-body-util",
+ "hyper 1.10.1",
+ "hyper-rustls 0.27.9",
+ "hyper-util",
+ "ring",
+ "rustls 0.23.41",
+ "rustls-pki-types",
+ "serde",
+ "serde_json",
+ "thiserror 2.0.18",
+ "tokio",
+ "tracing",
+ "tracing-futures",
+ "url",
+]
+
 [[package]]
 name = "generic-array"
 version = "0.14.7"
@@ -2146,6 +2203,85 @@ dependencies = [
  "regex-syntax",
 ]
 
+[[package]]
+name = "grep"
+version = "0.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "309217bc53e2c691c314389c7fa91f9cd1a998cda19e25544ea47d94103880c3"
+dependencies = [
+ "grep-cli",
+ "grep-matcher",
+ "grep-printer",
+ "grep-regex",
+ "grep-searcher",
+]
+
+[[package]]
+name = "grep-cli"
+version = "0.1.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cf32d263c5d5cc2a23ce587097f5ddafdb188492ba2e6fb638eaccdc22453631"
+dependencies = [
+ "bstr",
+ "globset",
+ "libc",
+ "log",
+ "termcolor",
+ "winapi-util",
+]
+
+[[package]]
+name = "grep-matcher"
+version = "0.1.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "36d7b71093325ab22d780b40d7df3066ae4aebb518ba719d38c697a8228a8023"
+dependencies = [
+ "memchr",
+]
+
+[[package]]
+name = "grep-printer"
+version = "0.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fd76035e87871f51c1ee5b793e32122b3ccf9c692662d9622ef1686ff5321acb"
+dependencies = [
+ "bstr",
+ "grep-matcher",
+ "grep-searcher",
+ "log",
+ "serde",
+ "serde_json",
+ "termcolor",
+]
+
+[[package]]
+name = "grep-regex"
+version = "0.1.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0ce0c256c3ad82bcc07b812c15a45ec1d398122e8e15124f96695234db7112ef"
+dependencies = [
+ "bstr",
+ "grep-matcher",
+ "log",
+ "regex-automata",
+ "regex-syntax",
+]
+
+[[package]]
+name = "grep-searcher"
+version = "0.1.16"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ac63295322dc48ebb20a25348147905d816318888e64f531bfc2a2bc0577dc34"
+dependencies = [
+ "bstr",
+ "encoding_rs",
+ "encoding_rs_io",
+ "grep-matcher",
+ "log",
+ "memchr",
+ "memmap2",
+]
+
 [[package]]
 name = "h2"
 version = "0.3.27"
@@ -2262,6 +2398,52 @@ version = "0.4.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70"
 
+[[package]]
+name = "hickory-proto"
+version = "0.25.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f8a6fe56c0038198998a6f217ca4e7ef3a5e51f46163bd6dd60b5c71ca6c6502"
+dependencies = [
+ "async-trait",
+ "cfg-if",
+ "data-encoding",
+ "enum-as-inner",
+ "futures-channel",
+ "futures-io",
+ "futures-util",
+ "idna",
+ "ipnet",
+ "once_cell",
+ "rand 0.9.4",
+ "ring",
+ "thiserror 2.0.18",
+ "tinyvec",
+ "tokio",
+ "tracing",
+ "url",
+]
+
+[[package]]
+name = "hickory-resolver"
+version = "0.25.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dc62a9a99b0bfb44d2ab95a7208ac952d31060efc16241c87eaf36406fecf87a"
+dependencies = [
+ "cfg-if",
+ "futures-util",
+ "hickory-proto",
+ "ipconfig",
+ "moka",
+ "once_cell",
+ "parking_lot",
+ "rand 0.9.4",
+ "resolv-conf",
+ "smallvec",
+ "thiserror 2.0.18",
+ "tokio",
+ "tracing",
+]
+
 [[package]]
 name = "hmac"
 version = "0.13.0"
@@ -2719,6 +2901,19 @@ dependencies = [
  "web-sys",
 ]
 
+[[package]]
+name = "ipconfig"
+version = "0.3.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4d40460c0ce33d6ce4b0630ad68ff63d6661961c48b6dba35e5a4d81cfb48222"
+dependencies = [
+ "socket2 0.6.4",
+ "widestring",
+ "windows-registry",
+ "windows-result",
+ "windows-sys 0.61.2",
+]
+
 [[package]]
 name = "ipnet"
 version = "2.12.0"
@@ -3125,6 +3320,23 @@ dependencies = [
  "syn 2.0.118",
 ]
 
+[[package]]
+name = "moka"
+version = "0.12.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "957228ad12042ee839f93c8f257b62b4c0ab5eaae1d4fa60de53b27c9d7c5046"
+dependencies = [
+ "crossbeam-channel",
+ "crossbeam-epoch",
+ "crossbeam-utils",
+ "equivalent",
+ "parking_lot",
+ "portable-atomic",
+ "smallvec",
+ "tagptr",
+ "uuid",
+]
+
 [[package]]
 name = "multi-core"
 version = "0.1.2"
@@ -3386,6 +3598,10 @@ name = "once_cell"
 version = "1.21.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50"
+dependencies = [
+ "critical-section",
+ "portable-atomic",
+]
 
 [[package]]
 name = "once_cell_polyfill"
@@ -3951,6 +4167,12 @@ version = "0.3.33"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "19f132c84eca552bf34cab8ec81f1c1dcc229b811638f9d283dceabe58c5569e"
 
+[[package]]
+name = "portable-atomic"
+version = "1.13.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49"
+
 [[package]]
 name = "potential_utf"
 version = "0.1.5"
@@ -4415,6 +4637,7 @@ dependencies = [
  "futures-core",
  "futures-util",
  "h2 0.4.15",
+ "hickory-resolver",
  "http 1.4.2",
  "http-body 1.0.1",
  "http-body-util",
@@ -4427,6 +4650,7 @@ dependencies = [
  "mime",
  "mime_guess",
  "native-tls",
+ "once_cell",
  "percent-encoding",
  "pin-project-lite",
  "quinn",
@@ -4467,6 +4691,12 @@ dependencies = [
  "thiserror 1.0.69",
 ]
 
+[[package]]
+name = "resolv-conf"
+version = "0.7.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1e061d1b48cb8d38042de4ae0a7a6401009d6143dc80d2e2d6f31f0bdd6470c7"
+
 [[package]]
 name = "ring"
 version = "0.17.14"
@@ -5346,6 +5576,12 @@ dependencies = [
  "libc",
 ]
 
+[[package]]
+name = "tagptr"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7b2093cf4c8eb1e67749a6762251bc9cd836b6fc171623bd0a9d324d37af2417"
+
 [[package]]
 name = "tantivy"
 version = "0.22.1"
@@ -5904,6 +6140,16 @@ dependencies = [
  "valuable",
 ]
 
+[[package]]
+name = "tracing-futures"
+version = "0.2.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "97d095ae15e245a057c8e8451bab9b3ee1e1f68e9ba2b4fbc18d0ac5237835f2"
+dependencies = [
+ "pin-project",
+ "tracing",
+]
+
 [[package]]
 name = "tracing-log"
 version = "0.2.0"
@@ -5979,9 +6225,9 @@ dependencies = [
 
 [[package]]
 name = "tree-sitter"
-version = "0.25.10"
+version = "0.26.10"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "78f873475d258561b06f1c595d93308a7ed124d9977cb26b148c2084a4a3cc87"
+checksum = "3c343ed63e3f5c64d1acdecb5d2c13d4e169cb5fde0052106ebaa6c6f27f9e55"
 dependencies = [
  "cc",
  "regex",
@@ -6381,6 +6627,12 @@ dependencies = [
  "winsafe",
 ]
 
+[[package]]
+name = "widestring"
+version = "1.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "72069c3113ab32ab29e5584db3c6ec55d416895e60715417b5b883a357c3e471"
+
 [[package]]
 name = "winapi"
 version = "0.3.9"
diff --git a/Cargo.toml b/Cargo.toml
index 39d4562..79fbdb9 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -14,15 +14,6 @@ path = "src/bin/main.rs"
 [workspace]
 members = ["crates/multi-core"]
 
-# MultiTool local patch (MULTI-1367). cersei-provider 0.1.9 unconditionally sends
-# a stale `anthropic-beta: interleaved-thinking-2025-04-14` header that the
-# current Anthropic API rejects with HTTP 400, breaking every in-process check.
-# Vendor the crate with that header corrected until upstream releases a fix.
-# See third_party/cersei-provider/PATCH.md and
-# https://github.com/pacifio/cersei/issues/20.
-[patch.crates-io]
-cersei-provider = { path = "third_party/cersei-provider" }
-
 [workspace.package]
 authors = ["The MultiTool Team"]
 edition = "2024"
@@ -39,7 +30,7 @@ aws-smithy-runtime-api = "1.7.4"
 aws-smithy-types = "1.2.9"
 bigdecimal = { version = "0.4.7", features = ["serde-json"] }
 bon = "3.3.2"
-cersei-provider = "0.1.9"
+cersei-provider = { git = "https://github.com/wack/cersei.git", branch = "trunk" }
 chrono = "0.4.38"
 clap = { version = "4.3", features = ["derive", "env"] }
 console = "0.15.8"
@@ -91,9 +82,9 @@ tokio-util.workspace = true
 comrak = { version = "0.52", default-features = false }
 libc = "0.2.186"
 tempfile = "3"
-cersei-agent = "0.1.9"
-cersei-tools = "0.1.9"
-cersei-types = "0.1.9"
+cersei-agent = { git = "https://github.com/wack/cersei.git", branch = "trunk" }
+cersei-tools = { git = "https://github.com/wack/cersei.git", branch = "trunk" }
+cersei-types = { git = "https://github.com/wack/cersei.git", branch = "trunk" }
 
 [workspace.dependencies]
 pretty_assertions = "1.4"
diff --git a/src/utils/mod.rs b/src/utils/mod.rs
index b26ebcc..d584267 100644
--- a/src/utils/mod.rs
+++ b/src/utils/mod.rs
@@ -18,7 +18,7 @@ pub async fn load_default_aws_config() -> &'static SdkConfig {
 async fn load_config() -> SdkConfig {
     // We don't need a particular version, but we pin to one to ensure
     // it doesn't accidently slip if `latest` gets updated without our knowledge.
-    let behavior = BehaviorVersion::v2025_08_07();
+    let behavior = BehaviorVersion::v2026_01_12();
     aws_config::load_defaults(behavior).await
 }
 
diff --git a/third_party/cersei-provider/Cargo.toml b/third_party/cersei-provider/Cargo.toml
deleted file mode 100644
index a901cd6..0000000
--- a/third_party/cersei-provider/Cargo.toml
+++ /dev/null
@@ -1,76 +0,0 @@
-# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
-#
-# When uploading crates to the registry Cargo will automatically
-# "normalize" Cargo.toml files for maximal compatibility
-# with all versions of Cargo and also rewrite `path` dependencies
-# to registry (e.g., crates.io) dependencies.
-#
-# If you are reading this file be aware that the original Cargo.toml
-# will likely look very different (and much more reasonable).
-# See Cargo.toml.orig for the original contents.
-
-[package]
-edition = "2021"
-name = "cersei-provider"
-version = "0.1.9"
-authors = ["Adib Mohsin"]
-build = false
-autolib = false
-autobins = false
-autoexamples = false
-autotests = false
-autobenches = false
-description = "Provider trait and built-in LLM providers for the Cersei SDK"
-readme = "README.md"
-license = "MIT"
-repository = "https://github.com/pacifio/cersei"
-
-[lib]
-name = "cersei_provider"
-path = "src/lib.rs"
-
-[dependencies.async-trait]
-version = "0.1"
-
-[dependencies.base64]
-version = "0.22"
-
-[dependencies.cersei-types]
-version = "0.1.9"
-
-[dependencies.chrono]
-version = "0.4"
-features = ["serde"]
-
-[dependencies.futures]
-version = "0.3"
-
-[dependencies.reqwest]
-version = "0.12"
-features = [
-    "json",
-    "stream",
-    "rustls-tls",
-    "rustls-tls-webpki-roots",
-]
-default-features = false
-
-[dependencies.reqwest-eventsource]
-version = "0.6"
-
-[dependencies.serde]
-version = "1"
-features = ["derive"]
-
-[dependencies.serde_json]
-version = "1"
-
-[dependencies.tokio]
-version = "1.44"
-features = ["full"]
-
-[dependencies.tracing]
-version = "0.1"
-
-[dependencies.url]
-version = "2"
diff --git a/third_party/cersei-provider/PATCH.md b/third_party/cersei-provider/PATCH.md
deleted file mode 100644
index 9f6549e..0000000
--- a/third_party/cersei-provider/PATCH.md
+++ /dev/null
@@ -1,50 +0,0 @@
-# Vendored `cersei-provider` (local patch)
-
-This is a vendored copy of [`cersei-provider`](https://crates.io/crates/cersei-provider)
-`0.1.9`, applied via `[patch.crates-io]` in the workspace `Cargo.toml`.
-
-## Why
-
-Upstream `0.1.9` (and `main` as of 2026-06) sends this header on **every**
-Anthropic request, unconditionally:
-
-```
-anthropic-beta: interleaved-thinking-2025-04-14,token-efficient-tools-2025-02-19
-```
-
-The current Anthropic API rejects `interleaved-thinking-2025-04-14` with:
-
-```
-HTTP 400 invalid_request_error: Unexpected value(s) `interleaved-thinking-2025-04-14`
-for the `anthropic-beta` header.
-```
-
-Because the header is not gated on whether thinking is enabled, this breaks
-**every** request — making the in-process check executor (MULTI-1367) unusable
-against Anthropic. There is no builder/config knob to disable it.
-
-Upstream issue: https://github.com/pacifio/cersei/issues/20
-
-## The change
-
-`src/anthropic.rs`: `ANTHROPIC_BETA_HEADER` no longer includes the stale
-`interleaved-thinking-2025-04-14` value. Only the still-accepted
-`token-efficient-tools-2025-02-19` beta is sent. Extended thinking continues to
-work through the `thinking` request-body parameter (which is GA and needs no beta
-header). This is the single, localized diff from upstream `0.1.9`.
-
-## Removing this patch
-
-Delete `third_party/cersei-provider/`, drop the `[patch.crates-io]` block in the
-workspace `Cargo.toml`, and bump `cersei-provider` to the first upstream release
-that corrects (or makes configurable) the `anthropic-beta` header
-(https://github.com/pacifio/cersei/issues/20).
-
-## Related
-
-Extended thinking is also left disabled in the check executor because
-cersei-provider drops Anthropic thinking-block signatures off the stream
-(`signature_delta`), so thinking blocks round-trip with an empty signature and
-the API rejects them on the second turn. Tracked separately at
-https://github.com/pacifio/cersei/issues/21; see `effort_temperature` in
-`src/checks/executor/cersei.rs`.
diff --git a/third_party/cersei-provider/README.md b/third_party/cersei-provider/README.md
deleted file mode 100644
index b4e2ebf..0000000
--- a/third_party/cersei-provider/README.md
+++ /dev/null
@@ -1,526 +0,0 @@
-# Cersei
-
-The complete Rust SDK for building coding agents.
-
-Cersei gives you every building block of a production coding agent — tool execution, LLM streaming, sub-agent orchestration, persistent memory, skills, MCP integration — as composable library functions. Build a Claude Code replacement, embed an agent in your app, or create something entirely new.
-
-```rust
-use cersei::prelude::*;
-
-#[tokio::main]
-async fn main() -> anyhow::Result<()> {
-    let output = Agent::builder()
-        .provider(Anthropic::from_env()?)
-        .tools(cersei::tools::coding())
-        .permission_policy(AllowAll)
-        .run_with("Fix the failing tests in src/")
-        .await?;
-
-    println!("{}", output.text());
-    Ok(())
-}
-```
-
-**MIT License** | Built by [Adib Mohsin](https://github.com/pacifio) | [Docs](https://cersei.pacifio.dev/docs) | [GitHub](https://github.com/pacifio/cersei)
-
----
-
-## Why Cersei
-
-| | Claude Code | OpenCode | **Cersei SDK** | **Abstract CLI** |
-|---|---|---|---|---|
-| Form factor | CLI app | CLI app | **Library** | **CLI app** |
-| Embeddable | No | No | **Yes** | No (uses SDK) |
-| Provider | Anthropic only | Multi-provider | **Multi-provider** | **Multi-provider** |
-| Language | TypeScript | TypeScript | **Rust** | **Rust** |
-| Custom tools | Plugins | Plugins | **`impl Tool` / `#[derive(Tool)]`** | Via SDK |
-| Startup | ~269ms | ~300ms | N/A (library) | **~34ms** |
-| Binary / RSS | 174MB / 330MB | — | N/A | **5.8MB / 4.9MB** |
-| Memory | File-based | SQLite | **File + Graph** | **File + Graph** |
-| Skills | `.claude/commands/` | `.claude/skills/` | **Both formats** | **Both formats** |
-
-Cersei is built from the architecture of Claude Code (reverse-engineered Rust port) and designed so that anyone can build a complete, drop-in replacement for Claude Code, OpenCode, or any coding agent — as a library call.
-
----
-
-## Abstract — The CLI
-
-**Abstract** is a complete CLI coding agent built on Cersei. One binary, zero runtime dependencies, graph memory by default.
-
-```bash
-# Install
-cargo install --path crates/abstract-cli
-
-# Use
-abstract                           # Interactive REPL
-abstract "fix the failing tests"   # Single-shot
-abstract --resume                  # Resume last session
-abstract --model opus --max        # Opus with max thinking
-abstract --no-permissions --json   # CI mode with NDJSON output
-```
-
-### Abstract vs Claude Code
-
-All numbers from `run_tool_bench.sh --full`.
-
-| Metric | Abstract | Claude Code | Winner |
-|--------|----------|-------------|--------|
-| Startup (warm) | **32ms** | 266ms | Abstract (8.2x) |
-| Binary size | **6.0 MB** | 174 MB | Abstract (29x) |
-| Memory (RSS) | **4.9 MB** | 333 MB | Abstract (68x) |
-| Tool dispatch | **0.02-17ms** | 5-265ms+ | Abstract |
-| Memory recall | **98us** (graph) | 7,545ms (LLM) | Abstract (77,000x) |
-| Memory write | **30us** (graph) | 20,687ms (agent) | Abstract (689,000x) |
-| MEMORY.md load | **9.6us** | 17.1ms | Abstract (1,781x) |
-| Sequential throughput | **906ms/req** | 12,079ms/req | Abstract (13.3x) |
-| System prompt tokens | **~2,200** | ~8,000+ | Abstract (3.6x fewer) |
-| LLM call for recall | **Not needed** | Required (Sonnet) | Abstract |
-
-> Claude Code's memory recall calls Sonnet every turn to rank the top 5 files by relevance (7.5s measured).
-> Abstract's graph does indexed lookups in 98 microseconds — same capability, no LLM call, no API cost.
-
-Full benchmark: [`crates/abstract-cli/benchmarks/REPORT.md`](crates/abstract-cli/benchmarks/REPORT.md)
-
-### Features
-
-- 34 built-in tools (file, shell, web, planning, orchestration, scheduling)
-- Multi-provider: Anthropic + OpenAI (+ Ollama, Azure, vLLM)
-- Graph memory (Grafeo) on by default
-- Auto-compact, auto-dream, effort levels (Low/Medium/High/Max)
-- MCP server support
-- Session persistence (Claude Code-compatible JSONL)
-- Interactive permissions with session caching
-- 12 slash commands (`/help`, `/commit`, `/review`, `/memory`, `/model`, `/diff`, etc.)
-- Streaming markdown rendering with syntax highlighting
-- TOML config: `~/.abstract/config.toml` + `.abstract/config.toml`
-- JSON output mode for piping (`--json`)
-
----
-
-## Install
-
-```toml
-[dependencies]
-cersei = { git = "https://github.com/pacifio/cersei" }
-tokio = { version = "1", features = ["full"] }
-anyhow = "1"
-```
-
-For graph-backed memory (optional):
-```toml
-cersei-memory = { git = "https://github.com/pacifio/cersei", features = ["graph"] }
-```
-
----
-
-## Architecture
-
-```
-cersei                    Facade crate — use cersei::prelude::*;
-  cersei-types            Provider-agnostic messages, errors, stream events
-  cersei-provider         Provider trait + Anthropic/OpenAI implementations
-  cersei-tools            30+ tools, permissions, bash classifier, skills, git utils
-  cersei-tools-derive     #[derive(Tool)] proc macro
-  cersei-agent            Agent builder, agentic loop, compact, coordinator, effort
-  cersei-memory           Memory trait, memdir, CLAUDE.md, sessions, Grafeo graph
-  cersei-hooks            Hook/middleware system
-  cersei-mcp              MCP client (JSON-RPC 2.0, stdio transport)
-abstract-cli              CLI coding agent ("abstract") — REPL, commands, config, permissions
-```
-
----
-
-## Core Concepts
-
-### Provider
-
-Any LLM backend. Built-in: Anthropic (with OAuth), OpenAI (compatible with Ollama, Azure, vLLM).
-
-```rust
-Agent::builder().provider(Anthropic::from_env()?)           // Anthropic API key
-Agent::builder().provider(OpenAi::builder()
-    .base_url("http://localhost:11434/v1")                   // Ollama
-    .model("llama3.1:70b").api_key("ollama").build()?)
-Agent::builder().provider(MyCustomProvider)                  // impl Provider
-```
-
-### Tools (30+)
-
-Every tool a coding agent needs, organized into sets:
-
-```rust
-cersei::tools::all()           // 30+ tools
-cersei::tools::coding()        // filesystem + shell + web
-cersei::tools::filesystem()    // Read, Write, Edit, Glob, Grep, NotebookEdit
-cersei::tools::shell()         // Bash, PowerShell
-cersei::tools::web()           // WebFetch, WebSearch
-cersei::tools::planning()      // EnterPlanMode, ExitPlanMode, TodoWrite
-cersei::tools::scheduling()    // CronCreate/List/Delete, Sleep, RemoteTrigger
-cersei::tools::orchestration() // SendMessage, Tasks (6 tools), Worktree
-```
-
-Custom tools in 10 lines:
-
-> The `#[derive(Tool)]` macro generates code with `#[async_trait::async_trait]` and `cercei-tools`, to make it work add both of it to depending on your project.
->  ```toml
-> async-trait = "0.1"
-> cersei = { path = "path/to/cersei" } # or git
-> cersei-tools = { path = "path/to/cersei/crates/cersei-tools" }
-> ```
-> or write `use cersei::tools as cersei_tools;` when using `derive(Tool)`;
-    
-
-```rust
-#[derive(Tool)]
-#[tool(name = "search", description = "Search docs", permission = "read_only")]
-struct SearchTool;
-
-#[async_trait]
-impl ToolExecute for SearchTool {
-    type Input = SearchInput; // derives Deserialize + JsonSchema
-    async fn run(&self, input: SearchInput, ctx: &ToolContext) -> ToolResult {
-        ToolResult::success(format!("Found: {}", input.query))
-    }
-}
-```
-
-### Sub-Agent Orchestration
-
-Spawn parallel workers, coordinate tasks, pass messages between agents:
-
-```rust
-// AgentTool — model spawns sub-agents autonomously
-Agent::builder()
-    .tool(AgentTool::new(|| Box::new(Anthropic::from_env()?), cersei::tools::coding()))
-
-// Coordinator mode — orchestrate parallel workers
-Agent::builder()
-    .tools(cersei::tools::all())  // includes Agent, Tasks, SendMessage
-    // Workers get filtered tools (no Agent — prevents recursion)
-
-// Task system
-// TaskCreate → TaskUpdate → TaskGet → TaskList → TaskStop → TaskOutput
-```
-
-### Memory (Three-Tier)
-
-```rust
-use cersei::memory::manager::MemoryManager;
-
-let mm = MemoryManager::new(project_root)
-    .with_graph(Path::new("./memory.grafeo"))?;  // optional graph layer
-
-// Tier 1: Flat files (~/.claude/projects/<root>/memory/)
-let metas = mm.scan();                    // scan .md files with frontmatter
-let content = mm.build_context();         // build system prompt injection
-
-// Tier 2: CLAUDE.md hierarchy (managed > user > project > local)
-// Automatically merged into build_context()
-
-// Tier 3: Graph memory (Grafeo, optional)
-let id = mm.store_memory("User prefers Rust", MemoryType::User, 0.9)?;
-mm.tag_memory(&id, "preferences");
-let results = mm.recall("Rust", 5);       // graph query with fallback to text match
-
-// Session persistence (JSONL, append-only, tombstone soft-delete)
-mm.write_user_message("session-1", Message::user("Hello"))?;
-let messages = mm.load_session_messages("session-1")?;
-```
-
-### Skills (Claude Code + OpenCode Compatible)
-
-```rust
-// Auto-discovers skills from:
-//   .claude/commands/*.md      (Claude Code format)
-//   .claude/skills/*/SKILL.md  (OpenCode format)
-//   ~/.claude/commands/*.md    (user-level)
-//   Bundled skills             (simplify, debug, commit, verify, stuck, remember, loop)
-
-let skill_tool = SkillTool::new().with_project_root(".");
-// skill="list" → lists all available skills
-// skill="debug" args="tests are flaky" → expands $ARGUMENTS template
-```
-
-### Realtime Events
-
-Three observation mechanisms:
-
-```rust
-// 1. Callback
-Agent::builder().on_event(|e| match e {
-    AgentEvent::TextDelta(t) => print!("{}", t),
-    AgentEvent::ToolStart { name, .. } => eprintln!("[{}]", name),
-    _ => {}
-})
-
-// 2. Broadcast (multi-consumer)
-let agent = Agent::builder().enable_broadcast(256).build()?;
-let mut rx = agent.subscribe().unwrap();
-tokio::spawn(async move { while let Ok(e) = rx.recv().await { /* ... */ } });
-
-// 3. Stream (bidirectional control)
-let mut stream = agent.run_stream("Deploy");
-while let Some(e) = stream.next().await {
-    if let AgentEvent::PermissionRequired(req) = e {
-        stream.respond_permission(req.id, PermissionDecision::Allow);
-    }
-}
-```
-
-### Context Management
-
-```rust
-Agent::builder()
-    .auto_compact(true)          // summarize old messages at 90% context usage
-    .compact_threshold(0.9)      // trigger threshold
-    .tool_result_budget(50_000)  // truncate oldest tool results above 50K chars
-    .thinking_budget(8192)       // extended thinking tokens
-    .effort(EffortLevel::High)   // Low/Medium/High/Max
-```
-
-### MCP (Model Context Protocol)
-
-```rust
-let mcp = McpManager::connect(&[
-    McpServerConfig::stdio("db", "npx", &["-y", "@my/db-mcp"]),
-    McpServerConfig::sse("docs", "https://mcp.example.com"),
-]).await?;
-
-Agent::builder().tools(mcp.tool_definitions().await)
-```
-
-### OAuth (Anthropic Native)
-
-```rust
-// Opens browser, PKCE flow, token storage, refresh
-cargo run --example oauth_login
-```
-
----
-
-## Agent Builder — Complete API
-
-```rust
-Agent::builder()
-    // Provider (required)
-    .provider(Anthropic::from_env()?)
-
-    // Tools
-    .tool(MyTool)
-    .tools(cersei::tools::coding())
-
-    // Model & generation
-    .model("claude-sonnet-4-6")
-    .max_turns(10)
-    .max_tokens(16384)
-    .temperature(0.7)
-    .thinking_budget(8192)
-
-    // Prompt
-    .system_prompt("You are a helpful assistant.")
-    .append_system_prompt("Extra context.")
-
-    // Environment
-    .working_dir("./my-project")
-    .permission_policy(AllowAll)          // or AllowReadOnly, DenyAll, RuleBased, Interactive
-
-    // Memory
-    .memory(JsonlMemory::new("./sessions"))
-    .session_id("my-session")
-
-    // Hooks & events
-    .hook(CostGuard { max_usd: 5.0 })
-    .on_event(|e| { /* ... */ })
-    .enable_broadcast(256)
-    .reporter(ConsoleReporter { verbose: true })
-
-    // Context management
-    .auto_compact(true)
-    .compact_threshold(0.9)
-    .tool_result_budget(50_000)
-
-    // Execute
-    .build()?                             // -> Agent
-    .run_with("Fix the tests")            // -> AgentOutput (shorthand)
-```
-
----
-
-## Benchmarks
-
-Measured on Apple Silicon, release build, 100 iterations with 3 warmup runs.
-
-### Tool I/O
-
-| Tool | Avg | Min | Max |
-|------|-----|-----|-----|
-| Edit | 0.04ms | 0.02ms | 0.05ms |
-| Glob | 0.05ms | 0.05ms | 0.07ms |
-| Write | 0.09ms | 0.07ms | 0.11ms |
-| Read | 0.09ms | 0.08ms | 0.11ms |
-| Grep | 5.85ms | 5.34ms | 8.51ms |
-| Bash | 15.64ms | 14.50ms | 16.19ms |
-
-### vs Claude Code CLI
-
-> **Note:** Cersei is a library — tool dispatch happens in-process. Claude Code is a CLI where
-> each sub-agent fork pays full startup cost. These are different layers; the comparison below
-> shows the gap between in-process dispatch and CLI process overhead.
-
-| Metric | Cersei (SDK) | Claude Code (CLI) | Notes |
-|--------|-------------|-------------------|-------|
-| Tool dispatch (Read) | 0.09ms | ~5-15ms (est.) | In-process vs Node.js fs |
-| CLI startup | N/A (library) | 269ms | Claude `--version` warm avg |
-| Sub-agent spawn | ~1ms (in-process) | ~300ms (fork) | Agent tool overhead |
-
-For an apples-to-apples CLI comparison, see [Abstract CLI benchmarks](crates/abstract-cli/benchmarks/REPORT.md).
-
-### Memory I/O
-
-| Operation | Abstract (Cersei) | Claude Code (measured) | Ratio |
-|-----------|------------------|----------------------|-------|
-| Scan 100 files | **1.2ms** | 26.6ms (`find`) | 22x |
-| Load MEMORY.md | **9.6μs** | 17.1ms | 1,781x |
-| Memory recall (graph) | **98μs** | 7,545ms (LLM call) | 77,000x |
-| Memory recall (text) | **1.3ms** | 17.5ms (`grep`) | 13x |
-| Session write | **27μs/entry** | N/A | — |
-| Session load (100) | **268μs** | N/A | — |
-| Graph store | **30μs/node** | N/A (no graph) | — |
-| Topic query | **77μs** | N/A (no graph) | — |
-
-### Benchmark suites
-
-Each bench lives in its own self-contained directory with its own runner and result schema. Add new benches as siblings.
-
-| Suite | Path | What it measures | Runner |
-|---|---|---|---|
-| **General-agent frameworks** | [`bench/general-agents/`](bench/general-agents/) | Per-agent memory, instantiation time, max concurrent agents — Cersei vs Agno / PydanticAI / LangGraph / CrewAI. | `./bench/general-agents/run.sh` |
-| **Terminal Bench 2.0** | [`bench/term-bench/`](bench/term-bench/) | End-to-end coding tasks inside Daytona sandboxes using the full `abstract` CLI (Linux x86_64 / arm64 binaries shipped in-tree). | `./bench/term-bench/run.sh` |
-| **LongMemEval (long-term memory)** | [`bench/long-mem/`](bench/long-mem/) | Recall accuracy on the ICLR-25 LongMemEval 500-question benchmark — head-to-head vs Mastra / Zep / Supermemory with identical prompts and LLM-as-judge rubric. Four Cersei configs: full-context baseline, usearch-HNSW semantic, grafeo-graph substring, hybrid w/ LLM fact extraction + RRF fusion. | `cargo run --release -p longmem-bench -- --dataset s --config all` |
-| **Compression (real LLMs)** | `crates/cersei-agent/tests/e2e_openai_compression.rs` | Input-token savings from `cersei-compression` on OpenAI (`gpt-4o-mini`) and Gemini (`gemini-2.5-flash`). `#[ignore]`, runs with real API keys. | `cargo test -p cersei-agent --test e2e_openai_compression -- --ignored --nocapture` |
-| **SDK Tool I/O** | `examples/benchmark_io.rs` | In-process tool dispatch latency for Read / Write / Edit / Grep / Bash / Glob. | `cargo run --example benchmark_io --release` |
-| **SDK Memory I/O** | `crates/abstract-cli/examples/memory_bench.rs` | Graph-memory vs filesystem vs Claude Code-style paths. | `cargo run -p abstract-cli --example memory_bench --release` |
-| **vs Claude Code CLI** | `run_tool_bench_claude.sh` · `run_tool_bench_codex.sh` | CLI-vs-CLI startup, memory, and dispatch overhead. | `./run_tool_bench.sh --iterations 20 --full` |
-
-### Run benchmarks
-
-```bash
-# Rust-side SDK benches (no external services)
-cargo run --example benchmark_io --release
-cargo run --release -p abstract-cli --example memory_bench
-
-# vs Claude Code / Codex CLIs
-./run_tool_bench.sh --iterations 20 --full
-
-# Python-harness benches (uv-managed; each dir self-contained)
-./bench/general-agents/run.sh          # Cersei vs Agno / PydanticAI / LangGraph / CrewAI
-./bench/term-bench/run.sh              # Terminal Bench 2.0 via Daytona
-
-# LongMemEval memory benchmark (head-to-head vs Mastra / Zep / Supermemory)
-./bench/long-mem/setup.sh              # downloads oracle + s datasets
-OPENAI_API_KEY=sk-… cargo run --release -p longmem-bench -- \
-  --dataset s --config all --concurrency 8
-
-# Real-LLM compression savings (requires API keys)
-OPENAI_API_KEY=sk-… cargo test -p cersei-agent \
-  --test e2e_openai_compression -- --ignored --nocapture
-```
-
----
-
-## Stress Tests
-
-```bash
-cargo run --example stress_core_infrastructure --release  # system prompt, compact, context, bash classifier
-cargo run --example stress_tools --release                 # all 30+ tools, registry, performance
-cargo run --example stress_orchestration --release         # sub-agents, coordinator, tasks, messaging
-cargo run --example stress_skills --release                # bundled + disk skills, Claude Code + OpenCode format
-cargo run --example stress_memory --release                # memdir, CLAUDE.md, sessions, extraction, auto-dream
-```
-
----
-
-## Examples
-
-| Example | Description |
-|---------|-------------|
-| [`simple_agent`](examples/simple_agent.rs) | Minimal agent in 3 lines |
-| [`custom_tools`](examples/custom_tools.rs) | Define and register custom tools |
-| [`streaming_events`](examples/streaming_events.rs) | Real-time `run_stream()` with colored output |
-| [`multi_listener`](examples/multi_listener.rs) | Broadcast channel with multiple consumers |
-| [`resumable_session`](examples/resumable_session.rs) | Persist and resume with `JsonlMemory` |
-| [`custom_provider`](examples/custom_provider.rs) | Echo provider + OpenAI-compatible endpoints |
-| [`hooks_middleware`](examples/hooks_middleware.rs) | Cost guard + audit logger + tool blocker |
-| [`benchmark_io`](examples/benchmark_io.rs) | Full I/O benchmark suite |
-| [`usage_report`](examples/usage_report.rs) | Token/cost tracking and billing estimates |
-| [`coding_agent`](examples/coding_agent.rs) | Build a Python todo CLI (end-to-end) |
-| [`oauth_login`](examples/oauth_login.rs) | Anthropic OAuth PKCE login flow |
-
-```bash
-cargo run --example simple_agent --release
-```
-
----
-
-## Test Suite
-
-```bash
-# Run all 160 unit tests
-cargo test --workspace
-
-# Run with graph memory (requires grafeo)
-cargo test --workspace --features graph
-
-# Run specific crate
-cargo test -p cersei-tools
-cargo test -p cersei-agent
-cargo test -p cersei-memory
-cargo test -p cersei-mcp
-```
-
-**160 unit tests** | **262 stress checks** | **0 failures** | **Zero I/O regression**
-
----
-
-## Extension Points
-
-| What | How | Example |
-|------|-----|---------|
-| Custom provider | `impl Provider` | Local LLM, Azure, Bedrock |
-| Custom tool | `#[derive(Tool)]` or `impl Tool` | DB query, deploy, search |
-| Custom permissions | `impl PermissionPolicy` | RBAC, OAuth-scoped |
-| Custom memory | `impl Memory` | PostgreSQL, Redis, S3 |
-| Custom hooks | `impl Hook` | Cost gating, audit logging |
-| Custom reporters | `impl Reporter` | Dashboards, WebSocket relay |
-| MCP servers | `McpServerConfig` via builder | Any MCP-compatible server |
-| Skills | `.claude/commands/*.md` | Custom prompt templates |
-| Graph memory | `features = ["graph"]` | Grafeo relationship tracking |
-
----
-
-## Documentation
-
-**[cersei.pacifio.dev/docs](https://cersei.pacifio.dev/docs)** — full docs with API reference, architecture, cookbooks, benchmarks, and llms.txt support.
-
-| Section | Content |
-|---------|---------|
-| [Quick Start](https://cersei.pacifio.dev/docs/quick-start) | First agent in 10 lines |
-| [API Reference](https://cersei.pacifio.dev/docs/api-agent) | Agent, Provider, Tools, Memory, Hooks, MCP |
-| [Architecture](https://cersei.pacifio.dev/docs/architecture) | Crate map, data flow, design principles |
-| [Cookbooks](https://cersei.pacifio.dev/docs/cookbook-custom-tools) | Custom tools, deployment, embedding |
-| [Abstract CLI](https://cersei.pacifio.dev/docs/abstract) | Reference CLI built on Cersei |
-| [Benchmarks](https://cersei.pacifio.dev/docs/bench-vs-claude-code) | vs Claude Code vs Codex |
-
----
-
-## License
-
-MIT License
-
-Copyright (c) 2025 Adib Mohsin
-
-Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
diff --git a/third_party/cersei-provider/src/anthropic.rs b/third_party/cersei-provider/src/anthropic.rs
deleted file mode 100644
index 98e7bdf..0000000
--- a/third_party/cersei-provider/src/anthropic.rs
+++ /dev/null
@@ -1,406 +0,0 @@
-//! Anthropic provider: Claude API client with streaming SSE support.
-
-use crate::*;
-use cersei_types::*;
-use futures::StreamExt;
-use tokio::sync::mpsc;
-
-const ANTHROPIC_API_BASE: &str = "https://api.anthropic.com";
-const ANTHROPIC_API_VERSION: &str = "2023-06-01";
-// MultiTool local patch (MULTI-1367): upstream cersei-provider 0.1.9 sends
-// `interleaved-thinking-2025-04-14` unconditionally, which the current Anthropic
-// API rejects with HTTP 400 ("Unexpected value(s) for the `anthropic-beta`
-// header"), breaking every request. Drop the stale interleaved-thinking beta and
-// keep only the still-accepted token-efficient-tools beta. See
-// third_party/cersei-provider/PATCH.md and the upstream issue
-// https://github.com/pacifio/cersei/issues/20. Remove once upstream ships a fix.
-const ANTHROPIC_BETA_HEADER: &str = "token-efficient-tools-2025-02-19";
-
-// ─── Anthropic provider ──────────────────────────────────────────────────────
-
-#[allow(dead_code)]
-pub struct Anthropic {
-    auth: Auth,
-    base_url: String,
-    default_model: String,
-    thinking_budget: Option<u32>,
-    max_retries: u32,
-    client: reqwest::Client,
-}
-
-impl Anthropic {
-    pub fn new(auth: Auth) -> Self {
-        let base_url = std::env::var("ANTHROPIC_BASE_URL")
-            .ok()
-            .filter(|u| !u.is_empty())
-            .unwrap_or_else(|| ANTHROPIC_API_BASE.to_string());
-        Self {
-            auth,
-            base_url,
-            default_model: "claude-sonnet-4-6".to_string(),
-            thinking_budget: None,
-            max_retries: 5,
-            client: reqwest::Client::new(),
-        }
-    }
-
-    /// Create from `ANTHROPIC_API_KEY` environment variable.
-    pub fn from_env() -> Result<Self> {
-        let key = std::env::var("ANTHROPIC_API_KEY")
-            .map_err(|_| CerseiError::Auth("ANTHROPIC_API_KEY not set".into()))?;
-        Ok(Self::new(Auth::ApiKey(key)))
-    }
-
-    pub fn builder() -> AnthropicBuilder {
-        AnthropicBuilder::default()
-    }
-
-    async fn auth_headers(&self) -> Result<Vec<(String, String)>> {
-        match &self.auth {
-            Auth::ApiKey(key) => Ok(vec![("x-api-key".into(), key.clone())]),
-            Auth::Bearer(token) => Ok(vec![("authorization".into(), format!("Bearer {}", token))]),
-            Auth::OAuth { token, .. } => Ok(vec![(
-                "authorization".into(),
-                format!("Bearer {}", token.access_token),
-            )]),
-            Auth::Custom(provider) => {
-                let (name, value) = provider.get_credentials().await?;
-                Ok(vec![(name, value)])
-            }
-        }
-    }
-}
-
-#[async_trait::async_trait]
-impl Provider for Anthropic {
-    fn name(&self) -> &str {
-        "anthropic"
-    }
-
-    fn context_window(&self, model: &str) -> u64 {
-        match model {
-            m if m.contains("opus") => 200_000,
-            m if m.contains("sonnet") => 200_000,
-            m if m.contains("haiku") => 200_000,
-            _ => 200_000,
-        }
-    }
-
-    fn capabilities(&self, _model: &str) -> ProviderCapabilities {
-        ProviderCapabilities {
-            streaming: true,
-            tool_use: true,
-            vision: true,
-            thinking: true,
-            system_prompt: true,
-            caching: true,
-        }
-    }
-
-    async fn complete(&self, request: CompletionRequest) -> Result<CompletionStream> {
-        let model = if request.model.is_empty() {
-            self.default_model.clone()
-        } else {
-            request.model.clone()
-        };
-
-        // Build API messages
-        let api_messages: Vec<serde_json::Value> = request
-            .messages
-            .iter()
-            .filter(|m| m.role != Role::System)
-            .map(|m| {
-                serde_json::json!({
-                    "role": m.role,
-                    "content": m.content,
-                })
-            })
-            .collect();
-
-        // Build request body
-        let mut body = serde_json::json!({
-            "model": model,
-            "max_tokens": request.max_tokens,
-            "messages": api_messages,
-            "stream": true,
-        });
-
-        if let Some(system) = &request.system {
-            body["system"] = serde_json::Value::String(system.clone());
-        }
-
-        if !request.tools.is_empty() {
-            let api_tools: Vec<serde_json::Value> = request
-                .tools
-                .iter()
-                .map(|t| {
-                    serde_json::json!({
-                        "name": t.name,
-                        "description": t.description,
-                        "input_schema": t.input_schema,
-                    })
-                })
-                .collect();
-            body["tools"] = serde_json::Value::Array(api_tools);
-        }
-
-        if let Some(temp) = request.temperature {
-            body["temperature"] = serde_json::json!(temp);
-        }
-
-        if !request.stop_sequences.is_empty() {
-            body["stop_sequences"] = serde_json::json!(request.stop_sequences);
-        }
-
-        // Thinking config
-        let thinking_budget = request
-            .options
-            .get::<u32>("thinking_budget")
-            .or(self.thinking_budget);
-        if let Some(budget) = thinking_budget {
-            body["thinking"] = serde_json::json!({
-                "type": "enabled",
-                "budget_tokens": budget,
-            });
-        }
-
-        // Build HTTP request
-        let url = format!("{}/v1/messages", self.base_url);
-        let mut req_builder = self
-            .client
-            .post(&url)
-            .header("anthropic-version", ANTHROPIC_API_VERSION)
-            .header("anthropic-beta", ANTHROPIC_BETA_HEADER)
-            .header("content-type", "application/json");
-
-        for (name, value) in self.auth_headers().await? {
-            req_builder = req_builder.header(&name, &value);
-        }
-
-        let (tx, rx) = mpsc::channel(256);
-
-        let request = req_builder.json(&body).build().map_err(CerseiError::Http)?;
-        let client = self.client.clone();
-
-        // Spawn SSE consumer
-        tokio::spawn(async move {
-            match client.execute(request).await {
-                Ok(response) => {
-                    if !response.status().is_success() {
-                        let status = response.status().as_u16();
-                        let body = response.text().await.unwrap_or_default();
-                        let _ = tx
-                            .send(StreamEvent::Error {
-                                message: format!("HTTP {}: {}", status, body),
-                            })
-                            .await;
-                        return;
-                    }
-
-                    let mut stream = response.bytes_stream();
-                    let mut buffer = String::new();
-
-                    while let Some(chunk) = stream.next().await {
-                        match chunk {
-                            Ok(bytes) => {
-                                buffer.push_str(&String::from_utf8_lossy(&bytes));
-                                // Process complete SSE events
-                                while let Some(pos) = buffer.find("\n\n") {
-                                    let event_str = buffer[..pos].to_string();
-                                    buffer = buffer[pos + 2..].to_string();
-
-                                    if let Some(event) = parse_sse_event(&event_str) {
-                                        if tx.send(event).await.is_err() {
-                                            return;
-                                        }
-                                    }
-                                }
-                            }
-                            Err(e) => {
-                                let _ = tx
-                                    .send(StreamEvent::Error {
-                                        message: e.to_string(),
-                                    })
-                                    .await;
-                                return;
-                            }
-                        }
-                    }
-                }
-                Err(e) => {
-                    let _ = tx
-                        .send(StreamEvent::Error {
-                            message: e.to_string(),
-                        })
-                        .await;
-                }
-            }
-        });
-
-        Ok(CompletionStream::new(rx))
-    }
-}
-
-// ─── SSE parser ──────────────────────────────────────────────────────────────
-
-fn parse_sse_event(raw: &str) -> Option<StreamEvent> {
-    let mut event_type = String::new();
-    let mut data = String::new();
-
-    for line in raw.lines() {
-        if let Some(rest) = line.strip_prefix("event: ") {
-            event_type = rest.trim().to_string();
-        } else if let Some(rest) = line.strip_prefix("data: ") {
-            data = rest.trim().to_string();
-        }
-    }
-
-    let json: serde_json::Value = serde_json::from_str(&data).ok()?;
-
-    match event_type.as_str() {
-        "message_start" => {
-            let msg = &json["message"];
-            Some(StreamEvent::MessageStart {
-                id: msg["id"].as_str().unwrap_or("").to_string(),
-                model: msg["model"].as_str().unwrap_or("").to_string(),
-            })
-        }
-        "content_block_start" => {
-            let index = json["index"].as_u64().unwrap_or(0) as usize;
-            let block_type = json["content_block"]["type"]
-                .as_str()
-                .unwrap_or("text")
-                .to_string();
-            Some(StreamEvent::ContentBlockStart {
-                index,
-                block_type,
-                id: json["content_block"]["id"].as_str().map(String::from),
-                name: json["content_block"]["name"].as_str().map(String::from),
-            })
-        }
-        "content_block_delta" => {
-            let index = json["index"].as_u64().unwrap_or(0) as usize;
-            let delta = &json["delta"];
-            let delta_type = delta["type"].as_str().unwrap_or("");
-            match delta_type {
-                "text_delta" => Some(StreamEvent::TextDelta {
-                    index,
-                    text: delta["text"].as_str().unwrap_or("").to_string(),
-                }),
-                "input_json_delta" => Some(StreamEvent::InputJsonDelta {
-                    index,
-                    partial_json: delta["partial_json"].as_str().unwrap_or("").to_string(),
-                }),
-                "thinking_delta" => Some(StreamEvent::ThinkingDelta {
-                    index,
-                    thinking: delta["thinking"].as_str().unwrap_or("").to_string(),
-                }),
-                _ => None,
-            }
-        }
-        "content_block_stop" => {
-            let index = json["index"].as_u64().unwrap_or(0) as usize;
-            Some(StreamEvent::ContentBlockStop { index })
-        }
-        "message_delta" => {
-            let stop_reason = json["delta"]["stop_reason"].as_str().and_then(|s| match s {
-                "end_turn" => Some(StopReason::EndTurn),
-                "max_tokens" => Some(StopReason::MaxTokens),
-                "tool_use" => Some(StopReason::ToolUse),
-                "stop_sequence" => Some(StopReason::StopSequence),
-                _ => None,
-            });
-            let usage = if let Some(u) = json["usage"].as_object() {
-                Some(Usage {
-                    input_tokens: u.get("input_tokens").and_then(|v| v.as_u64()).unwrap_or(0),
-                    output_tokens: u.get("output_tokens").and_then(|v| v.as_u64()).unwrap_or(0),
-                    ..Default::default()
-                })
-            } else {
-                None
-            };
-            Some(StreamEvent::MessageDelta { stop_reason, usage })
-        }
-        "message_stop" => Some(StreamEvent::MessageStop),
-        "ping" => Some(StreamEvent::Ping),
-        "error" => Some(StreamEvent::Error {
-            message: json["error"]["message"]
-                .as_str()
-                .unwrap_or("Unknown error")
-                .to_string(),
-        }),
-        _ => None,
-    }
-}
-
-// ─── Builder ─────────────────────────────────────────────────────────────────
-
-#[derive(Default)]
-pub struct AnthropicBuilder {
-    api_key: Option<String>,
-    base_url: Option<String>,
-    model: Option<String>,
-    thinking_budget: Option<u32>,
-    oauth_token: Option<OAuthToken>,
-    max_retries: Option<u32>,
-}
-
-impl AnthropicBuilder {
-    pub fn api_key(mut self, key: impl Into<String>) -> Self {
-        self.api_key = Some(key.into());
-        self
-    }
-
-    pub fn base_url(mut self, url: impl Into<String>) -> Self {
-        self.base_url = Some(url.into());
-        self
-    }
-
-    pub fn model(mut self, model: impl Into<String>) -> Self {
-        self.model = Some(model.into());
-        self
-    }
-
-    pub fn thinking(mut self, budget_tokens: u32) -> Self {
-        self.thinking_budget = Some(budget_tokens);
-        self
-    }
-
-    pub fn oauth(mut self, token: OAuthToken) -> Self {
-        self.oauth_token = Some(token);
-        self
-    }
-
-    pub fn max_retries(mut self, n: u32) -> Self {
-        self.max_retries = Some(n);
-        self
-    }
-
-    pub fn build(self) -> Result<Anthropic> {
-        let auth = if let Some(token) = self.oauth_token {
-            Auth::OAuth {
-                client_id: String::new(),
-                token,
-            }
-        } else if let Some(key) = self.api_key {
-            Auth::ApiKey(key)
-        } else {
-            return Err(CerseiError::Auth(
-                "No API key or OAuth token provided. Set ANTHROPIC_API_KEY or use .oauth()".into(),
-            ));
-        };
-
-        Ok(Anthropic {
-            auth,
-            base_url: self
-                .base_url
-                .unwrap_or_else(|| ANTHROPIC_API_BASE.to_string()),
-            default_model: self
-                .model
-                .unwrap_or_else(|| "claude-sonnet-4-6".to_string()),
-            thinking_budget: self.thinking_budget,
-            max_retries: self.max_retries.unwrap_or(5),
-            client: reqwest::Client::new(),
-        })
-    }
-}
diff --git a/third_party/cersei-provider/src/gemini.rs b/third_party/cersei-provider/src/gemini.rs
deleted file mode 100644
index 2a7e22e..0000000
--- a/third_party/cersei-provider/src/gemini.rs
+++ /dev/null
@@ -1,556 +0,0 @@
-//! Google Gemini provider: native Gemini API client with streaming support.
-//!
-//! Uses Google's `generateContent` API directly rather than the OpenAI-compatible
-//! shim, enabling access to native Gemini features like safety settings,
-//! grounding, and proper multimodal support.
-
-use crate::*;
-use cersei_types::*;
-use futures::StreamExt;
-use tokio::sync::mpsc;
-
-const GEMINI_API_BASE: &str = "https://generativelanguage.googleapis.com/v1beta";
-
-// ─── Gemini provider ────────────────────────────────────────────────────────
-
-pub struct Gemini {
-    api_key: String,
-    base_url: String,
-    default_model: String,
-    client: reqwest::Client,
-}
-
-impl Gemini {
-    pub fn new(api_key: impl Into<String>) -> Self {
-        let base_url = std::env::var("GEMINI_BASE_URL")
-            .ok()
-            .filter(|u| !u.is_empty())
-            .unwrap_or_else(|| GEMINI_API_BASE.to_string());
-        Self {
-            api_key: api_key.into(),
-            base_url,
-            default_model: "gemini-3.1-pro-preview".to_string(),
-            client: reqwest::Client::new(),
-        }
-    }
-
-    /// Create from `GOOGLE_API_KEY` or `GEMINI_API_KEY` environment variable.
-    pub fn from_env() -> Result<Self> {
-        let key = std::env::var("GOOGLE_API_KEY")
-            .or_else(|_| std::env::var("GEMINI_API_KEY"))
-            .map_err(|_| CerseiError::Auth("GOOGLE_API_KEY or GEMINI_API_KEY not set".into()))?;
-        Ok(Self::new(key))
-    }
-
-    pub fn builder() -> GeminiBuilder {
-        GeminiBuilder::default()
-    }
-}
-
-#[async_trait::async_trait]
-impl Provider for Gemini {
-    fn name(&self) -> &str {
-        "google"
-    }
-
-    fn context_window(&self, model: &str) -> u64 {
-        match model {
-            m if m.contains("gemini-3.1") => 2_000_000,
-            m if m.contains("gemini-3.0") => 1_000_000,
-            m if m.contains("gemini-2.0") => 1_000_000,
-            m if m.contains("gemini-1.5-pro") => 2_000_000,
-            m if m.contains("gemini-1.5-flash") => 1_000_000,
-            _ => 1_000_000,
-        }
-    }
-
-    fn capabilities(&self, _model: &str) -> ProviderCapabilities {
-        ProviderCapabilities {
-            streaming: true,
-            tool_use: true,
-            vision: true,
-            thinking: false,
-            system_prompt: true,
-            caching: false,
-        }
-    }
-
-    async fn complete(&self, request: CompletionRequest) -> Result<CompletionStream> {
-        let model = if request.model.is_empty() {
-            self.default_model.clone()
-        } else {
-            request.model.clone()
-        };
-
-        // Build a map of tool_use_id → tool_name from conversation history
-        let tool_name_map: std::collections::HashMap<String, String> = request
-            .messages
-            .iter()
-            .flat_map(|m| match &m.content {
-                MessageContent::Blocks(blocks) => blocks
-                    .iter()
-                    .filter_map(|b| {
-                        if let ContentBlock::ToolUse { id, name, .. } = b {
-                            Some((id.clone(), name.clone()))
-                        } else {
-                            None
-                        }
-                    })
-                    .collect::<Vec<_>>(),
-                _ => vec![],
-            })
-            .collect();
-
-        // Build Gemini-native contents array
-        let mut contents: Vec<serde_json::Value> = Vec::new();
-
-        for msg in &request.messages {
-            match msg.role {
-                Role::User => {
-                    let mut parts: Vec<serde_json::Value> = Vec::new();
-
-                    if let MessageContent::Blocks(blocks) = &msg.content {
-                        for block in blocks {
-                            match block {
-                                ContentBlock::Text { text } => {
-                                    parts.push(serde_json::json!({ "text": text }));
-                                }
-                                ContentBlock::ToolResult {
-                                    tool_use_id,
-                                    content,
-                                    ..
-                                } => {
-                                    // Gemini requires the function NAME, not the call ID
-                                    let func_name = tool_name_map
-                                        .get(tool_use_id)
-                                        .cloned()
-                                        .unwrap_or_else(|| tool_use_id.clone());
-                                    let content_str = match content {
-                                        ToolResultContent::Text(s) => s.clone(),
-                                        ToolResultContent::Blocks(blocks) => blocks
-                                            .iter()
-                                            .filter_map(|b| {
-                                                if let ContentBlock::Text { text } = b {
-                                                    Some(text.as_str())
-                                                } else {
-                                                    None
-                                                }
-                                            })
-                                            .collect::<Vec<_>>()
-                                            .join("\n"),
-                                    };
-                                    parts.push(serde_json::json!({
-                                        "functionResponse": {
-                                            "name": func_name,
-                                            "response": { "content": content_str },
-                                        }
-                                    }));
-                                }
-                                _ => {}
-                            }
-                        }
-                    } else {
-                        parts.push(serde_json::json!({ "text": msg.get_all_text() }));
-                    }
-
-                    if !parts.is_empty() {
-                        contents.push(serde_json::json!({
-                            "role": "user",
-                            "parts": parts,
-                        }));
-                    }
-                }
-                Role::Assistant => {
-                    let mut parts: Vec<serde_json::Value> = Vec::new();
-
-                    if let MessageContent::Blocks(blocks) = &msg.content {
-                        for block in blocks {
-                            match block {
-                                ContentBlock::Text { text } => {
-                                    parts.push(serde_json::json!({ "text": text }));
-                                }
-                                ContentBlock::ToolUse { id, name, input } => {
-                                    // Extract fc_id and thoughtSignature from encoded tool_id
-                                    // Format: "gemini-tool-N::fc_id::thoughtSignature" or "gemini-tool-N"
-                                    let segments: Vec<&str> = id.splitn(3, "::").collect();
-                                    let mut fc = serde_json::json!({
-                                        "name": name,
-                                        "args": input,
-                                    });
-                                    let mut part_obj = serde_json::Map::new();
-                                    if segments.len() >= 3 {
-                                        // Has fc_id and thoughtSignature
-                                        fc["id"] =
-                                            serde_json::Value::String(segments[1].to_string());
-                                        part_obj.insert("functionCall".to_string(), fc);
-                                        part_obj.insert(
-                                            "thoughtSignature".to_string(),
-                                            serde_json::Value::String(segments[2].to_string()),
-                                        );
-                                    } else {
-                                        part_obj.insert("functionCall".to_string(), fc);
-                                    }
-                                    parts.push(serde_json::Value::Object(part_obj));
-                                }
-                                _ => {}
-                            }
-                        }
-                    } else {
-                        parts.push(serde_json::json!({ "text": msg.get_all_text() }));
-                    }
-
-                    if !parts.is_empty() {
-                        contents.push(serde_json::json!({
-                            "role": "model",
-                            "parts": parts,
-                        }));
-                    }
-                }
-                Role::System => {
-                    // System messages handled separately via systemInstruction
-                }
-            }
-        }
-
-        // Build request body
-        let mut body = serde_json::json!({
-            "contents": contents,
-            "generationConfig": {
-                "maxOutputTokens": request.max_tokens,
-            },
-        });
-
-        // System instruction (Gemini's equivalent of system prompt)
-        if let Some(system) = &request.system {
-            body["systemInstruction"] = serde_json::json!({
-                "parts": [{ "text": system }],
-            });
-        }
-
-        if let Some(temp) = request.temperature {
-            body["generationConfig"]["temperature"] = serde_json::json!(temp);
-        }
-
-        if !request.stop_sequences.is_empty() {
-            body["generationConfig"]["stopSequences"] = serde_json::json!(request.stop_sequences);
-        }
-
-        // Tool declarations
-        if !request.tools.is_empty() {
-            let function_declarations: Vec<serde_json::Value> = request
-                .tools
-                .iter()
-                .map(|t| {
-                    serde_json::json!({
-                        "name": t.name,
-                        "description": t.description,
-                        "parameters": t.input_schema,
-                    })
-                })
-                .collect();
-            body["tools"] = serde_json::json!([{
-                "functionDeclarations": function_declarations,
-            }]);
-        }
-
-        // Safety settings: use least restrictive defaults to avoid unexpected blocks
-        body["safetySettings"] = serde_json::json!([
-            { "category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_ONLY_HIGH" },
-            { "category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_ONLY_HIGH" },
-            { "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_ONLY_HIGH" },
-            { "category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_ONLY_HIGH" },
-        ]);
-
-        // SECURITY: never put the API key in the URL. Use the
-        // `x-goog-api-key` header so that reqwest's error `Display` (which
-        // prints the URL) cannot leak the secret into logs or error-wrapped
-        // output.
-        let url = format!(
-            "{}/models/{}:streamGenerateContent?alt=sse",
-            self.base_url, model
-        );
-
-        let (tx, rx) = mpsc::channel(256);
-
-        let req = self
-            .client
-            .post(&url)
-            .header("x-goog-api-key", &self.api_key)
-            .header("content-type", "application/json")
-            .json(&body)
-            .build()
-            .map_err(CerseiError::Http)?;
-
-        let client = self.client.clone();
-
-        tokio::spawn(async move {
-            match client.execute(req).await {
-                Ok(response) => {
-                    if !response.status().is_success() {
-                        let status = response.status().as_u16();
-                        let body = response.text().await.unwrap_or_default();
-                        let _ = tx
-                            .send(StreamEvent::Error {
-                                message: format!("HTTP {}: {}", status, body),
-                            })
-                            .await;
-                        return;
-                    }
-
-                    let _ = tx
-                        .send(StreamEvent::MessageStart {
-                            id: String::new(),
-                            model: String::new(),
-                        })
-                        .await;
-
-                    let mut stream = response.bytes_stream();
-                    let mut buffer = String::new();
-                    let mut block_index: usize = 0;
-                    let mut total_input_tokens: u64 = 0;
-                    let mut total_output_tokens: u64 = 0;
-                    let mut saw_function_calls = false;
-
-                    while let Some(chunk) = stream.next().await {
-                        match chunk {
-                            Ok(bytes) => {
-                                buffer.push_str(&String::from_utf8_lossy(&bytes));
-
-                                while let Some(pos) = buffer.find("\n") {
-                                    let line = buffer[..pos].to_string();
-                                    buffer = buffer[pos + 1..].to_string();
-
-                                    if let Some(data) = line.strip_prefix("data: ") {
-                                        let data = data.trim();
-                                        if data.is_empty() {
-                                            continue;
-                                        }
-
-                                        if let Ok(json) =
-                                            serde_json::from_str::<serde_json::Value>(data)
-                                        {
-                                            // Extract usage metadata
-                                            if let Some(metadata) = json.get("usageMetadata") {
-                                                total_input_tokens = metadata
-                                                    .get("promptTokenCount")
-                                                    .and_then(|v| v.as_u64())
-                                                    .unwrap_or(total_input_tokens);
-                                                total_output_tokens = metadata
-                                                    .get("candidatesTokenCount")
-                                                    .and_then(|v| v.as_u64())
-                                                    .unwrap_or(total_output_tokens);
-                                            }
-
-                                            // Process candidates
-                                            if let Some(candidates) =
-                                                json.get("candidates").and_then(|c| c.as_array())
-                                            {
-                                                for candidate in candidates {
-                                                    if let Some(parts) = candidate
-                                                        .get("content")
-                                                        .and_then(|c| c.get("parts"))
-                                                        .and_then(|p| p.as_array())
-                                                    {
-                                                        for part in parts {
-                                                            if let Some(text) = part
-                                                                .get("text")
-                                                                .and_then(|t| t.as_str())
-                                                            {
-                                                                let _ = tx
-                                                                    .send(StreamEvent::ContentBlockStart {
-                                                                        index: block_index,
-                                                                        block_type: "text".into(),
-                                                                        id: None,
-                                                                        name: None,
-                                                                    })
-                                                                    .await;
-                                                                let _ = tx
-                                                                    .send(StreamEvent::TextDelta {
-                                                                        index: block_index,
-                                                                        text: text.to_string(),
-                                                                    })
-                                                                    .await;
-                                                                let _ = tx
-                                                                    .send(StreamEvent::ContentBlockStop {
-                                                                        index: block_index,
-                                                                    })
-                                                                    .await;
-                                                                block_index += 1;
-                                                            }
-
-                                                            if let Some(fc) =
-                                                                part.get("functionCall")
-                                                            {
-                                                                saw_function_calls = true;
-                                                                let name = fc
-                                                                    .get("name")
-                                                                    .and_then(|n| n.as_str())
-                                                                    .unwrap_or("")
-                                                                    .to_string();
-                                                                let args = fc
-                                                                    .get("args")
-                                                                    .cloned()
-                                                                    .unwrap_or(
-                                                                        serde_json::Value::Object(
-                                                                            Default::default(),
-                                                                        ),
-                                                                    );
-                                                                // Capture thoughtSignature (sibling of functionCall at part level, Gemini 3.1+)
-                                                                let thought_sig = part
-                                                                    .get("thoughtSignature")
-                                                                    .and_then(|s| s.as_str())
-                                                                    .unwrap_or("");
-                                                                // Capture functionCall.id if present
-                                                                let fc_id = fc
-                                                                    .get("id")
-                                                                    .and_then(|s| s.as_str())
-                                                                    .unwrap_or("");
-                                                                // Encode both in tool_id for roundtrip
-                                                                let tool_id =
-                                                                    if thought_sig.is_empty() {
-                                                                        format!(
-                                                                            "gemini-tool-{}",
-                                                                            block_index
-                                                                        )
-                                                                    } else {
-                                                                        format!(
-                                                                        "gemini-tool-{}::{}::{}",
-                                                                        block_index,
-                                                                        fc_id,
-                                                                        thought_sig
-                                                                    )
-                                                                    };
-
-                                                                let _ = tx
-                                                                    .send(StreamEvent::ContentBlockStart {
-                                                                        index: block_index,
-                                                                        block_type: "tool_use".into(),
-                                                                        id: Some(tool_id),
-                                                                        name: Some(name),
-                                                                    })
-                                                                    .await;
-                                                                let _ = tx
-                                                                    .send(StreamEvent::InputJsonDelta {
-                                                                        index: block_index,
-                                                                        partial_json: serde_json::to_string(&args)
-                                                                            .unwrap_or_default(),
-                                                                    })
-                                                                    .await;
-                                                                let _ = tx
-                                                                    .send(StreamEvent::ContentBlockStop {
-                                                                        index: block_index,
-                                                                    })
-                                                                    .await;
-                                                                block_index += 1;
-                                                            }
-                                                        }
-                                                    }
-
-                                                    // Check finish reason
-                                                    let finish_reason = candidate
-                                                        .get("finishReason")
-                                                        .and_then(|r| r.as_str());
-                                                    if let Some(reason) = finish_reason {
-                                                        let stop = if saw_function_calls {
-                                                            StopReason::ToolUse
-                                                        } else {
-                                                            match reason {
-                                                                "STOP" => StopReason::EndTurn,
-                                                                "MAX_TOKENS" => {
-                                                                    StopReason::MaxTokens
-                                                                }
-                                                                "SAFETY" => StopReason::EndTurn,
-                                                                _ => StopReason::EndTurn,
-                                                            }
-                                                        };
-                                                        let _ = tx
-                                                            .send(StreamEvent::MessageDelta {
-                                                                stop_reason: Some(stop),
-                                                                usage: Some(Usage {
-                                                                    input_tokens:
-                                                                        total_input_tokens,
-                                                                    output_tokens:
-                                                                        total_output_tokens,
-                                                                    ..Default::default()
-                                                                }),
-                                                            })
-                                                            .await;
-                                                    }
-                                                }
-                                            }
-                                        }
-                                    }
-                                }
-                            }
-                            Err(e) => {
-                                let _ = tx
-                                    .send(StreamEvent::Error {
-                                        message: e.to_string(),
-                                    })
-                                    .await;
-                                return;
-                            }
-                        }
-                    }
-
-                    let _ = tx.send(StreamEvent::MessageStop).await;
-                }
-                Err(e) => {
-                    let _ = tx
-                        .send(StreamEvent::Error {
-                            message: e.to_string(),
-                        })
-                        .await;
-                }
-            }
-        });
-
-        Ok(CompletionStream::new(rx))
-    }
-}
-
-// ─── Builder ─────────────────────────────────────────────────────────────────
-
-#[derive(Default)]
-pub struct GeminiBuilder {
-    api_key: Option<String>,
-    base_url: Option<String>,
-    model: Option<String>,
-}
-
-impl GeminiBuilder {
-    pub fn api_key(mut self, key: impl Into<String>) -> Self {
-        self.api_key = Some(key.into());
-        self
-    }
-
-    pub fn base_url(mut self, url: impl Into<String>) -> Self {
-        self.base_url = Some(url.into());
-        self
-    }
-
-    pub fn model(mut self, model: impl Into<String>) -> Self {
-        self.model = Some(model.into());
-        self
-    }
-
-    pub fn build(self) -> Result<Gemini> {
-        let api_key = if let Some(key) = self.api_key {
-            key
-        } else {
-            return Err(CerseiError::Auth(
-                "No API key provided. Set GOOGLE_API_KEY or GEMINI_API_KEY or use .api_key()"
-                    .into(),
-            ));
-        };
-
-        Ok(Gemini {
-            api_key,
-            base_url: self.base_url.unwrap_or_else(|| GEMINI_API_BASE.to_string()),
-            default_model: self
-                .model
-                .unwrap_or_else(|| "gemini-3.1-pro-preview".to_string()),
-            client: reqwest::Client::new(),
-        })
-    }
-}
diff --git a/third_party/cersei-provider/src/lib.rs b/third_party/cersei-provider/src/lib.rs
deleted file mode 100644
index 1efba1e..0000000
--- a/third_party/cersei-provider/src/lib.rs
+++ /dev/null
@@ -1,220 +0,0 @@
-//! cersei-provider: Provider trait and built-in LLM providers.
-//!
-//! Providers abstract over different LLM backends (Anthropic, OpenAI, local models).
-//! Each provider implements streaming completion, token counting, and capability discovery.
-
-pub mod anthropic;
-pub mod gemini;
-pub mod openai;
-pub mod registry;
-pub mod router;
-mod stream;
-
-use async_trait::async_trait;
-use cersei_types::*;
-use serde::{Deserialize, Serialize};
-use std::collections::HashMap;
-use tokio::sync::mpsc;
-
-// Re-exports
-pub use anthropic::Anthropic;
-pub use gemini::Gemini;
-pub use openai::OpenAi;
-pub use router::from_model_string;
-pub use stream::StreamAccumulator;
-
-// ─── Provider trait ──────────────────────────────────────────────────────────
-
-#[async_trait]
-pub trait Provider: Send + Sync {
-    /// Human-readable provider name (e.g., "anthropic", "openai").
-    fn name(&self) -> &str;
-
-    /// Context window size for the given model.
-    fn context_window(&self, model: &str) -> u64;
-
-    /// Capabilities supported by the given model.
-    fn capabilities(&self, model: &str) -> ProviderCapabilities;
-
-    /// Send a streaming completion request.
-    async fn complete(&self, request: CompletionRequest) -> Result<CompletionStream>;
-
-    /// Send a blocking (non-streaming) completion request.
-    async fn complete_blocking(&self, request: CompletionRequest) -> Result<CompletionResponse> {
-        self.complete(request).await?.collect().await
-    }
-
-    /// Count tokens for a message list. Returns an estimate if exact counting is unavailable.
-    async fn count_tokens(&self, messages: &[Message], _model: &str) -> Result<u64> {
-        // Default: rough estimate based on character count
-        let chars: usize = messages.iter().map(|m| m.get_all_text().len()).sum();
-        Ok((chars as u64) / 4) // ~4 chars per token
-    }
-}
-
-// Blanket impl: Box<dyn Provider> is itself a Provider.
-#[async_trait]
-impl Provider for Box<dyn Provider> {
-    fn name(&self) -> &str {
-        (**self).name()
-    }
-    fn context_window(&self, model: &str) -> u64 {
-        (**self).context_window(model)
-    }
-    fn capabilities(&self, model: &str) -> ProviderCapabilities {
-        (**self).capabilities(model)
-    }
-    async fn complete(&self, request: CompletionRequest) -> Result<CompletionStream> {
-        (**self).complete(request).await
-    }
-    async fn complete_blocking(&self, request: CompletionRequest) -> Result<CompletionResponse> {
-        (**self).complete_blocking(request).await
-    }
-    async fn count_tokens(&self, messages: &[Message], model: &str) -> Result<u64> {
-        (**self).count_tokens(messages, model).await
-    }
-}
-
-// ─── Authentication ──────────────────────────────────────────────────────────
-
-#[derive(Debug, Clone)]
-pub enum Auth {
-    /// API key sent as `x-api-key` header (Anthropic Console) or `Authorization: Bearer` (OpenAI).
-    ApiKey(String),
-    /// Bearer token sent as `Authorization: Bearer <token>`.
-    Bearer(String),
-    /// OAuth flow with client ID and token.
-    OAuth {
-        client_id: String,
-        token: OAuthToken,
-    },
-    /// Custom auth provider for non-standard flows.
-    Custom(std::sync::Arc<dyn AuthProvider>),
-}
-
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct OAuthToken {
-    pub access_token: String,
-    pub refresh_token: Option<String>,
-    pub expires_at_ms: Option<i64>,
-    pub scopes: Vec<String>,
-}
-
-impl OAuthToken {
-    pub fn is_expired(&self) -> bool {
-        if let Some(exp) = self.expires_at_ms {
-            chrono::Utc::now().timestamp_millis() >= exp
-        } else {
-            false
-        }
-    }
-}
-
-#[async_trait]
-pub trait AuthProvider: Send + Sync + std::fmt::Debug {
-    /// Returns (header_name, header_value) for the request.
-    async fn get_credentials(&self) -> Result<(String, String)>;
-
-    /// Refresh credentials if they have expired.
-    async fn refresh(&self) -> Result<()>;
-}
-
-// ─── Completion request/response ─────────────────────────────────────────────
-
-#[derive(Debug, Clone)]
-pub struct CompletionRequest {
-    pub model: String,
-    pub messages: Vec<Message>,
-    pub system: Option<String>,
-    pub tools: Vec<ToolDefinition>,
-    pub max_tokens: u32,
-    pub temperature: Option<f32>,
-    pub stop_sequences: Vec<String>,
-    /// Provider-specific options (thinking budget, top_p, etc.)
-    pub options: ProviderOptions,
-}
-
-impl CompletionRequest {
-    pub fn new(model: impl Into<String>) -> Self {
-        Self {
-            model: model.into(),
-            messages: Vec::new(),
-            system: None,
-            tools: Vec::new(),
-            max_tokens: 16384,
-            temperature: None,
-            stop_sequences: Vec::new(),
-            options: ProviderOptions::default(),
-        }
-    }
-}
-
-#[derive(Debug, Clone, Default)]
-pub struct ProviderOptions {
-    entries: HashMap<String, serde_json::Value>,
-}
-
-impl ProviderOptions {
-    pub fn set(&mut self, key: impl Into<String>, value: impl Serialize) {
-        if let Ok(v) = serde_json::to_value(value) {
-            self.entries.insert(key.into(), v);
-        }
-    }
-
-    pub fn get<T: for<'de> Deserialize<'de>>(&self, key: &str) -> Option<T> {
-        self.entries
-            .get(key)
-            .and_then(|v| serde_json::from_value(v.clone()).ok())
-    }
-
-    pub fn has(&self, key: &str) -> bool {
-        self.entries.contains_key(key)
-    }
-}
-
-#[derive(Debug, Clone)]
-pub struct CompletionResponse {
-    pub message: Message,
-    pub usage: Usage,
-    pub stop_reason: StopReason,
-}
-
-#[derive(Debug, Clone, Default)]
-pub struct ProviderCapabilities {
-    pub streaming: bool,
-    pub tool_use: bool,
-    pub vision: bool,
-    pub thinking: bool,
-    pub system_prompt: bool,
-    pub caching: bool,
-}
-
-// ─── Completion stream ───────────────────────────────────────────────────────
-
-/// A streaming response from a provider. Wraps a channel of StreamEvents.
-pub struct CompletionStream {
-    rx: mpsc::Receiver<StreamEvent>,
-}
-
-impl CompletionStream {
-    pub fn new(rx: mpsc::Receiver<StreamEvent>) -> Self {
-        Self { rx }
-    }
-
-    /// Consume the stream and collect into a complete response.
-    pub async fn collect(mut self) -> Result<CompletionResponse> {
-        let mut acc = StreamAccumulator::new();
-        while let Some(event) = self.rx.recv().await {
-            if let StreamEvent::Error { message } = &event {
-                return Err(CerseiError::Provider(message.clone()));
-            }
-            acc.process_event(event);
-        }
-        acc.into_response()
-    }
-
-    /// Access the underlying receiver for real-time event processing.
-    pub fn into_receiver(self) -> mpsc::Receiver<StreamEvent> {
-        self.rx
-    }
-}
diff --git a/third_party/cersei-provider/src/openai.rs b/third_party/cersei-provider/src/openai.rs
deleted file mode 100644
index 2ae6272..0000000
--- a/third_party/cersei-provider/src/openai.rs
+++ /dev/null
@@ -1,521 +0,0 @@
-//! OpenAI-compatible provider (works with OpenAI, Azure, Ollama, etc.)
-
-use crate::*;
-use cersei_types::*;
-use futures::StreamExt;
-use tokio::sync::mpsc;
-
-const OPENAI_API_BASE: &str = "https://api.openai.com/v1";
-
-pub struct OpenAi {
-    auth: Auth,
-    base_url: String,
-    default_model: String,
-    client: reqwest::Client,
-}
-
-impl OpenAi {
-    pub fn new(auth: Auth) -> Self {
-        let base_url = std::env::var("OPENAI_BASE_URL")
-            .ok()
-            .filter(|u| !u.is_empty())
-            .unwrap_or_else(|| OPENAI_API_BASE.to_string());
-        Self {
-            auth,
-            base_url,
-            default_model: "gpt-4o".to_string(),
-            client: reqwest::Client::new(),
-        }
-    }
-
-    pub fn from_env() -> Result<Self> {
-        let key = std::env::var("OPENAI_API_KEY")
-            .map_err(|_| CerseiError::Auth("OPENAI_API_KEY not set".into()))?;
-        Ok(Self::new(Auth::ApiKey(key)))
-    }
-
-    pub fn builder() -> OpenAiBuilder {
-        OpenAiBuilder::default()
-    }
-}
-
-#[async_trait::async_trait]
-impl Provider for OpenAi {
-    fn name(&self) -> &str {
-        "openai"
-    }
-
-    fn context_window(&self, model: &str) -> u64 {
-        match model {
-            m if m.contains("gpt-5") => 1_000_000,
-            m if m.starts_with("o1") || m.starts_with("o3") => 200_000,
-            m if m.contains("gpt-4o") => 128_000,
-            m if m.contains("gpt-4-turbo") => 128_000,
-            m if m.contains("gpt-4") => 8_192,
-            m if m.contains("gpt-3.5") => 16_385,
-            _ => 128_000,
-        }
-    }
-
-    fn capabilities(&self, _model: &str) -> ProviderCapabilities {
-        ProviderCapabilities {
-            streaming: true,
-            tool_use: true,
-            vision: true,
-            thinking: false,
-            system_prompt: true,
-            caching: false,
-        }
-    }
-
-    async fn complete(&self, request: CompletionRequest) -> Result<CompletionStream> {
-        let model = if request.model.is_empty() {
-            self.default_model.clone()
-        } else {
-            request.model.clone()
-        };
-
-        // Build OpenAI-format messages
-        let mut api_messages: Vec<serde_json::Value> = Vec::new();
-
-        if let Some(system) = &request.system {
-            api_messages.push(serde_json::json!({
-                "role": "system",
-                "content": system,
-            }));
-        }
-
-        for msg in &request.messages {
-            match msg.role {
-                Role::User => {
-                    // Check if this is a tool result message
-                    if let MessageContent::Blocks(blocks) = &msg.content {
-                        for block in blocks {
-                            if let ContentBlock::ToolResult {
-                                tool_use_id,
-                                content,
-                                is_error,
-                            } = block
-                            {
-                                api_messages.push(serde_json::json!({
-                                    "role": "tool",
-                                    "tool_call_id": tool_use_id,
-                                    "content": content,
-                                }));
-                            }
-                        }
-                        // Also include any text blocks as a user message
-                        let text: String = blocks
-                            .iter()
-                            .filter_map(|b| {
-                                if let ContentBlock::Text { text } = b {
-                                    Some(text.as_str())
-                                } else {
-                                    None
-                                }
-                            })
-                            .collect::<Vec<_>>()
-                            .join("\n");
-                        if !text.is_empty() {
-                            api_messages.push(serde_json::json!({
-                                "role": "user",
-                                "content": text,
-                            }));
-                        }
-                    } else {
-                        api_messages.push(serde_json::json!({
-                            "role": "user",
-                            "content": msg.get_all_text(),
-                        }));
-                    }
-                }
-                Role::Assistant => {
-                    // Check for tool_use blocks — serialize as tool_calls
-                    if let MessageContent::Blocks(blocks) = &msg.content {
-                        let tool_uses: Vec<&ContentBlock> = blocks
-                            .iter()
-                            .filter(|b| matches!(b, ContentBlock::ToolUse { .. }))
-                            .collect();
-                        if !tool_uses.is_empty() {
-                            let tool_calls: Vec<serde_json::Value> = tool_uses
-                                .iter()
-                                .map(|b| {
-                                    if let ContentBlock::ToolUse { id, name, input } = b {
-                                        serde_json::json!({
-                                            "id": id,
-                                            "type": "function",
-                                            "function": {
-                                                "name": name,
-                                                "arguments": input.to_string(),
-                                            }
-                                        })
-                                    } else {
-                                        serde_json::json!({})
-                                    }
-                                })
-                                .collect();
-
-                            let text_content: String = blocks
-                                .iter()
-                                .filter_map(|b| {
-                                    if let ContentBlock::Text { text } = b {
-                                        Some(text.as_str())
-                                    } else {
-                                        None
-                                    }
-                                })
-                                .collect::<Vec<_>>()
-                                .join("");
-
-                            let mut asst_msg = serde_json::json!({
-                                "role": "assistant",
-                                "tool_calls": tool_calls,
-                            });
-                            if !text_content.is_empty() {
-                                asst_msg["content"] = serde_json::json!(text_content);
-                            }
-                            api_messages.push(asst_msg);
-                        } else {
-                            api_messages.push(serde_json::json!({
-                                "role": "assistant",
-                                "content": msg.get_all_text(),
-                            }));
-                        }
-                    } else {
-                        api_messages.push(serde_json::json!({
-                            "role": "assistant",
-                            "content": msg.get_all_text(),
-                        }));
-                    }
-                }
-                Role::System => {
-                    api_messages.push(serde_json::json!({
-                        "role": "system",
-                        "content": msg.get_all_text(),
-                    }));
-                }
-            }
-        }
-
-        // GPT-5+ and o-series use max_completion_tokens; older models use max_tokens
-        let use_new_param =
-            model.starts_with("gpt-5") || model.starts_with("o1") || model.starts_with("o3");
-
-        let mut body = if use_new_param {
-            serde_json::json!({
-                "model": model,
-                "messages": api_messages,
-                "max_completion_tokens": request.max_tokens,
-                "stream": true,
-                "stream_options": { "include_usage": true },
-            })
-        } else {
-            serde_json::json!({
-                "model": model,
-                "messages": api_messages,
-                "max_tokens": request.max_tokens,
-                "stream": true,
-                "stream_options": { "include_usage": true },
-            })
-        };
-
-        if let Some(temp) = request.temperature {
-            body["temperature"] = serde_json::json!(temp);
-        }
-
-        if !request.tools.is_empty() {
-            let tools: Vec<serde_json::Value> = request
-                .tools
-                .iter()
-                .map(|t| {
-                    serde_json::json!({
-                        "type": "function",
-                        "function": {
-                            "name": t.name,
-                            "description": t.description,
-                            "parameters": t.input_schema,
-                        }
-                    })
-                })
-                .collect();
-            body["tools"] = serde_json::Value::Array(tools);
-        }
-
-        let url = format!("{}/chat/completions", self.base_url);
-        let auth_header = match &self.auth {
-            Auth::ApiKey(key) | Auth::Bearer(key) => format!("Bearer {}", key),
-            Auth::OAuth { token, .. } => format!("Bearer {}", token.access_token),
-            Auth::Custom(_) => String::new(),
-        };
-
-        let (tx, rx) = mpsc::channel(256);
-
-        let req = self
-            .client
-            .post(&url)
-            .header("authorization", &auth_header)
-            .header("content-type", "application/json")
-            .json(&body)
-            .build()
-            .map_err(CerseiError::Http)?;
-
-        let client = self.client.clone();
-
-        tokio::spawn(async move {
-            match client.execute(req).await {
-                Ok(response) => {
-                    if !response.status().is_success() {
-                        let status = response.status().as_u16();
-                        let body = response.text().await.unwrap_or_default();
-                        let _ = tx
-                            .send(StreamEvent::Error {
-                                message: format!("HTTP {}: {}", status, body),
-                            })
-                            .await;
-                        return;
-                    }
-
-                    let _ = tx
-                        .send(StreamEvent::MessageStart {
-                            id: String::new(),
-                            model: String::new(),
-                        })
-                        .await;
-                    let mut stream = response.bytes_stream();
-                    let mut buffer = String::new();
-                    let mut text_started = false;
-                    // Track tool calls being assembled across chunks
-                    // OpenAI sends: tool_calls[i].id, tool_calls[i].function.name (first chunk)
-                    //               tool_calls[i].function.arguments (subsequent chunks, accumulated)
-                    let mut tool_calls: std::collections::HashMap<usize, (String, String, String)> =
-                        std::collections::HashMap::new(); // index -> (id, name, args_json)
-                    let mut has_tool_calls = false;
-
-                    while let Some(chunk) = stream.next().await {
-                        match chunk {
-                            Ok(bytes) => {
-                                buffer.push_str(&String::from_utf8_lossy(&bytes));
-                                while let Some(pos) = buffer.find("\n") {
-                                    let line = buffer[..pos].to_string();
-                                    buffer = buffer[pos + 1..].to_string();
-
-                                    if let Some(data) = line.strip_prefix("data: ") {
-                                        let data = data.trim();
-                                        if data == "[DONE]" {
-                                            // Emit accumulated tool calls
-                                            for (idx, (id, name, args)) in &tool_calls {
-                                                let input: serde_json::Value =
-                                                    serde_json::from_str(args)
-                                                        .unwrap_or(serde_json::Value::Null);
-                                                let _ = tx
-                                                    .send(StreamEvent::ContentBlockStart {
-                                                        index: *idx + 1,
-                                                        block_type: "tool_use".into(),
-                                                        id: Some(id.clone()),
-                                                        name: Some(name.clone()),
-                                                    })
-                                                    .await;
-                                                // Send full args as InputJsonDelta
-                                                let _ = tx
-                                                    .send(StreamEvent::InputJsonDelta {
-                                                        index: *idx + 1,
-                                                        partial_json: args.clone(),
-                                                    })
-                                                    .await;
-                                                let _ = tx
-                                                    .send(StreamEvent::ContentBlockStop {
-                                                        index: *idx + 1,
-                                                    })
-                                                    .await;
-                                            }
-
-                                            if text_started {
-                                                let _ = tx
-                                                    .send(StreamEvent::ContentBlockStop {
-                                                        index: 0,
-                                                    })
-                                                    .await;
-                                            }
-
-                                            let stop = if has_tool_calls {
-                                                StopReason::ToolUse
-                                            } else {
-                                                StopReason::EndTurn
-                                            };
-
-                                            // Extract usage if available
-                                            let _ = tx
-                                                .send(StreamEvent::MessageDelta {
-                                                    stop_reason: Some(stop),
-                                                    usage: None,
-                                                })
-                                                .await;
-                                            let _ = tx.send(StreamEvent::MessageStop).await;
-                                            return;
-                                        }
-
-                                        if let Ok(json) =
-                                            serde_json::from_str::<serde_json::Value>(data)
-                                        {
-                                            let delta = &json["choices"][0]["delta"];
-                                            let finish_reason =
-                                                json["choices"][0]["finish_reason"].as_str();
-
-                                            // Text content
-                                            if let Some(text) = delta["content"].as_str() {
-                                                if !text_started {
-                                                    text_started = true;
-                                                    let _ = tx
-                                                        .send(StreamEvent::ContentBlockStart {
-                                                            index: 0,
-                                                            block_type: "text".into(),
-                                                            id: None,
-                                                            name: None,
-                                                        })
-                                                        .await;
-                                                }
-                                                let _ = tx
-                                                    .send(StreamEvent::TextDelta {
-                                                        index: 0,
-                                                        text: text.to_string(),
-                                                    })
-                                                    .await;
-                                            }
-
-                                            // Tool calls (accumulated across chunks)
-                                            if let Some(tc_array) = delta["tool_calls"].as_array() {
-                                                has_tool_calls = true;
-                                                for tc in tc_array {
-                                                    let idx =
-                                                        tc["index"].as_u64().unwrap_or(0) as usize;
-                                                    let entry = tool_calls
-                                                        .entry(idx)
-                                                        .or_insert_with(|| {
-                                                            (
-                                                                String::new(),
-                                                                String::new(),
-                                                                String::new(),
-                                                            )
-                                                        });
-
-                                                    // First chunk has id and function.name
-                                                    if let Some(id) = tc["id"].as_str() {
-                                                        entry.0 = id.to_string();
-                                                    }
-                                                    if let Some(name) =
-                                                        tc["function"]["name"].as_str()
-                                                    {
-                                                        entry.1 = name.to_string();
-                                                    }
-                                                    // Arguments accumulate across chunks
-                                                    if let Some(args) =
-                                                        tc["function"]["arguments"].as_str()
-                                                    {
-                                                        entry.2.push_str(args);
-                                                    }
-                                                }
-                                            }
-
-                                            // Usage from the final chunk
-                                            if let Some(usage) = json["usage"].as_object() {
-                                                let input_tokens = usage
-                                                    .get("prompt_tokens")
-                                                    .and_then(|v| v.as_u64())
-                                                    .unwrap_or(0);
-                                                let output_tokens = usage
-                                                    .get("completion_tokens")
-                                                    .and_then(|v| v.as_u64())
-                                                    .unwrap_or(0);
-                                                let _ = tx
-                                                    .send(StreamEvent::MessageDelta {
-                                                        stop_reason: finish_reason.and_then(|r| {
-                                                            match r {
-                                                                "stop" => Some(StopReason::EndTurn),
-                                                                "tool_calls" => {
-                                                                    Some(StopReason::ToolUse)
-                                                                }
-                                                                "length" => {
-                                                                    Some(StopReason::MaxTokens)
-                                                                }
-                                                                _ => None,
-                                                            }
-                                                        }),
-                                                        usage: Some(Usage {
-                                                            input_tokens,
-                                                            output_tokens,
-                                                            ..Default::default()
-                                                        }),
-                                                    })
-                                                    .await;
-                                            }
-                                        }
-                                    }
-                                }
-                            }
-                            Err(e) => {
-                                let _ = tx
-                                    .send(StreamEvent::Error {
-                                        message: e.to_string(),
-                                    })
-                                    .await;
-                                return;
-                            }
-                        }
-                    }
-                }
-                Err(e) => {
-                    let _ = tx
-                        .send(StreamEvent::Error {
-                            message: e.to_string(),
-                        })
-                        .await;
-                }
-            }
-        });
-
-        Ok(CompletionStream::new(rx))
-    }
-}
-
-// ─── Builder ─────────────────────────────────────────────────────────────────
-
-#[derive(Default)]
-pub struct OpenAiBuilder {
-    api_key: Option<String>,
-    base_url: Option<String>,
-    model: Option<String>,
-}
-
-impl OpenAiBuilder {
-    pub fn api_key(mut self, key: impl Into<String>) -> Self {
-        self.api_key = Some(key.into());
-        self
-    }
-
-    pub fn base_url(mut self, url: impl Into<String>) -> Self {
-        self.base_url = Some(url.into());
-        self
-    }
-
-    pub fn model(mut self, model: impl Into<String>) -> Self {
-        self.model = Some(model.into());
-        self
-    }
-
-    pub fn build(self) -> Result<OpenAi> {
-        let auth = if let Some(key) = self.api_key {
-            Auth::ApiKey(key)
-        } else {
-            return Err(CerseiError::Auth(
-                "No API key provided. Set OPENAI_API_KEY or use .api_key()".into(),
-            ));
-        };
-
-        Ok(OpenAi {
-            auth,
-            base_url: self.base_url.unwrap_or_else(|| OPENAI_API_BASE.to_string()),
-            default_model: self.model.unwrap_or_else(|| "gpt-4o".to_string()),
-            client: reqwest::Client::new(),
-        })
-    }
-}
diff --git a/third_party/cersei-provider/src/registry.rs b/third_party/cersei-provider/src/registry.rs
deleted file mode 100644
index 6e96fa0..0000000
--- a/third_party/cersei-provider/src/registry.rs
+++ /dev/null
@@ -1,492 +0,0 @@
-//! Static registry of known LLM providers.
-//!
-//! Each entry contains the provider's API base URL, env var names for auth,
-//! API format (Anthropic or OpenAI-compatible), and known models with
-//! context windows and capabilities.
-
-use crate::ProviderCapabilities;
-
-/// API format used by a provider.
-#[derive(Debug, Clone, Copy, PartialEq)]
-pub enum ApiFormat {
-    /// Anthropic's native API format (different SSE events, system prompt handling).
-    Anthropic,
-    /// OpenAI-compatible `/v1/chat/completions` format (used by most providers).
-    OpenAiCompatible,
-    /// Google Gemini native `generateContent` API format.
-    Google,
-}
-
-/// A known LLM provider.
-#[derive(Debug, Clone)]
-pub struct ProviderEntry {
-    pub id: &'static str,
-    pub name: &'static str,
-    pub api_base: &'static str,
-    pub env_keys: &'static [&'static str],
-    pub api_format: ApiFormat,
-    pub default_model: &'static str,
-    pub models: &'static [ModelEntry],
-}
-
-/// A known model within a provider.
-#[derive(Debug, Clone)]
-pub struct ModelEntry {
-    pub id: &'static str,
-    pub context_window: u64,
-    pub capabilities: ProviderCapabilities,
-}
-
-impl ProviderEntry {
-    /// Try to read an API key from the environment using this provider's env key list.
-    pub fn api_key_from_env(&self) -> Option<String> {
-        for key in self.env_keys {
-            if let Ok(val) = std::env::var(key) {
-                if !val.is_empty() {
-                    return Some(val);
-                }
-            }
-        }
-        None
-    }
-
-    /// Whether this provider requires an API key (Ollama does not).
-    pub fn requires_key(&self) -> bool {
-        !self.env_keys.is_empty()
-    }
-
-    /// Whether a local provider (one with no `env_keys`, e.g. Ollama) is
-    /// actually reachable right now. Does a 200ms TCP probe against the
-    /// host:port parsed out of `api_base`. Returns `true` when the probe
-    /// succeeds, `false` otherwise. Providers that *do* require a key
-    /// return `true` unconditionally (their availability is gated on the
-    /// env var, not connectivity).
-    pub fn is_reachable(&self) -> bool {
-        if self.requires_key() {
-            return true;
-        }
-        let host_port = extract_host_port(self.api_base);
-        let Some(host_port) = host_port else {
-            return false;
-        };
-        use std::net::ToSocketAddrs;
-        let addrs: Vec<std::net::SocketAddr> = match host_port.to_socket_addrs() {
-            Ok(it) => it.collect(),
-            Err(_) => return false,
-        };
-        addrs.into_iter().any(|addr| {
-            std::net::TcpStream::connect_timeout(&addr, std::time::Duration::from_millis(200))
-                .is_ok()
-        })
-    }
-
-    /// Get the context window for a model, falling back to a default.
-    pub fn context_window(&self, model: &str) -> u64 {
-        self.models
-            .iter()
-            .find(|m| m.id == model)
-            .map(|m| m.context_window)
-            .unwrap_or(128_000)
-    }
-}
-
-// ─── Capabilities shorthand ────────────────────────────────────────────────
-
-const FULL: ProviderCapabilities = ProviderCapabilities {
-    streaming: true,
-    tool_use: true,
-    vision: true,
-    thinking: false,
-    system_prompt: true,
-    caching: false,
-};
-
-const FULL_THINKING: ProviderCapabilities = ProviderCapabilities {
-    streaming: true,
-    tool_use: true,
-    vision: true,
-    thinking: true,
-    system_prompt: true,
-    caching: true,
-};
-
-const BASIC: ProviderCapabilities = ProviderCapabilities {
-    streaming: true,
-    tool_use: true,
-    vision: false,
-    thinking: false,
-    system_prompt: true,
-    caching: false,
-};
-
-// ─── Provider Registry ─────────────────────────────────────────────────────
-
-pub static REGISTRY: &[ProviderEntry] = &[
-    ProviderEntry {
-        id: "anthropic",
-        name: "Anthropic",
-        api_base: "https://api.anthropic.com",
-        env_keys: &["ANTHROPIC_API_KEY", "ANTHROPIC_KEY"],
-        api_format: ApiFormat::Anthropic,
-        default_model: "claude-sonnet-4-6",
-        models: &[
-            ModelEntry {
-                id: "claude-opus-4-6",
-                context_window: 200_000,
-                capabilities: FULL_THINKING,
-            },
-            ModelEntry {
-                id: "claude-sonnet-4-6",
-                context_window: 200_000,
-                capabilities: FULL_THINKING,
-            },
-            ModelEntry {
-                id: "claude-haiku-4-5",
-                context_window: 200_000,
-                capabilities: FULL,
-            },
-        ],
-    },
-    ProviderEntry {
-        id: "openai",
-        name: "OpenAI",
-        api_base: "https://api.openai.com/v1",
-        env_keys: &["OPENAI_API_KEY"],
-        api_format: ApiFormat::OpenAiCompatible,
-        default_model: "gpt-5.4-2026-03-05",
-        models: &[
-            ModelEntry {
-                id: "gpt-5.4-2026-03-05",
-                context_window: 1_000_000,
-                capabilities: FULL,
-            },
-            ModelEntry {
-                id: "gpt-5.3-chat-latest",
-                context_window: 1_000_000,
-                capabilities: FULL,
-            },
-            ModelEntry {
-                id: "gpt-5.3-chat",
-                context_window: 1_000_000,
-                capabilities: FULL,
-            },
-            ModelEntry {
-                id: "gpt-5.3-codex",
-                context_window: 1_000_000,
-                capabilities: FULL,
-            },
-            ModelEntry {
-                id: "gpt-5-chat",
-                context_window: 1_000_000,
-                capabilities: FULL,
-            },
-            ModelEntry {
-                id: "gpt-4o",
-                context_window: 128_000,
-                capabilities: FULL,
-            },
-            ModelEntry {
-                id: "gpt-4-turbo",
-                context_window: 128_000,
-                capabilities: FULL,
-            },
-            ModelEntry {
-                id: "o1",
-                context_window: 200_000,
-                capabilities: FULL,
-            },
-            ModelEntry {
-                id: "o3",
-                context_window: 200_000,
-                capabilities: FULL,
-            },
-            ModelEntry {
-                id: "o3-pro",
-                context_window: 200_000,
-                capabilities: FULL,
-            },
-        ],
-    },
-    ProviderEntry {
-        id: "google",
-        name: "Google",
-        api_base: "https://generativelanguage.googleapis.com/v1beta",
-        env_keys: &["GOOGLE_API_KEY", "GEMINI_API_KEY"],
-        api_format: ApiFormat::Google,
-        default_model: "gemini-3.1-pro-preview",
-        models: &[
-            ModelEntry {
-                id: "gemini-3.1-pro-preview",
-                context_window: 2_000_000,
-                capabilities: FULL,
-            },
-            ModelEntry {
-                id: "gemini-3.0-flash",
-                context_window: 1_000_000,
-                capabilities: FULL,
-            },
-            ModelEntry {
-                id: "gemini-2.0-flash",
-                context_window: 1_000_000,
-                capabilities: FULL,
-            },
-            ModelEntry {
-                id: "gemini-2.0-pro",
-                context_window: 1_000_000,
-                capabilities: FULL,
-            },
-            ModelEntry {
-                id: "gemini-1.5-pro",
-                context_window: 2_000_000,
-                capabilities: FULL,
-            },
-            ModelEntry {
-                id: "gemini-1.5-flash",
-                context_window: 1_000_000,
-                capabilities: FULL,
-            },
-        ],
-    },
-    ProviderEntry {
-        id: "mistral",
-        name: "Mistral",
-        api_base: "https://api.mistral.ai/v1",
-        env_keys: &["MISTRAL_API_KEY"],
-        api_format: ApiFormat::OpenAiCompatible,
-        default_model: "mistral-large-latest",
-        models: &[
-            ModelEntry {
-                id: "mistral-large-latest",
-                context_window: 128_000,
-                capabilities: FULL,
-            },
-            ModelEntry {
-                id: "codestral-latest",
-                context_window: 256_000,
-                capabilities: BASIC,
-            },
-        ],
-    },
-    ProviderEntry {
-        id: "groq",
-        name: "Groq",
-        api_base: "https://api.groq.com/openai/v1",
-        env_keys: &["GROQ_API_KEY"],
-        api_format: ApiFormat::OpenAiCompatible,
-        default_model: "llama-3.1-70b-versatile",
-        models: &[
-            ModelEntry {
-                id: "llama-3.1-70b-versatile",
-                context_window: 128_000,
-                capabilities: BASIC,
-            },
-            ModelEntry {
-                id: "llama-3.1-8b-instant",
-                context_window: 128_000,
-                capabilities: BASIC,
-            },
-            ModelEntry {
-                id: "mixtral-8x7b-32768",
-                context_window: 32_768,
-                capabilities: BASIC,
-            },
-        ],
-    },
-    ProviderEntry {
-        id: "deepseek",
-        name: "DeepSeek",
-        api_base: "https://api.deepseek.com/v1",
-        env_keys: &["DEEPSEEK_API_KEY"],
-        api_format: ApiFormat::OpenAiCompatible,
-        default_model: "deepseek-chat",
-        models: &[
-            ModelEntry {
-                id: "deepseek-chat",
-                context_window: 64_000,
-                capabilities: FULL,
-            },
-            ModelEntry {
-                id: "deepseek-coder",
-                context_window: 64_000,
-                capabilities: BASIC,
-            },
-        ],
-    },
-    ProviderEntry {
-        id: "xai",
-        name: "xAI",
-        api_base: "https://api.x.ai/v1",
-        env_keys: &["XAI_API_KEY"],
-        api_format: ApiFormat::OpenAiCompatible,
-        default_model: "grok-2",
-        models: &[ModelEntry {
-            id: "grok-2",
-            context_window: 128_000,
-            capabilities: FULL,
-        }],
-    },
-    ProviderEntry {
-        id: "together",
-        name: "Together",
-        api_base: "https://api.together.xyz/v1",
-        env_keys: &["TOGETHER_API_KEY"],
-        api_format: ApiFormat::OpenAiCompatible,
-        default_model: "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
-        models: &[ModelEntry {
-            id: "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
-            context_window: 128_000,
-            capabilities: BASIC,
-        }],
-    },
-    ProviderEntry {
-        id: "fireworks",
-        name: "Fireworks",
-        api_base: "https://api.fireworks.ai/inference/v1",
-        env_keys: &["FIREWORKS_API_KEY"],
-        api_format: ApiFormat::OpenAiCompatible,
-        default_model: "accounts/fireworks/models/llama-v3p1-70b-instruct",
-        models: &[ModelEntry {
-            id: "accounts/fireworks/models/llama-v3p1-70b-instruct",
-            context_window: 128_000,
-            capabilities: BASIC,
-        }],
-    },
-    ProviderEntry {
-        id: "perplexity",
-        name: "Perplexity",
-        api_base: "https://api.perplexity.ai",
-        env_keys: &["PERPLEXITY_API_KEY"],
-        api_format: ApiFormat::OpenAiCompatible,
-        default_model: "llama-3.1-sonar-large-128k-online",
-        models: &[ModelEntry {
-            id: "llama-3.1-sonar-large-128k-online",
-            context_window: 128_000,
-            capabilities: BASIC,
-        }],
-    },
-    ProviderEntry {
-        id: "cerebras",
-        name: "Cerebras",
-        api_base: "https://api.cerebras.ai/v1",
-        env_keys: &["CEREBRAS_API_KEY"],
-        api_format: ApiFormat::OpenAiCompatible,
-        default_model: "llama3.1-70b",
-        models: &[ModelEntry {
-            id: "llama3.1-70b",
-            context_window: 128_000,
-            capabilities: BASIC,
-        }],
-    },
-    ProviderEntry {
-        id: "ollama",
-        name: "Ollama",
-        api_base: "http://localhost:11434/v1",
-        env_keys: &[],
-        api_format: ApiFormat::OpenAiCompatible,
-        default_model: "llama3.1",
-        models: &[],
-    },
-    ProviderEntry {
-        id: "openrouter",
-        name: "OpenRouter",
-        api_base: "https://openrouter.ai/api/v1",
-        env_keys: &["OPENROUTER_API_KEY"],
-        api_format: ApiFormat::OpenAiCompatible,
-        default_model: "anthropic/claude-3.5-sonnet",
-        models: &[],
-    },
-    ProviderEntry {
-        id: "cohere",
-        name: "Cohere",
-        api_base: "https://api.cohere.com/compatibility/v1",
-        env_keys: &["COHERE_API_KEY", "CO_API_KEY"],
-        api_format: ApiFormat::OpenAiCompatible,
-        default_model: "command-r-plus",
-        models: &[
-            ModelEntry {
-                id: "command-r-plus",
-                context_window: 128_000,
-                capabilities: FULL,
-            },
-            ModelEntry {
-                id: "command-r",
-                context_window: 128_000,
-                capabilities: FULL,
-            },
-            ModelEntry {
-                id: "command-a",
-                context_window: 256_000,
-                capabilities: FULL,
-            },
-        ],
-    },
-    ProviderEntry {
-        id: "sambanova",
-        name: "SambaNova",
-        api_base: "https://api.sambanova.ai/v1",
-        env_keys: &["SAMBANOVA_API_KEY"],
-        api_format: ApiFormat::OpenAiCompatible,
-        default_model: "Meta-Llama-3.1-70B-Instruct",
-        models: &[
-            ModelEntry {
-                id: "Meta-Llama-3.1-70B-Instruct",
-                context_window: 128_000,
-                capabilities: BASIC,
-            },
-            ModelEntry {
-                id: "Meta-Llama-3.1-405B-Instruct",
-                context_window: 128_000,
-                capabilities: BASIC,
-            },
-        ],
-    },
-];
-
-/// Look up a provider by ID.
-pub fn lookup(provider_id: &str) -> Option<&'static ProviderEntry> {
-    REGISTRY.iter().find(|e| e.id == provider_id)
-}
-
-/// All registered providers.
-pub fn all() -> &'static [ProviderEntry] {
-    REGISTRY
-}
-
-/// Providers that have valid auth configured in the environment **and** — for
-/// local providers without an API key (e.g. Ollama) — are actually reachable
-/// via a quick TCP probe.
-///
-/// The probe prevents `from_model_string("auto")` from silently picking Ollama
-/// when the daemon is not running, which was causing the CLI to default to
-/// `llama3.1` on machines without any LLM installed.
-pub fn available() -> Vec<&'static ProviderEntry> {
-    REGISTRY
-        .iter()
-        .filter(|e| {
-            if e.requires_key() {
-                e.api_key_from_env().is_some()
-            } else {
-                e.is_reachable()
-            }
-        })
-        .collect()
-}
-
-/// Extract a `host:port` string from an http(s) URL for TCP probing.
-fn extract_host_port(api_base: &str) -> Option<String> {
-    let trimmed = api_base
-        .trim_start_matches("https://")
-        .trim_start_matches("http://");
-    let authority = trimmed.split('/').next()?;
-    if authority.contains(':') {
-        Some(authority.to_string())
-    } else {
-        // default ports based on scheme
-        let port = if api_base.starts_with("https://") {
-            443
-        } else {
-            80
-        };
-        Some(format!("{authority}:{port}"))
-    }
-}
diff --git a/third_party/cersei-provider/src/router.rs b/third_party/cersei-provider/src/router.rs
deleted file mode 100644
index 507f84a..0000000
--- a/third_party/cersei-provider/src/router.rs
+++ /dev/null
@@ -1,274 +0,0 @@
-//! Model router: parse `provider/model` strings and construct the right provider.
-//!
-//! ```rust,ignore
-//! use cersei_provider::router;
-//!
-//! let (provider, model) = router::from_model_string("openai/gpt-4o")?;
-//! let (provider, model) = router::from_model_string("groq/llama-3.1-70b-versatile")?;
-//! let (provider, model) = router::from_model_string("gpt-4o")?; // auto-detect
-//! ```
-
-use crate::registry::{self, ApiFormat, ProviderEntry};
-use crate::{Anthropic, Auth, Gemini, OpenAi, Provider};
-use cersei_types::*;
-
-/// Parse a model string and return a configured provider + resolved model name.
-///
-/// Accepts:
-/// - `"provider/model"` — explicit routing (e.g., `"groq/llama-3.1-70b-versatile"`)
-/// - `"model-name"` — auto-detect from known prefixes and env vars (e.g., `"gpt-4o"`)
-///
-/// Returns `(provider, model_name)` where `model_name` has the provider prefix stripped.
-pub fn from_model_string(model: &str) -> Result<(Box<dyn Provider>, String)> {
-    // "auto" — pick the first available *keyed* provider's default model.
-    //
-    // Local providers (Ollama, etc.) are skipped here on purpose: they need
-    // explicit opt-in via `--model ollama/<model>` so the CLI never silently
-    // starts talking to a daemon the user didn't ask for.
-    if model == "auto" {
-        let available = registry::available();
-        let entry = available
-            .iter()
-            .find(|e| e.requires_key())
-            .copied()
-            .ok_or_else(|| {
-                let all_keys: Vec<String> = registry::all()
-                    .iter()
-                    .flat_map(|e| e.env_keys.iter().map(|k| k.to_string()))
-                    .collect();
-                CerseiError::Auth(format!(
-                    "No API keys found. Set one of: {}\n\nOr point at a local provider explicitly, e.g. --model ollama/llama3.1",
-                    all_keys.join(", ")
-                ))
-            })?;
-        let model_name = entry.default_model;
-        let provider = build_provider(entry, model_name)?;
-        return Ok((provider, model_name.to_string()));
-    }
-
-    if let Some((provider_id, model_name)) = model.split_once('/') {
-        // Explicit: "anthropic/claude-sonnet-4-6"
-        let entry = registry::lookup(provider_id).ok_or_else(|| {
-            let known: Vec<&str> = registry::all().iter().map(|e| e.id).collect();
-            CerseiError::Config(format!(
-                "Unknown provider: '{}'. Known providers: {}",
-                provider_id,
-                known.join(", ")
-            ))
-        })?;
-        let provider = build_provider(entry, model_name)?;
-        Ok((provider, model_name.to_string()))
-    } else {
-        // Auto-detect: "gpt-4o" → openai
-        let (entry, resolved) = auto_detect(model)?;
-        let provider = build_provider(entry, resolved)?;
-        Ok((provider, resolved.to_string()))
-    }
-}
-
-/// List all providers that have valid auth configured.
-pub fn available_providers() -> Vec<&'static ProviderEntry> {
-    registry::available()
-}
-
-/// List all known providers.
-pub fn all_providers() -> &'static [ProviderEntry] {
-    registry::all()
-}
-
-// ─── Internal ──────────────────────────────────────────────────────────────
-
-fn build_provider(entry: &ProviderEntry, model: &str) -> Result<Box<dyn Provider>> {
-    match entry.api_format {
-        ApiFormat::Anthropic => {
-            let key = entry.api_key_from_env().ok_or_else(|| {
-                CerseiError::Auth(format!(
-                    "No API key for {}. Set {} in your environment.",
-                    entry.name,
-                    entry.env_keys.join(" or ")
-                ))
-            })?;
-            Ok(Box::new(Anthropic::new(Auth::ApiKey(key))))
-        }
-        ApiFormat::Google => {
-            let key = entry.api_key_from_env().ok_or_else(|| {
-                CerseiError::Auth(format!(
-                    "No API key for {}. Set {} in your environment.",
-                    entry.name,
-                    entry.env_keys.join(" or ")
-                ))
-            })?;
-            let provider = Gemini::builder().api_key(key).model(model).build()?;
-            Ok(Box::new(provider))
-        }
-        ApiFormat::OpenAiCompatible => {
-            let key = if entry.requires_key() {
-                entry.api_key_from_env().ok_or_else(|| {
-                    CerseiError::Auth(format!(
-                        "No API key for {}. Set {} in your environment.",
-                        entry.name,
-                        entry.env_keys.join(" or ")
-                    ))
-                })?
-            } else {
-                // Ollama and other local providers don't need a key
-                "no-key".to_string()
-            };
-
-            let provider = OpenAi::builder()
-                .base_url(entry.api_base)
-                .api_key(key)
-                .model(model)
-                .build()?;
-
-            Ok(Box::new(provider))
-        }
-    }
-}
-
-/// Auto-detect provider from a bare model name.
-fn auto_detect(model: &str) -> Result<(&'static ProviderEntry, &str)> {
-    // 1. Check known model prefixes
-    let prefix_match = match model {
-        m if m.starts_with("claude-") => Some("anthropic"),
-        m if m.starts_with("gpt-")
-            || m.starts_with("o1")
-            || m.starts_with("o3")
-            || m.starts_with("gpt5") =>
-        {
-            Some("openai")
-        }
-        m if m.starts_with("gemini-") => Some("google"),
-        m if m.starts_with("mistral-") || m.starts_with("codestral-") => Some("mistral"),
-        m if m.starts_with("deepseek-") => Some("deepseek"),
-        m if m.starts_with("grok-") => Some("xai"),
-        m if m.starts_with("command-") => Some("cohere"),
-        m if m.starts_with("llama") => {
-            // llama models could be on Groq, Together, etc.
-            // Prefer Groq if key is set, otherwise Together
-            if std::env::var("GROQ_API_KEY")
-                .ok()
-                .filter(|k| !k.is_empty())
-                .is_some()
-            {
-                Some("groq")
-            } else if std::env::var("TOGETHER_API_KEY")
-                .ok()
-                .filter(|k| !k.is_empty())
-                .is_some()
-            {
-                Some("together")
-            } else {
-                Some("ollama")
-            }
-        }
-        _ => None,
-    };
-
-    if let Some(provider_id) = prefix_match {
-        if let Some(entry) = registry::lookup(provider_id) {
-            return Ok((entry, model));
-        }
-    }
-
-    // 2. Fall back to first available provider
-    let available = registry::available();
-    if let Some(entry) = available.first() {
-        return Ok((entry, model));
-    }
-
-    // 3. Nothing available
-    let all_keys: Vec<String> = registry::all()
-        .iter()
-        .flat_map(|e| e.env_keys.iter().map(|k| k.to_string()))
-        .collect();
-
-    Err(CerseiError::Auth(format!(
-        "Cannot detect provider for model '{}'. No API keys found.\n\nSet one of: {}",
-        model,
-        all_keys.join(", ")
-    )))
-}
-
-// ─── Tests ─────────────────────────────────────────────────────────────────
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn test_explicit_routing_unknown_provider() {
-        let result = from_model_string("nonexistent/some-model");
-        assert!(result.is_err());
-        match result {
-            Err(e) => {
-                let msg = e.to_string();
-                assert!(
-                    msg.contains("nonexistent"),
-                    "Error should mention the provider name: {msg}"
-                );
-            }
-            Ok(_) => panic!("Expected error for unknown provider"),
-        }
-    }
-
-    #[test]
-    fn test_auto_detect_prefixes() {
-        // These test auto_detect logic without requiring env vars
-        let (entry, model) = auto_detect("claude-sonnet-4-6").unwrap_or_else(|_| {
-            // If no key is set, it still identifies the provider
-            (registry::lookup("anthropic").unwrap(), "claude-sonnet-4-6")
-        });
-        assert_eq!(entry.id, "anthropic");
-        assert_eq!(model, "claude-sonnet-4-6");
-    }
-
-    #[test]
-    fn test_registry_lookup() {
-        assert!(registry::lookup("anthropic").is_some());
-        assert!(registry::lookup("openai").is_some());
-        assert!(registry::lookup("groq").is_some());
-        assert!(registry::lookup("ollama").is_some());
-        assert!(registry::lookup("nonexistent").is_none());
-    }
-
-    #[test]
-    fn test_registry_lookup_new_providers() {
-        assert!(registry::lookup("cohere").is_some());
-        assert!(registry::lookup("sambanova").is_some());
-    }
-
-    #[test]
-    fn test_google_native_format() {
-        let entry = registry::lookup("google").unwrap();
-        assert_eq!(entry.api_format, ApiFormat::Google);
-        assert!(entry.api_base.contains("v1beta"));
-        assert!(!entry.api_base.contains("openai"));
-    }
-
-    #[test]
-    fn test_auto_detect_cohere() {
-        let (entry, model) = auto_detect("command-r-plus")
-            .unwrap_or_else(|_| (registry::lookup("cohere").unwrap(), "command-r-plus"));
-        assert_eq!(entry.id, "cohere");
-        assert_eq!(model, "command-r-plus");
-    }
-
-    #[test]
-    fn test_ollama_no_key_required() {
-        let entry = registry::lookup("ollama").unwrap();
-        assert!(!entry.requires_key());
-    }
-
-    #[test]
-    fn test_all_providers_count() {
-        assert!(registry::all().len() >= 15);
-    }
-
-    #[test]
-    fn test_provider_entry_context_window() {
-        let entry = registry::lookup("anthropic").unwrap();
-        assert_eq!(entry.context_window("claude-sonnet-4-6"), 200_000);
-        assert_eq!(entry.context_window("unknown-model"), 128_000); // fallback
-    }
-}
diff --git a/third_party/cersei-provider/src/stream.rs b/third_party/cersei-provider/src/stream.rs
deleted file mode 100644
index afb2b18..0000000
--- a/third_party/cersei-provider/src/stream.rs
+++ /dev/null
@@ -1,156 +0,0 @@
-//! Stream accumulator: collects SSE stream events into a complete response.
-
-use cersei_types::*;
-use std::collections::HashMap;
-
-/// Accumulates streaming events into content blocks.
-pub struct StreamAccumulator {
-    content_blocks: Vec<ContentBlock>,
-    partial_text: HashMap<usize, String>,
-    partial_json: HashMap<usize, String>,
-    partial_thinking: HashMap<usize, String>,
-    block_types: HashMap<usize, String>,
-    tool_use_ids: HashMap<usize, String>,
-    tool_use_names: HashMap<usize, String>,
-    stop_reason: Option<StopReason>,
-    usage: Usage,
-    model: Option<String>,
-    message_id: Option<String>,
-}
-
-impl StreamAccumulator {
-    pub fn new() -> Self {
-        Self {
-            content_blocks: Vec::new(),
-            partial_text: HashMap::new(),
-            partial_json: HashMap::new(),
-            partial_thinking: HashMap::new(),
-            block_types: HashMap::new(),
-            tool_use_ids: HashMap::new(),
-            tool_use_names: HashMap::new(),
-            stop_reason: None,
-            usage: Usage::default(),
-            model: None,
-            message_id: None,
-        }
-    }
-
-    pub fn process_event(&mut self, event: StreamEvent) {
-        match event {
-            StreamEvent::MessageStart { id, model } => {
-                self.message_id = Some(id);
-                self.model = Some(model);
-            }
-            StreamEvent::ContentBlockStart {
-                index,
-                block_type,
-                id,
-                name,
-            } => {
-                self.block_types.insert(index, block_type);
-                if let Some(id) = id {
-                    self.tool_use_ids.insert(index, id);
-                }
-                if let Some(name) = name {
-                    self.tool_use_names.insert(index, name);
-                }
-            }
-            StreamEvent::TextDelta { index, text } => {
-                self.partial_text.entry(index).or_default().push_str(&text);
-            }
-            StreamEvent::InputJsonDelta {
-                index,
-                partial_json,
-            } => {
-                self.partial_json
-                    .entry(index)
-                    .or_default()
-                    .push_str(&partial_json);
-            }
-            StreamEvent::ThinkingDelta { index, thinking } => {
-                self.partial_thinking
-                    .entry(index)
-                    .or_default()
-                    .push_str(&thinking);
-            }
-            StreamEvent::ContentBlockStop { index } => {
-                let block_type = self.block_types.get(&index).cloned().unwrap_or_default();
-                let block = match block_type.as_str() {
-                    "text" => ContentBlock::Text {
-                        text: self.partial_text.remove(&index).unwrap_or_default(),
-                    },
-                    "tool_use" => {
-                        let json_str = self.partial_json.remove(&index).unwrap_or_default();
-                        let input =
-                            serde_json::from_str(&json_str).unwrap_or(serde_json::Value::Null);
-                        ContentBlock::ToolUse {
-                            id: self.tool_use_ids.remove(&index).unwrap_or_default(),
-                            name: self.tool_use_names.remove(&index).unwrap_or_default(),
-                            input,
-                        }
-                    }
-                    "thinking" => ContentBlock::Thinking {
-                        thinking: self.partial_thinking.remove(&index).unwrap_or_default(),
-                        signature: String::new(),
-                    },
-                    _ => ContentBlock::Text {
-                        text: self.partial_text.remove(&index).unwrap_or_default(),
-                    },
-                };
-                // Ensure we have enough slots
-                while self.content_blocks.len() <= index {
-                    self.content_blocks.push(ContentBlock::Text {
-                        text: String::new(),
-                    });
-                }
-                self.content_blocks[index] = block;
-            }
-            StreamEvent::MessageDelta { stop_reason, usage } => {
-                if let Some(sr) = stop_reason {
-                    self.stop_reason = Some(sr);
-                }
-                if let Some(u) = usage {
-                    self.usage.merge(&u);
-                }
-            }
-            StreamEvent::MessageStop => {}
-            StreamEvent::Ping => {}
-            StreamEvent::Error { .. } => {}
-        }
-    }
-
-    pub fn into_response(self) -> Result<super::CompletionResponse> {
-        let message = Message {
-            role: Role::Assistant,
-            content: if self.content_blocks.is_empty() {
-                MessageContent::Text(String::new())
-            } else {
-                MessageContent::Blocks(self.content_blocks)
-            },
-            id: self.message_id,
-            metadata: Some(MessageMetadata {
-                model: self.model,
-                usage: Some(self.usage.clone()),
-                stop_reason: self.stop_reason.clone(),
-                provider_data: serde_json::Value::Null,
-            }),
-        };
-
-        Ok(super::CompletionResponse {
-            message,
-            usage: self.usage,
-            stop_reason: self.stop_reason.unwrap_or(StopReason::EndTurn),
-        })
-    }
-
-    /// Get accumulated text so far (for streaming display).
-    pub fn current_text(&self) -> String {
-        self.partial_text.values().cloned().collect()
-    }
-}
-
-impl Default for StreamAccumulator {
-    fn default() -> Self {
-        Self::new()
-    }
-}