diff --git a/examples/a2ui_agent/agent.py b/examples/a2ui_agent/agent.py index 56b06e0d..116dab19 100644 --- a/examples/a2ui_agent/agent.py +++ b/examples/a2ui_agent/agent.py @@ -22,6 +22,7 @@ """ from veadk import Agent +from veadk.utils.pdf_to_images import pdf_to_images_before_model_callback INSTRUCTION = """You are a helpful assistant that can render rich UI. @@ -38,6 +39,9 @@ description="Demo agent that replies with A2UI rich UI.", instruction=INSTRUCTION, enable_a2ui=True, + # Uploaded PDFs are rendered to page images so the vision model can read + # them. The default model (doubao-seed-1.6) is vision-capable. + before_model_callback=pdf_to_images_before_model_callback, ) # Required by the Google ADK agent loader. diff --git a/examples/basic-app/README.md b/examples/basic-app/README.md index 2e3e1b9e..d6e93f75 100644 --- a/examples/basic-app/README.md +++ b/examples/basic-app/README.md @@ -14,7 +14,7 @@ basic-app/ ├── agents/ │ └── basic_app_agent/ # backend agent (A2UI on), exposes root_agent ├── agentkit.yaml # AgentKit deployment config (build_script wired) -├── scripts/install_veadk.sh # installs veadk[a2ui] from feat/a2ui (build time) +├── scripts/install_veadk.sh # installs veadk[a2ui,pdf] (build time) ├── requirements.txt # empty (veadk is installed by the build script) ├── .dockerignore └── .env.example @@ -50,7 +50,7 @@ cp .env.example .env ## 2. Run locally (optional) ```bash -pip install "veadk-python[a2ui]" +pip install "veadk-python[a2ui,pdf]" python app.py # or: python -m app # open http://127.0.0.1:8000 ``` @@ -59,6 +59,14 @@ You should see the web UI; ask e.g. "show me a flight status card" and the agent replies with rich A2UI. `/list-apps` returns `["basic_app_agent"]` and `/ping` returns `{"status": "ok"}`. +### Attachments (image / PDF) + +The composer's **+** button uploads **images** and **PDFs**. 
Images are sent to +the (vision-capable) model directly; PDFs are rendered to page images by a +`before_model_callback` (`veadk.utils.pdf_to_images`) so the model can read them +— this needs the `pdf` extra (included in `[a2ui,pdf]` above) and a +vision-capable model (the default `doubao-seed-1.6` is). + ## 3. Deploy to AgentKit ```bash diff --git a/examples/basic-app/README.zh.md b/examples/basic-app/README.zh.md index 09e28f39..43e22f15 100644 --- a/examples/basic-app/README.zh.md +++ b/examples/basic-app/README.zh.md @@ -13,7 +13,7 @@ basic-app/ ├── agents/ │ └── basic_app_agent/ # 后端 Agent(已开启 A2UI),暴露 root_agent ├── agentkit.yaml # AgentKit 部署配置(已接入 build_script) -├── scripts/install_veadk.sh # 构建时从 feat/a2ui 安装 veadk[a2ui] +├── scripts/install_veadk.sh # 构建时从 feat/a2ui 安装 veadk[a2ui,pdf] ├── requirements.txt # 留空(veadk 由构建脚本安装) ├── .dockerignore └── .env.example @@ -47,7 +47,7 @@ cp .env.example .env ## 2. 本地运行(可选) ```bash -pip install "veadk-python[a2ui]" +pip install "veadk-python[a2ui,pdf]" python app.py # 或:python -m app # 打开 http://127.0.0.1:8000 ``` @@ -55,6 +55,13 @@ python app.py # 或:python -m app 你应当能看到 Web UI;试着问“给我一张航班状态卡片”,Agent 会用富 A2UI 作答。 `/list-apps` 返回 `["basic_app_agent"]`,`/ping` 返回 `{"status": "ok"}`。 +### 附件(图片 / PDF) + +输入框的 **+** 按钮可上传**图片**与 **PDF**。图片会直接发送给(具备视觉能力的) +模型;PDF 则由 `before_model_callback`(`veadk.utils.pdf_to_images`)渲染为逐页 +图片后交给模型识别——这需要 `pdf` 额外依赖(已包含在上面的 `[a2ui,pdf]` 中), +并使用具备视觉能力的模型(默认的 `doubao-seed-1.6` 即可)。 + ## 3. 部署到 AgentKit ```bash diff --git a/examples/basic-app/agents/basic_app_agent/agent.py b/examples/basic-app/agents/basic_app_agent/agent.py index b56da69b..86efff3b 100644 --- a/examples/basic-app/agents/basic_app_agent/agent.py +++ b/examples/basic-app/agents/basic_app_agent/agent.py @@ -20,6 +20,7 @@ """ from veadk import Agent +from veadk.utils.pdf_to_images import pdf_to_images_before_model_callback INSTRUCTION = """You are a helpful assistant that can render rich UI. 
@@ -36,6 +37,9 @@ description="Basic front+back demo agent that can reply with A2UI rich UI.", instruction=INSTRUCTION, enable_a2ui=True, + # Uploaded PDFs are rendered to page images so the vision model can read + # them. The default model (doubao-seed-1.6) is vision-capable. + before_model_callback=pdf_to_images_before_model_callback, ) # Required by the Google ADK agent loader. diff --git a/examples/basic-app/scripts/install_veadk.sh b/examples/basic-app/scripts/install_veadk.sh index ddf6272c..7b59c050 100755 --- a/examples/basic-app/scripts/install_veadk.sh +++ b/examples/basic-app/scripts/install_veadk.sh @@ -34,4 +34,4 @@ done cd "$SRC" git sparse-checkout set veadk -uv pip install ".[a2ui]" +uv pip install ".[a2ui,pdf]" diff --git a/frontend/src/App.tsx b/frontend/src/App.tsx index 2c54c919..83439b86 100644 --- a/frontend/src/App.tsx +++ b/frontend/src/App.tsx @@ -1,5 +1,5 @@ import { useEffect, useState } from "react"; -import { Check, Copy, Loader2 } from "lucide-react"; +import { Check, Copy, FileText, Loader2 } from "lucide-react"; import { motion } from "motion/react"; import { createSession, @@ -9,6 +9,7 @@ import { listSessions, runSSE, type AdkSession, + type Attachment, } from "./adk/client"; import { applyEvent, emptyAcc, eventsToTurns, type Turn } from "./blocks"; import { Sidebar } from "./ui/Sidebar"; @@ -104,6 +105,22 @@ const GREETINGS = [ ]; const pickGreeting = () => GREETINGS[Math.floor(Math.random() * GREETINGS.length)]; +const MAX_FILE_BYTES = 20 * 1024 * 1024; // 20 MB/file (base64 inflates ~33%) + +/** Read a File as base64 (without the `data:...;base64,` prefix). */ +function fileToBase64(file: File): Promise { + return new Promise((resolve, reject) => { + const reader = new FileReader(); + reader.onload = () => { + const res = String(reader.result); + const comma = res.indexOf(","); + resolve(comma >= 0 ? 
res.slice(comma + 1) : res); + }; + reader.onerror = () => reject(reader.error); + reader.readAsDataURL(file); + }); +} + export default function App() { const [apps, setApps] = useState([]); const [appName, setAppName] = useState(""); @@ -111,6 +128,7 @@ export default function App() { const [sessionId, setSessionId] = useState(""); const [turns, setTurns] = useState([]); const [input, setInput] = useState(""); + const [attachments, setAttachments] = useState([]); const [busy, setBusy] = useState(false); const [error, setError] = useState(""); const [traceOpen, setTraceOpen] = useState(false); @@ -220,8 +238,25 @@ export default function App() { } } - async function send(text: string) { - if (!text.trim() || busy || !appName || !userId) return; + async function addFiles(files: FileList | File[]) { + const picked: Attachment[] = []; + for (const f of Array.from(files)) { + if (f.size > MAX_FILE_BYTES) { + setError(`文件过大(>20MB):${f.name}`); + continue; + } + const data = await fileToBase64(f); + picked.push({ + mimeType: f.type || "application/octet-stream", + data, + name: f.name, + }); + } + if (picked.length) setAttachments((a) => [...a, ...picked]); + } + + async function send(text: string, atts: Attachment[] = []) { + if ((!text.trim() && atts.length === 0) || busy || !appName || !userId) return; setError(""); setBusy(true); @@ -238,9 +273,16 @@ export default function App() { } } + const userBlocks: Turn["blocks"] = []; + if (atts.length) + userBlocks.push({ + kind: "attachment", + files: atts.map((a) => ({ mimeType: a.mimeType, data: a.data, name: a.name })), + }); + if (text.trim()) userBlocks.push({ kind: "text", text }); setTurns((t) => [ ...t, - { role: "user", blocks: [{ kind: "text", text }], meta: { ts: Date.now() / 1000 } }, + { role: "user", blocks: userBlocks, meta: { ts: Date.now() / 1000 } }, { role: "assistant", blocks: [] }, ]); @@ -248,7 +290,18 @@ export default function App() { let acc = emptyAcc(); let tokens = 0; let ts = Date.now() / 1000; 
- for await (const event of runSSE({ appName, userId, sessionId: sid, text })) { + for await (const event of runSSE({ + appName, + userId, + sessionId: sid, + text, + attachments: atts, + })) { + const errMsg = event.error ?? event.errorMessage ?? event.error_message; + if (typeof errMsg === "string" && errMsg) { + setError(errMsg); + break; + } acc = applyEvent(acc, event); const usage = event.usageMetadata ?? event.usage_metadata; if (usage?.totalTokenCount) tokens = usage.totalTokenCount; @@ -305,11 +358,18 @@ export default function App() { onChange={setInput} onSubmit={() => { const text = input; + const atts = attachments; setInput(""); - send(text); + setAttachments([]); + send(text, atts); }} disabled={!appName || !userId} busy={busy} + attachments={attachments} + onAddFiles={addFiles} + onRemoveAttachment={(i) => + setAttachments((a) => a.filter((_, j) => j !== i)) + } /> ); return ( @@ -332,7 +392,10 @@ export default function App() { {turns.map((turn, i) => { const isLast = i === turns.length - 1; if (turn.role === "user") { - const text = turn.blocks.map((b) => ("text" in b ? b.text : "")).join(""); + const text = turn.blocks.map((b) => (b.kind === "text" ? b.text : "")).join(""); + const atts = turn.blocks.flatMap((b) => + b.kind === "attachment" ? b.files : [], + ); return ( -
- -
+ {atts.length > 0 && ( +
+ {atts.map((f, j) => + f.mimeType?.startsWith("image/") && f.data ? ( + {f.name + ) : ( +
+ + {f.name ?? "文件"} +
+ ), + )} +
+ )} + {text && ( +
+ +
+ )}
{turn.meta?.ts && {fmtTime(turn.meta.ts)}} diff --git a/frontend/src/adk/client.ts b/frontend/src/adk/client.ts index 7df12050..f57f2a4f 100644 --- a/frontend/src/adk/client.ts +++ b/frontend/src/adk/client.ts @@ -20,6 +20,11 @@ export interface AdkEvent { timestamp?: number; usageMetadata?: AdkUsage; usage_metadata?: AdkUsage; + // Set when the model/run fails; /run_sse emits it as a `data: {"error": ...}` + // frame (also seen as errorMessage / error_message). + error?: string; + errorMessage?: string; + error_message?: string; content?: { role?: string; parts?: AdkPart[]; @@ -45,9 +50,20 @@ export interface AdkSession { [k: string]: unknown; } +export interface AdkInlineData { + mimeType?: string; + data?: string; // base64 (no data: prefix) + displayName?: string; + // snake_case fallback (defensive, in case the server echoes snake_case) + mime_type?: string; + display_name?: string; +} + export interface AdkPart { text?: string; thought?: boolean; + inlineData?: AdkInlineData; + inline_data?: AdkInlineData; // snake_case fallback (defensive) functionCall?: { name?: string; args?: Record }; functionResponse?: { name?: string; response?: Record }; // snake_case fallbacks (defensive) @@ -55,6 +71,13 @@ export interface AdkPart { function_response?: { name?: string; response?: Record }; } +/** A file the user attached in the composer, encoded for `/run_sse`. */ +export interface Attachment { + mimeType: string; + data: string; // base64 (no data: prefix) + name?: string; +} + const API_BASE = ""; // same origin (prod) / proxied (dev) /** fetch wrapper that forwards the gateway auth querystring on every request. */ @@ -123,6 +146,7 @@ export interface RunArgs { userId: string; sessionId: string; text: string; + attachments?: Attachment[]; } /** Stream agent events for one user turn. 
*/ @@ -131,7 +155,14 @@ export async function* runSSE({ userId, sessionId, text, + attachments = [], }: RunArgs): AsyncGenerator { + const parts: AdkPart[] = [ + ...attachments.map((a) => ({ + inlineData: { mimeType: a.mimeType, data: a.data, displayName: a.name }, + })), + ...(text.trim() ? [{ text }] : []), + ]; const res = await apiFetch(`/run_sse`, { method: "POST", headers: { "Content-Type": "application/json" }, @@ -139,7 +170,7 @@ export async function* runSSE({ app_name: appName, user_id: userId, session_id: sessionId, - new_message: { role: "user", parts: [{ text }] }, + new_message: { role: "user", parts }, streaming: true, }), }); diff --git a/frontend/src/blocks.ts b/frontend/src/blocks.ts index de57edcf..58918058 100644 --- a/frontend/src/blocks.ts +++ b/frontend/src/blocks.ts @@ -14,11 +14,18 @@ import type { A2uiMessage } from "./a2ui/types"; const A2UI_TOOL = "send_a2ui_json_to_client"; const VALIDATED_JSON_KEY = "validated_a2ui_json"; +export interface AttachmentView { + mimeType?: string; + data?: string; // base64 (no data: prefix) + name?: string; +} + export type Block = | { kind: "thinking"; text: string; done: boolean } | { kind: "text"; text: string } | { kind: "tool"; name: string; args?: unknown; response?: unknown; done: boolean } - | { kind: "a2ui"; messages: A2uiMessage[] }; + | { kind: "a2ui"; messages: A2uiMessage[] } + | { kind: "attachment"; files: AttachmentView[] }; /** Accumulator for one assistant turn. `liveStart` marks where the current * streaming-preview blocks begin (everything before it is finalized). */ @@ -45,6 +52,29 @@ export function emptyAcc(): Acc { const fnCall = (p: AdkPart) => p.functionCall ?? p.function_call; const fnResp = (p: AdkPart) => p.functionResponse ?? p.function_response; +/** ADK/genai serialises inline_data bytes as URL-safe base64 (-_), but a + * `data:` URI requires standard base64 (+/). Convert so reloaded images + * render instead of failing to a broken . 
*/ +function toStdBase64(b64: string): string { + return b64.replace(/-/g, "+").replace(/_/g, "/"); +} + +/** Collect file attachments from a message's parts: for each part, read inlineData (falling back to snake_case inline_data) and, only when it carries a non-empty data payload, emit an AttachmentView whose mimeType and name accept either camelCase or snake_case fields and whose data is passed through toStdBase64. Parts without inline data (for example plain text parts) are skipped; the input order is preserved. */ +export function attachmentsFromParts(parts: AdkPart[]): AttachmentView[] { + const files: AttachmentView[] = []; + for (const p of parts) { + const d = p.inlineData ?? p.inline_data; + if (d && d.data) { + files.push({ + mimeType: d.mimeType ?? d.mime_type, + data: toStdBase64(d.data), + name: d.displayName ?? d.display_name, + }); + } + } + return files; +} + function appendText(blocks: Block[], kind: "thinking" | "text", text: string) { const last = blocks[blocks.length - 1]; if (last && last.kind === kind) last.text += text; @@ -117,11 +147,16 @@ export function eventsToTurns(events: AdkEvent[]): Turn[] { // mis-split the assistant turn and drop tool results. const isUser = ev.author === "user"; if (isUser) { - const text = (ev.content?.parts ?? []) + const parts = ev.content?.parts ?? []; + const text = parts .map((p) => p.text) .filter((t): t is string => !!t) .join(""); - turns.push({ role: "user", blocks: [{ kind: "text", text }], meta: { ts: ev.timestamp } }); + const files = attachmentsFromParts(parts); + const blocks: Block[] = []; + if (files.length) blocks.push({ kind: "attachment", files }); + if (text) blocks.push({ kind: "text", text }); + turns.push({ role: "user", blocks, meta: { ts: ev.timestamp } }); acc = emptyAcc(); } else { let last = turns[turns.length - 1]; diff --git a/frontend/src/styles.css b/frontend/src/styles.css index e4d55228..afc8b49c 100644 --- a/frontend/src/styles.css +++ b/frontend/src/styles.css @@ -657,6 +657,86 @@ body { .comp-send:active:not(:disabled) { transform: scale(0.94); } .comp-send:disabled { opacity: 0.3; cursor: default; } +/* ---------- composer "+" upload menu ---------- */ +.composer-menu-wrap { position: relative; flex-shrink: 0; } +.composer-menu { + position: absolute; + bottom: 100%; + left: 0; + z-index: 31; + min-width: 168px; +
margin-bottom: 6px; + padding: 4px; + background: hsl(var(--background)); + border: 1px solid hsl(var(--border)); + border-radius: 12px; + box-shadow: 0 6px 20px hsl(var(--foreground) / 0.12); +} + +/* ---------- pending attachments (composer) ---------- */ +.attachment-row { + display: flex; + flex-wrap: wrap; + gap: 8px; + padding: 0 8px 8px; +} +.attachment-thumb-wrap { position: relative; } +.attachment-thumb { + width: 56px; + height: 56px; + object-fit: cover; + border-radius: 12px; + border: 1px solid hsl(var(--border)); + display: block; +} +.attachment-file { + position: relative; + display: flex; + align-items: center; + gap: 6px; + max-width: 200px; + padding: 8px 10px; + border: 1px solid hsl(var(--border)); + border-radius: 12px; + background: hsl(var(--secondary)); + color: hsl(var(--foreground)); + font-size: 13px; +} +.attachment-file .icon { width: 16px; height: 16px; flex-shrink: 0; } +.attachment-file-name { + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; +} +.attachment-remove { + position: absolute; + top: -6px; + right: -6px; + display: flex; + align-items: center; + justify-content: center; + width: 18px; + height: 18px; + padding: 0; + border: 1px solid hsl(var(--border)); + border-radius: 50%; + background: hsl(var(--background)); + color: hsl(var(--muted-foreground)); + cursor: pointer; +} +.attachment-remove:hover { color: hsl(var(--foreground)); } +.attachment-remove .icon { width: 11px; height: 11px; } + +/* ---------- attachments on a sent user turn ---------- */ +.msg-attachments { + display: flex; + flex-wrap: wrap; + justify-content: flex-end; + gap: 8px; + margin-bottom: 6px; +} +.msg-attachments .attachment-thumb { width: 120px; height: 120px; } + /* ---------- A2UI rendered surfaces ---------- */ .a2ui-surface { max-width: 360px; width: 100%; font-size: 14px; } .a2ui-card { diff --git a/frontend/src/ui/Composer.tsx b/frontend/src/ui/Composer.tsx index ecafcebc..e08cbe73 100644 --- 
a/frontend/src/ui/Composer.tsx +++ b/frontend/src/ui/Composer.tsx @@ -1,6 +1,7 @@ -import { useLayoutEffect, useRef } from "react"; -import { ArrowUp, Loader2, Plus } from "lucide-react"; +import { useLayoutEffect, useRef, useState } from "react"; +import { ArrowUp, FileText, ImageIcon, Loader2, Plus, X } from "lucide-react"; import { motion } from "motion/react"; +import type { Attachment } from "../adk/client"; export interface ComposerProps { value: string; @@ -8,10 +9,25 @@ export interface ComposerProps { onSubmit: () => void; disabled: boolean; // not connected yet busy: boolean; // a turn is streaming + attachments: Attachment[]; + onAddFiles: (files: FileList | File[]) => void; + onRemoveAttachment: (index: number) => void; } -export function Composer({ value, onChange, onSubmit, disabled, busy }: ComposerProps) { +export function Composer({ + value, + onChange, + onSubmit, + disabled, + busy, + attachments, + onAddFiles, + onRemoveAttachment, +}: ComposerProps) { const ref = useRef(null); + const imageInput = useRef(null); + const fileInput = useRef(null); + const [menuOpen, setMenuOpen] = useState(false); // Auto-grow the textarea up to a max height, then scroll. useLayoutEffect(() => { @@ -21,14 +37,72 @@ export function Composer({ value, onChange, onSubmit, disabled, busy }: Composer el.style.height = `${Math.min(el.scrollHeight, 200)}px`; }, [value]); - const canSend = !disabled && !busy && value.trim().length > 0; + const canSend = + !disabled && !busy && (value.trim().length > 0 || attachments.length > 0); + + function pick(input: React.RefObject) { + setMenuOpen(false); + input.current?.click(); + } + + function onInputChange(e: React.ChangeEvent) { + if (e.target.files && e.target.files.length) onAddFiles(e.target.files); + e.target.value = ""; // allow re-picking the same file + } return (
+ {attachments.length > 0 && ( +
+ {attachments.map((a, i) => ( + onRemoveAttachment(i)} + /> + ))} +
+ )} +
- +
+ + {menuOpen && ( + <> +
setMenuOpen(false)} /> +
+ + +
+ + )} +
+