Google Gemini support using Google Generative AI SDK (#865)

Co-authored-by: Max Leiter <max.leiter@vercel.com>
vercel · Dec 18, 2023 · b78a73e · b78a73e
1 parent 37969fb
commit b78a73e
Show file tree

Hide file tree

Showing 25 changed files with 816 additions and 8 deletions.
diff --git a/.changeset/gentle-dots-drum.md b/.changeset/gentle-dots-drum.md
@@ -0,0 +1,5 @@
+---
+'ai': patch
+---
+
+Add GoogleGenerativeAIStream for Gemini support
diff --git a/docs/pages/docs/api-reference/_meta.json b/docs/pages/docs/api-reference/_meta.json
@@ -6,6 +6,7 @@
   "anthropic-stream": "AnthropicStream",
   "aws-bedrock-stream": "AWSBedrock*Stream",
   "cohere-stream": "CohereStream",
+  "google-generative-ai-stream": "GoogleGenerativeAIStream",
   "huggingface-stream": "HuggingFaceStream",
   "langchain-stream": "LangChainStream",
   "openai-stream": "OpenAIStream",

diff --git a/docs/pages/docs/api-reference/google-generative-ai-stream.mdx b/docs/pages/docs/api-reference/google-generative-ai-stream.mdx
@@ -0,0 +1,63 @@
+# GoogleGenerativeAIStream
+
+## `GoogleGenerativeAIStream(res: Response, cb?: AIStreamCallbacks): ReadableStream` [#googlegenerativeaistream]
+
+The `GoogleGenerativeAIStream` function is a utility that transforms the output from
+[Google's Generative AI SDK](https://github.com/google/generative-ai-js) into a `ReadableStream`.
+It uses `AIStream` under the hood, applying a specific parser for Google's response data structure.
+
+This works with the official Generative AI SDK, and it's supported in Node.js, [Edge Runtime](https://edge-runtime.vercel.app), and browser environments.
+
+## Parameters
+
+### `res: Response`
+
+The `Response` object returned by the request to the Google Generative AI SDK.
+
+### `cb?: AIStreamCallbacks`
+
+This optional parameter can be an object containing callback functions to handle the start, each token, and completion of the AI response. In the absence of this parameter, default behavior is implemented.
+
+## Example
+
+The `GoogleGenerativeAIStream` function can be coupled with a call to the Google Generative AI SDK to generate a readable stream of the completion. This stream can then facilitate the real-time consumption of AI outputs as they're being generated.
+
+Here's a step-by-step example of how to implement this in Next.js:
+
+```ts filename="app/api/completion/route.ts"
+import { GoogleGenerativeAI } from '@google/generative-ai';
+import { GoogleGenerativeAIStream, Message, StreamingTextResponse } from 'ai';
+
+const genAI = new GoogleGenerativeAI(process.env.GOOGLE_API_KEY || '');
+
+// IMPORTANT! Set the runtime to edge
+export const runtime = 'edge';
+
+// Translate Vercel AI SDK messages into the request shape used by the
+// Google GenAI SDK: only user/assistant turns are kept, and assistant
+// turns are relabelled with the 'model' role.
+const buildGoogleGenAIPrompt = (messages: Message[]) => {
+  const chatTurns = messages.filter(
+    ({ role }) => role === 'user' || role === 'assistant',
+  );
+
+  const contents = chatTurns.map(({ role, content }) => ({
+    role: role === 'user' ? 'user' : 'model',
+    parts: [{ text: content }],
+  }));
+
+  return { contents };
+};
+
+/**
+ * Handles `POST` requests: reads the chat `messages` from the JSON body,
+ * asks the `gemini-pro` model for a streamed reply, and returns it as a
+ * streaming text response.
+ */
+export async function POST(req: Request) {
+  // Extract the `messages` from the body of the request
+  const { messages } = await req.json();
+
+  // Ask Google Generative AI for a streaming reply to the conversation
+  const geminiStream = await genAI
+    .getGenerativeModel({ model: 'gemini-pro' })
+    .generateContentStream(buildGoogleGenAIPrompt(messages));
+
+  // Convert the response into a friendly text-stream
+  const stream = GoogleGenerativeAIStream(geminiStream);
+
+  // Respond with the stream
+  return new StreamingTextResponse(stream);
+}
+```
+
+In this example, the `GoogleGenerativeAIStream` function transforms the text generation stream from the Google Generative AI SDK into a `ReadableStream` of parsed results. This allows clients to consume AI outputs in real-time as they're generated, instead of waiting for the complete response.
diff --git a/docs/pages/docs/guides/providers/google.mdx b/docs/pages/docs/guides/providers/google.mdx
@@ -0,0 +1,234 @@
+# Google
+
+import { Steps, Callout } from 'nextra-theme-docs';
+
+Vercel AI SDK provides a set of utilities to make it easy to use [Google's Generative AI SDK](https://github.com/google/generative-ai-js)
+that enables you to build apps using [Google Gemini](https://ai.google.dev/).
+In this guide, we'll walk through how to use the utilities to create a chat bot and a text completion app.
+
+## Guide: Chat Bot
+
+<Steps>
+
+### Create a Next.js app
+
+Create a Next.js application and install `ai`:
+
+```sh
+pnpm dlx create-next-app my-ai-app
+cd my-ai-app
+pnpm install ai
+```
+
+### Add your Google API Key to `.env`
+
+Create a `.env` file in your project root and add your [API Key for the Google AI SDK](https://makersuite.google.com/app/apikey):
+
+```env filename=".env"
+GOOGLE_API_KEY=xxxxxxx
+```
+
+### Create a Route Handler
+
+Create a Next.js Route Handler that uses the Edge Runtime to generate a response to a series of messages via Google's Generative AI SDK, and returns the response as a streaming text response.
+
+For this example, we'll create a route handler at `app/api/chat/route.ts` that accepts a `POST` request with a `messages` array of message objects (each with a `role` and `content`):
+
+```tsx filename="app/api/chat/route.ts" showLineNumbers
+import { GoogleGenerativeAI } from '@google/generative-ai';
+import { GoogleGenerativeAIStream, Message, StreamingTextResponse } from 'ai';
+
+const genAI = new GoogleGenerativeAI(process.env.GOOGLE_API_KEY || '');
+
+// IMPORTANT! Set the runtime to edge
+export const runtime = 'edge';
+
+// Build the Google GenAI SDK request from Vercel AI SDK messages.
+// Messages that are neither 'user' nor 'assistant' are dropped, and
+// 'assistant' is sent to Google under the 'model' role.
+const buildGoogleGenAIPrompt = (messages: Message[]) => {
+  const isChatTurn = (message: Message) =>
+    message.role === 'user' || message.role === 'assistant';
+
+  const contents = messages.filter(isChatTurn).map(({ role, content }) => ({
+    role: role === 'user' ? 'user' : 'model',
+    parts: [{ text: content }],
+  }));
+
+  return { contents };
+};
+
+/**
+ * Chat route handler: takes the `messages` array from the JSON request body,
+ * requests a streamed `gemini-pro` reply, and streams the text back.
+ */
+export async function POST(req: Request) {
+  // Extract the `messages` from the body of the request
+  const { messages } = await req.json();
+
+  // Ask Google Generative AI for a streaming reply to the conversation
+  const geminiStream = await genAI
+    .getGenerativeModel({ model: 'gemini-pro' })
+    .generateContentStream(buildGoogleGenAIPrompt(messages));
+
+  // Convert the response into a friendly text-stream
+  const stream = GoogleGenerativeAIStream(geminiStream);
+
+  // Respond with the stream
+  return new StreamingTextResponse(stream);
+}
+```
+
+<Callout>
+  Vercel AI SDK provides 2 utility helpers to make the above seamless: First, we
+  pass the streaming `response` we receive from Google's Generative AI SDK to
+  [`GoogleGenerativeAIStream`](/docs/api-reference/google-generative-ai-stream).
+  This utility class decodes/extracts the text tokens in the response and then
+  re-encodes them properly for simple consumption. We can then pass that new
+  stream directly to
+  [`StreamingTextResponse`](/docs/api-reference/streaming-text-response). This
+  is another utility class that extends the normal Node/Edge Runtime `Response`
+  class with the default headers you probably want (hint: `'Content-Type':
+  'text/plain; charset=utf-8'` is already set for you).
+</Callout>
+
+### Wire up the UI
+
+Create a Client component with a form that we'll use to gather the prompt from the user and then stream back the completion.
+By default, the [`useChat`](/docs/api-reference#usechat) hook will use the `POST` Route Handler we created above (it defaults to `/api/chat`). You can override this by passing an `api` prop to `useChat({ api: '...'})`.
+
+```tsx filename="app/page.tsx" showLineNumbers
+'use client';
+
+import { useChat } from 'ai/react';
+
+/**
+ * Minimal chat UI: renders the conversation kept by `useChat` and a
+ * one-field form wired to the hook's input and submit handlers.
+ */
+export default function Chat() {
+  const chat = useChat();
+
+  // One row per message, prefixed with who sent it
+  const rows = chat.messages.map(message => {
+    const speaker = message.role === 'user' ? 'User: ' : 'AI: ';
+    return (
+      <div key={message.id}>
+        {speaker}
+        {message.content}
+      </div>
+    );
+  });
+
+  return (
+    <div>
+      {rows}
+
+      <form onSubmit={chat.handleSubmit}>
+        <input
+          value={chat.input}
+          placeholder="Say something..."
+          onChange={chat.handleInputChange}
+        />
+      </form>
+    </div>
+  );
+}
+```
+
+</Steps>
+
+## Guide: Text Completion
+
+<Steps>
+
+### Use the Completion API
+
+Similar to the Chat Bot example above, we'll create a Next.js Route Handler that generates a text completion via the same
+Google Generative AI SDK that we'll then stream back to our Next.js app. It accepts a `POST` request with a `prompt` string:
+
+```tsx filename="app/api/completion/route.ts" showLineNumbers
+import { GoogleGenerativeAI } from '@google/generative-ai';
+import { GoogleGenerativeAIStream, StreamingTextResponse } from 'ai';
+
+const genAI = new GoogleGenerativeAI(process.env.GOOGLE_API_KEY || '');
+
+// IMPORTANT! Set the runtime to edge
+export const runtime = 'edge';
+
+/**
+ * Streams a `gemini-pro` text completion for the `prompt` sent in the
+ * JSON request body.
+ */
+export async function POST(req: Request) {
+  // The request body is JSON carrying the `prompt`
+  const body = await req.json();
+  const prompt = body.prompt;
+
+  // Send the prompt to Google Generative AI as a single user turn
+  const userTurn = { role: 'user', parts: [{ text: prompt }] };
+  const model = genAI.getGenerativeModel({ model: 'gemini-pro' });
+  const response = await model.generateContentStream({ contents: [userTurn] });
+
+  // Turn the SDK response into a text stream and send it back
+  return new StreamingTextResponse(GoogleGenerativeAIStream(response));
+}
+```
+
+### Wire up the UI
+
+We can use the [`useCompletion`](/docs/api-reference#usecompletion) hook to make it easy to wire up the UI. By default, the `useCompletion` hook will use the `POST` Route Handler we created above (it defaults to `/api/completion`). You can override this by passing an `api` prop to `useCompletion({ api: '...'})`.
+
+```tsx filename="app/page.tsx" showLineNumbers
+'use client';
+
+import { useCompletion } from 'ai/react';
+
+/**
+ * Completion form: a prompt input, the current completion text, a Stop
+ * button, and a Send button that is disabled while `isLoading` is true.
+ * All state and handlers come from `useCompletion`.
+ */
+export default function Completion() {
+  const ui = useCompletion();
+
+  // Labelled prompt field bound to the hook's input state
+  const promptField = (
+    <label>
+      Say something...
+      <input
+        className="fixed w-full max-w-md bottom-0 border border-gray-300 rounded mb-8 shadow-xl p-2"
+        value={ui.input}
+        onChange={ui.handleInputChange}
+      />
+    </label>
+  );
+
+  return (
+    <div className="mx-auto w-full max-w-md py-24 flex flex-col stretch">
+      <form onSubmit={ui.handleSubmit}>
+        {promptField}
+        <output>Completion result: {ui.completion}</output>
+        <button type="button" onClick={ui.stop}>
+          Stop
+        </button>
+        <button disabled={ui.isLoading} type="submit">
+          Send
+        </button>
+      </form>
+    </div>
+  );
+}
+```
+
+</Steps>
+
+## Guide: Save to Database After Completion
+
+It’s common to want to save the result of a completion to a database after streaming it back to the user. The `GoogleGenerativeAIStream` adapter accepts a couple of optional callbacks that can be used to do this.
+
+```tsx filename="app/api/completion/route.ts" showLineNumbers
+/**
+ * Same completion handler as before, with lifecycle callbacks passed to
+ * `GoogleGenerativeAIStream` so the prompt and the final completion can be
+ * persisted.
+ */
+export async function POST(req: Request) {
+  // ...
+
+  // Convert the response into a friendly text-stream
+  const stream = GoogleGenerativeAIStream(response, {
+    onStart: async () => {
+      // This callback is called when the stream starts
+      // You can use this to save the prompt to your database
+      await savePromptToDatabase(prompt);
+    },
+    onToken: async (token: string) => {
+      // This callback is called for each token in the stream
+      // You can use this to debug the stream or save the tokens to your database
+      console.log(token);
+    },
+    onCompletion: async (completion: string) => {
+      // This callback is called when the completion is ready
+      // You can use this to save the final completion to your database
+      await saveCompletionToDatabase(completion);
+    },
+  });
+
+  // Respond with the converted stream (not the raw SDK `response`), so the
+  // callbacks registered above are part of what gets sent back
+  return new StreamingTextResponse(stream);
+}
+```
diff --git a/docs/pages/docs/index.mdx b/docs/pages/docs/index.mdx
@@ -189,7 +189,8 @@ export function A({ href, children }) {
   <IntegrationCard href="/docs/guides/providers/langchain" title="LangChain" description="LangChain is an open source prompt engineering framework for developing applications powered by language models. The Vercel AI SDK provides a simple way to use LangChain in your frontend web applications." />
   <IntegrationCard href="/docs/guides/providers/mistral" title="Mistral" description="Mistral is an AI research and deployment company. The Vercel AI SDK provides a simple way to use Mistral models in your frontend web applications." />
   <IntegrationCard href="/docs/guides/providers/aws-bedrock" title="AWS Bedrock" description="Bedrock is a fully managed service that offers a choice of high-performing foundation models from leading AI companies through AWS. The Vercel AI SDK provides a simple way to use AWS Bedrock models in your frontend web applications." />
- <IntegrationCard href="/docs/guides/providers/cohere" title="Cohere" description="Cohere is an AI platform for enterprise. The Vercel AI SDK provides a simple way to use Cohere's command models in your frontend web applications." />
+  <IntegrationCard href="/docs/guides/providers/google" title="Google" description="Google Gemini is a state-of-the-art AI model by Google. The Vercel AI SDK provides a simple way to use Google Gemini in your frontend web applications." />
+  <IntegrationCard href="/docs/guides/providers/cohere" title="Cohere" description="Cohere is an AI platform for enterprise. The Vercel AI SDK provides a simple way to use Cohere's command models in your frontend web applications." />
   <IntegrationCard href="/docs/guides/providers/anthropic" title="Anthropic" description="Anthropic is an AI research and deployment company. The Vercel AI SDK provides a simple way to use Anthropic's Claude models in your frontend web applications." />
   <IntegrationCard href="/docs/guides/providers/replicate" title="Replicate" description="Replicate is an on-demand AI model hosting platform. The Vercel AI SDK provides a simple way to use Replicate models in your frontend web applications." />
   <IntegrationCard href="/docs/guides/providers/hugging-face" title="Hugging Face" description="Hugging Face is a collaboration and model hosting platform for the machine learning community. The Vercel AI SDK provides a simple way to use Hugging Face models in your frontend web applications." />

diff --git a/examples/next-google-generative-ai/.env.local.example b/examples/next-google-generative-ai/.env.local.example
@@ -0,0 +1 @@
+GOOGLE_API_KEY=xxxxxx
diff --git a/examples/next-google-generative-ai/.gitignore b/examples/next-google-generative-ai/.gitignore
@@ -0,0 +1,35 @@
+# See https://help.github.com/articles/ignoring-files/ for more about ignoring files.
+
+# dependencies
+/node_modules
+/.pnp
+.pnp.js
+
+# testing
+/coverage
+
+# next.js
+/.next/
+/out/
+
+# production
+/build
+
+# misc
+.DS_Store
+*.pem
+
+# debug
+npm-debug.log*
+yarn-debug.log*
+yarn-error.log*
+
+# local env files
+.env*.local
+
+# vercel
+.vercel
+
+# typescript
+*.tsbuildinfo
+next-env.d.ts
diff --git a/examples/next-google-generative-ai/app/api/chat/route.ts b/examples/next-google-generative-ai/app/api/chat/route.ts
@@ -0,0 +1,33 @@
+import { GoogleGenerativeAI } from '@google/generative-ai';
+import { GoogleGenerativeAIStream, Message, StreamingTextResponse } from 'ai';
+
+const genAI = new GoogleGenerativeAI(process.env.GOOGLE_API_KEY || '');
+
+// IMPORTANT! Set the runtime to edge
+export const runtime = 'edge';
+
+// Adapt Vercel AI SDK chat messages to the `contents` payload expected by
+// the Google GenAI SDK. Only 'user' and 'assistant' messages are forwarded;
+// 'assistant' is renamed to 'model'.
+const buildGoogleGenAIPrompt = (messages: Message[]) => {
+  const contents = messages
+    .filter(({ role }) => role === 'user' || role === 'assistant')
+    .map(({ role, content }) => {
+      const googleRole = role === 'user' ? 'user' : 'model';
+      return { role: googleRole, parts: [{ text: content }] };
+    });
+
+  return { contents };
+};
+
+/**
+ * Chat route handler: reads the `messages` array from the JSON request body,
+ * requests a streamed `gemini-pro` reply, and streams the text back.
+ */
+export async function POST(req: Request) {
+  // Extract the `messages` from the body of the request
+  const { messages } = await req.json();
+
+  // Ask Google Generative AI for a streaming reply to the conversation
+  const geminiStream = await genAI
+    .getGenerativeModel({ model: 'gemini-pro' })
+    .generateContentStream(buildGoogleGenAIPrompt(messages));
+
+  // Convert the response into a friendly text-stream
+  const stream = GoogleGenerativeAIStream(geminiStream);
+
+  // Respond with the stream
+  return new StreamingTextResponse(stream);
+}