In [24]:
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Generative AI Knowledge Base model predictions

To run this notebook, make sure you have uploaded at least one document into your knowledge base.

> ⭐️ If you haven't, follow the [**Uploading documents and query model** tutorial](https://console.cloud.google.com/products/solutions/deployments?walkthrough_id=panels--sic--generative-ai-knowledge-base_toc).

Before you begin, make sure all the dependencies are installed.

In [25]:
!pip install google-genai google-cloud-aiplatform google-cloud-firestore



## Overview

A **Large Language Model (LLM)** can be very good at answering general questions.
But on its own, it might not do as well at answering questions about your documents.

The LLM will answer only from what it learned from its _training dataset_.
Your documents might include information or words that weren't in that dataset.
Or they might be used in a different or more specialized context.

This is where **Vector Search** comes into play.
Each time you upload a document, the Cloud Function webhook processes it.
When a document is processed, each individual page is _indexed_.
This allows us to find not only the relevant documents, but also the specific pages.

The relevant pages can then be used as _context_ for the LLM to answer the question.
This _grounds_ the model to answer questions based on the documents only.
Without this, the model might give wrong answers, or _hallucinations_.

## My Google Cloud resources

Fill in your project ID, the
[Google Cloud location](https://cloud.google.com/about/locations)
you want to use, and your
Vector Search index endpoint ID.
If you followed the tutorial, the deployed index ID should be `deployed_index`, otherwise change it to the ID you chose.

You can find your Vector Search index endpoint ID in the [Index endpoints tab](https://console.cloud.google.com/vertex-ai/matching-engine/index-endpoints).

> 💡 The Vector Search index endpoint ID looks like a number, like `1234567890123456789`.

Run the following cell to set up your resources and authenticate to your account.

The first step is to initialize the GenAI client library using the project and location of your choice.

In [26]:
from google import genai

# Create the Gen AI client in Vertex AI mode for the chosen project and location.
# NOTE(review): `project_id` and `location` are not defined in the visible cells —
# confirm they are set before this cell runs.
genai_client = genai.Client(
    vertexai=True,
    project=project_id,
    location=location,
)

## Get text embeddings

You can use the Gecko model to get embeddings from text.
For more information, see the
[Get text embeddings](https://cloud.google.com/vertex-ai/generative-ai/docs/embeddings/get-text-embeddings)
page.

## Find document context

All the documents you have processed have been indexed into your Vector Search index.
You can query for the closest embeddings to a given embedding from your Vector Search index endpoint.

> 💡 If you haven't processed any documents yet, you won't get any results.

In [27]:
from google.cloud import aiplatform
from itertools import groupby

# Initialize the Vertex AI SDK with the same project and location used for the Gen AI client.
aiplatform.init(project=project_id, location=location)

def find_document(question: str, index_endpoint_id: str, deployed_index_id: str) -> tuple[str, int]:
    """Find the indexed document page that best matches a question.

    Args:
        question: The question to search for.
        index_endpoint_id: ID of the Vector Search index endpoint to query.
        deployed_index_id: ID of the deployed index on that endpoint.

    Returns:
        A `(filename, page_number)` tuple parsed from the ID of the nearest
        neighbor, which is expected to look like `<filename>:<page number>`.

    Raises:
        IndexError: If the query returns no neighbors, for example when no
            documents have been indexed yet.
        ValueError: If the neighbor ID does not end in `:<page number>`.
    """
    # Get embeddings for the question.
    embedding = get_text_embedding(question)

    # Find the closest point from the Vector Search index endpoint.
    endpoint = aiplatform.MatchingEngineIndexEndpoint(index_endpoint_id)
    neighbors = endpoint.find_neighbors(
        deployed_index_id=deployed_index_id,
        queries=[embedding],
        num_neighbors=1,
    )

    # One result list is returned per query; fail with a clear message when
    # it is missing or empty instead of an opaque "list index out of range".
    if not neighbors or not neighbors[0]:
        raise IndexError(
            "No matching documents found. "
            "Make sure at least one document has been processed and indexed."
        )
    point = neighbors[0][0]

    # Get the document name and page number from the point ID.
    # Split on the LAST colon so filenames that contain ':' are kept intact.
    (filename, page_number) = point.id.rsplit(':', 1)
    return (filename, int(page_number))

# Query the Vector Search index for the most relevant page.
# NOTE(review): `question`, `index_endpoint_id` and `deployed_index_id` are not defined in
# the visible cells — confirm they are set earlier in the notebook.
(filename, page_number) = find_document(question, index_endpoint_id, deployed_index_id)
# The `=` specifier prints each variable name together with its value.
print(f"{filename=} {page_number=}")

RefreshError: ("Failed to retrieve http://metadata.google.internal/computeMetadata/v1/instance/service-accounts/default/?recursive=true from the Google Compute Engine metadata service. Status: 404 Response:\nb''", <google.auth.transport.requests._Response object at 0x788499b47bd0>)

## Get document text

When documents were processed, their text was stored in Firestore as well.
The Vector Search query returned the relevant documents with their page numbers.
With this you can download the document's pages and give only the most relevant page to the model.

In [None]:
from google.cloud import firestore

def get_document_text(filename: str, page_number: int) -> str:
    """Return the text of a single page of a processed document.

    The document is read from the `documents` collection of the
    `knowledge-base-database` Firestore database. Its document ID is
    `filename` with every `/` replaced by `-`, and the page is taken from
    the document's `pages` field at index `page_number`.
    """
    client = firestore.Client(database="knowledge-base-database")
    document_id = filename.replace('/', '-')
    snapshot = client.collection("documents").document(document_id).get()
    pages = snapshot.get('pages')
    return pages[page_number]

# Download the document's page text from Firestore.
context = get_document_text(filename, page_number)
# Show only the first 1000 characters as a preview; the trailing dots mark the cut.
print(f"{context[:1000]}\n...\n...")

## Ask a foundation model

With the relevant context ready, you can now make a _prompt_ that includes both the context and the question.

Here's Gemini's response.
Note that Gemini responds in [Markdown](https://www.markdownguide.org).

In [None]:
from IPython.display import Markdown, display
from google.genai.types import GenerateContentConfig

def ask_model(question: str, model: str = "gemini-2.0-flash") -> None:
    """Answer a question with a Gemini model grounded on the most relevant page.

    Looks up the best-matching document page, passes its text to the model as
    a system instruction, then prints the question and renders the model's
    response as Markdown.

    Args:
        question: The question to ask.
        model: Name of the model to query. Defaults to the model this
            notebook originally hard-coded.
    """
    (filename, page_number) = find_document(question, index_endpoint_id, deployed_index_id)
    context = get_document_text(filename, page_number)
    response = genai_client.models.generate_content(
        model=model,
        contents=question,
        config=GenerateContentConfig(
            # The page text travels with the instruction so the answer is
            # based on the document rather than on the model's training data.
            system_instruction=[
                "Answer the question based on the following text:",
                context,
            ],
        ),
    )
    print(question)
    display(Markdown(response.text))

# Example question; replace it with one that your uploaded documents can answer.
ask_model("What are LFs and why are they useful?")

## (Optional) Ask your tuned model

If you want to tune a model, follow the [**Fine-tune an LLM model** tutorial](https://console.cloud.google.com/products/solutions/deployments?walkthrough_id=panels--sic--generative-ai-knowledge-base_toc).

First, find the tuning job ID for your tuned model.

In [None]:
from vertexai.tuning import sft

# Print every supervised tuning job as "<tuned model display name>: <job resource name>".
for tuning_job in sft.SupervisedTuningJob.list():
    model_name, tuning_job_id = (
        tuning_job.gca_resource.tuned_model_display_name,
        tuning_job.resource_name,
    )
    print(f"{model_name}: {tuning_job_id}")

Copy your tuning job ID and paste it below.
Don't forget to run the cell to define the `tuning_job_id` variable.

In [None]:
# Colab form field: paste the tuning job ID printed by the previous cell.
tuning_job_id = "" # @param {type:"string"}

# Stop here with a clear message if the ID was left empty.
assert tuning_job_id, "Please set the tuning_job_id"

In [None]:
from vertexai.tuning import sft

# Look up the tuning job using the ID set in the previous cell.
tuning_job = sft.SupervisedTuningJob(tuning_job_id)
# Only continue once the job has ended and reports a tuned model endpoint.
assert tuning_job.has_ended, "Please wait until the tuning job finishes."
assert tuning_job.tuned_model_endpoint_name

tuned_model_endpoint = tuning_job.tuned_model_endpoint_name
print(f"{tuned_model_endpoint=}")
# The tuned model endpoint follows this format:
#   projects/<PROJECT_NUMBER>/locations/<LOCATION>/endpoints/<MODEL_ENDPOINT_ID>

In [None]:
from vertexai.generative_models import GenerativeModel

def ask_tuned_model(tuned_model_endpoint: str, question: str) -> None:
    """Ask a tuned model a question, grounded on the most relevant document page.

    The page text is sent as part of the request contents (prefixed with
    `Text: `), followed by the question. The question is printed and the
    model's response is rendered as Markdown.

    Args:
        tuned_model_endpoint: Endpoint of the tuned model, passed as the model name.
        question: The question to ask.
    """
    filename, page_number = find_document(question, index_endpoint_id, deployed_index_id)
    page_text = get_document_text(filename, page_number)

    generation_config = GenerateContentConfig(
        system_instruction=["Answer the question based on the following text"],
    )
    response = genai_client.models.generate_content(
        model=tuned_model_endpoint,
        contents=[f"Text: {page_text}", question],
        config=generation_config,
    )

    print(question)
    display(Markdown(response.text))

# Ask the tuned model the same example question that was asked of the base model.
ask_tuned_model(tuned_model_endpoint, "What are LFs and why are they useful?")

Add `%load_ext cudf.pandas` before importing pandas to speed up operations using a GPU.




In [None]:
%load_ext cudf.pandas
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Randomly generated dataset of parking violations.

# Seed NumPy's global random generator so every run produces the same dataset.
np.random.seed(42)

# Define the number of rows
num_rows = 1000000

states = ["NY", "NJ", "CA", "TX"]
violations = ["Double Parking", "Expired Meter", "No Parking",
              "Fire Hydrant", "Bus Stop"]
vehicle_types = ["SUBN", "SDN"]

# Create a date range
start_date = "2022-01-01"
end_date = "2022-12-31"
dates = pd.date_range(start=start_date, end=end_date, freq='D')

# Generate random data
data = {
    "Registration State": np.random.choice(states, size=num_rows),
    "Violation Description": np.random.choice(violations, size=num_rows),
    "Vehicle Body Type": np.random.choice(vehicle_types, size=num_rows),
    "Issue Date": np.random.choice(dates, size=num_rows),
    # Ten-digit ticket numbers do not fit in 32 bits, so request int64 explicitly;
    # otherwise the call fails where NumPy's default integer is 32-bit.
    # The upper bound is exclusive.
    "Ticket Number": np.random.randint(1000000000, 9999999999, size=num_rows,
                                       dtype=np.int64),
}

# Create a DataFrame
df = pd.DataFrame(data)

# How do parking violations change from day to day, segmented by vehicle type?
# Smoothed with a 7-day rolling mean.

# Count tickets per (issue date, vehicle type), then pivot the vehicle types into columns.
daily_counts = (
    df.groupby(['Issue Date', 'Vehicle Body Type'])
    .size()
    .unstack(fill_value=0)
)

# Average each vehicle type's daily count over a 7-day window.
rolling_means = daily_counts.rolling(window=7).mean()

# Plot the last 100 days of the rolling means, one line per vehicle type.
rolling_means.tail(100).plot(
    figsize=(14, 7),
    title="7-Day Rolling Average of Parking Violations by Vehicle Type",
)
plt.xlabel("Date")
plt.ylabel("Average Number of Violations")
plt.show()

In [None]:
# @title Create a prompt

import google.generativeai as genai
from google.colab import userdata

# Colab form fields that configure the request.
api_key_name = 'GOOGLE_API_KEY' # @param {type: "string"}
prompt = 'What is the velocity of an unladen swallow?' # @param {type: "string"}
system_instructions = 'You have a tendency to speak in riddles.' # @param {type: "string"}
model = 'gemini-2.0-flash' # @param {type: "string"} ["gemini-1.0-pro", "gemini-1.5-pro", "gemini-1.5-flash", "gemini-2.0-flash"]
temperature = 0.5 # @param {type: "slider", min: 0, max: 2, step: 0.05}
stop_sequence = '' # @param {type: "string"}

# Treat a blank system-instructions field as "not provided" before the model-specific
# checks, so the warning below only appears when instructions were actually entered.
if system_instructions == '':
  system_instructions = None

if model == 'gemini-1.0-pro' and system_instructions is not None:
  system_instructions = None
  print('\x1b[31m(WARNING: System instructions ignored, gemini-1.0-pro does not support system instructions)\x1b[0m')

if model == 'gemini-1.0-pro' and temperature > 1:
  temperature = 1
  print('\x1b[34m(INFO: Temperature set to 1, gemini-1.0-pro does not support temperature > 1)\x1b[0m')

# Read the API key from the Colab secret named in the form and configure the client.
api_key = userdata.get(api_key_name)
genai.configure(api_key=api_key)
# NOTE: `model` is rebound from the model name (str) to the model object here.
model = genai.GenerativeModel(model, system_instruction=system_instructions)
# Only send a stop sequence when one was entered; a blank form field would
# otherwise be passed to the API as an empty stop sequence.
config = genai.GenerationConfig(
    temperature=temperature,
    stop_sequences=[stop_sequence] if stop_sequence else None,
)
response = model.generate_content(contents=[prompt], generation_config=config)
response.text

In [None]:
# @title Create a prompt

import google.generativeai as genai
from google.colab import userdata

# NOTE(review): this cell duplicates the previous "Create a prompt" cell; consider removing one.
# Colab form fields that configure the request.
api_key_name = 'GOOGLE_API_KEY' # @param {type: "string"}
prompt = 'What is the velocity of an unladen swallow?' # @param {type: "string"}
system_instructions = 'You have a tendency to speak in riddles.' # @param {type: "string"}
model = 'gemini-2.0-flash' # @param {type: "string"} ["gemini-1.0-pro", "gemini-1.5-pro", "gemini-1.5-flash", "gemini-2.0-flash"]
temperature = 0.5 # @param {type: "slider", min: 0, max: 2, step: 0.05}
stop_sequence = '' # @param {type: "string"}

# Treat a blank system-instructions field as "not provided" before the model-specific
# checks, so the warning below only appears when instructions were actually entered.
if system_instructions == '':
  system_instructions = None

if model == 'gemini-1.0-pro' and system_instructions is not None:
  system_instructions = None
  print('\x1b[31m(WARNING: System instructions ignored, gemini-1.0-pro does not support system instructions)\x1b[0m')

if model == 'gemini-1.0-pro' and temperature > 1:
  temperature = 1
  print('\x1b[34m(INFO: Temperature set to 1, gemini-1.0-pro does not support temperature > 1)\x1b[0m')

# Read the API key from the Colab secret named in the form and configure the client.
api_key = userdata.get(api_key_name)
genai.configure(api_key=api_key)
# NOTE: `model` is rebound from the model name (str) to the model object here.
model = genai.GenerativeModel(model, system_instruction=system_instructions)
# Only send a stop sequence when one was entered; a blank form field would
# otherwise be passed to the API as an empty stop sequence.
config = genai.GenerationConfig(
    temperature=temperature,
    stop_sequences=[stop_sequence] if stop_sequence else None,
)
response = model.generate_content(contents=[prompt], generation_config=config)
response.text

In [None]:
# @title Configure Gemini API key

import google.generativeai as genai
from google.colab import userdata

# Name of the Colab secret that stores the Gemini API key.
gemini_api_secret_name = 'GOOGLE_API_KEY'  # @param {type: "string"}

# Read the key from Colab secrets and configure the client. Each failure mode prints
# guidance and then re-raises the original exception so the cell still fails visibly.
try:
  GOOGLE_API_KEY = userdata.get(gemini_api_secret_name)
  genai.configure(api_key=GOOGLE_API_KEY)
except userdata.SecretNotFoundError:
  print(f'Secret not found\n\nThis expects you to create a secret named {gemini_api_secret_name} in Colab\n\nVisit https://aistudio.google.com/app/apikey to create an API key\n\nStore that in the secrets section on the left side of the notebook (key icon)\n\nName the secret {gemini_api_secret_name}')
  raise
except userdata.NotebookAccessError:
  print(f'You need to grant this notebook access to the {gemini_api_secret_name} secret in order for the notebook to access Gemini on your behalf.')
  raise
except Exception:
  print(f"There was an unknown error. Ensure you have a secret {gemini_api_secret_name} stored in Colab and it's a valid key from https://aistudio.google.com/app/apikey")
  raise

In [None]:
from google.colab import drive
# Mount Google Drive at /gdrive, then make it the working directory.
drive.mount('/gdrive')
%cd /gdrive

In [None]:
# Clone the entire repo.
!git clone -l -s git://github.com/jakevdp/PythonDataScienceHandbook.git cloned-repo
%cd cloned-repo
!ls

In [None]:
# Fetch a single <1MB file through the GitHub contents API.
# The Accept header asks the API for the raw file contents, --remote-name saves the
# response under the remote file name, and --location follows redirects.
!curl --remote-name \
     -H 'Accept: application/vnd.github.v3.raw' \
     --location https://api.github.com/repos/jakevdp/PythonDataScienceHandbook/contents/notebooks/data/california_cities.csv

In [None]:
from google.colab import drive
# Mount Google Drive at /gdrive, then make it the working directory.
# NOTE(review): this repeats an earlier cell that mounts the same path.
drive.mount('/gdrive')
%cd /gdrive

In [None]:
# @title Connect to the API and send an example message

# Colab form field: the message to send.
text = 'milehigh.world into the void ' # @param {type: "string"}

# NOTE(review): this cell relies on `genai` having been imported and configured with an
# API key by an earlier cell — confirm that cell has run.
model = genai.GenerativeModel('gemini-2.0-flash')
# Start a chat session with an empty history.
chat = model.start_chat(history=[])

response = chat.send_message(text)
# Leave the reply text as the last expression so the notebook displays it.
response.text

In [None]:
%load_ext cudf.pandas
import pandas as pd
import random

# Seed the random module so the generated dataset is the same on every run.
random.seed(42)

# Define the species categories
species_categories = ['setosa', 'versicolor', 'virginica']
flower_color_categories = ['red','yellow','green']

# Define the range for each attribute based on typical iris flower measurements
sepal_length_range = (4.0, 8.0)

# Create data for 1,000,000 samples
n = 1000000
data = {
    'sepal_length': [random.uniform(*sepal_length_range) for _ in range(n)],
    # random.choices draws all n values in one call instead of n separate random.choice calls.
    'flower_color': random.choices(flower_color_categories, k=n),
    'species': random.choices(species_categories, k=n)
}

df = pd.DataFrame(data)

# Count the samples in each (species, flower color) combination, largest group first.
df.groupby(['species','flower_color']).size().sort_values(ascending=False)