# get_context
## Retrieve context passages for retrieval-augmented generation (RAG)

In [16]:
import pandas as pd
import numpy as np
import numpy.linalg
from google.api_core import retry
from vertexai.language_models import TextEmbeddingModel, TextGenerationModel

In [12]:
# Load the persisted vector store from CSV
vector_store = pd.read_csv('/home/jupyter/property_pilot/data/vector_store.csv')
# Each stored embedding is a comma-separated string; parse it into a float array
vector_store['embeddings'] = vector_store['embeddings'].apply(
    lambda raw: np.array([float(part) for part in raw.split(',')])
)
vector_store.head()

Unnamed: 0,texts,embeddings
0,"Ravenswood Chicagoans love craft beer, and wit...","[-0.015725474804639816, -0.04845630005002022, ..."
1,"Sample some brews at laidback Begyle Brewing, ...","[-0.01855381578207016, -0.012325075455009937, ..."
2,"aff. Grab a bite: After all that beer, you’re ...","[-0.007087345700711012, -0.028249159455299377,..."
3,iscover the neighborhood’s cool mix of urban a...,"[-0.023173771798610687, -0.0009359957766719162..."
4,lass. Or take a walk through sprawling Winnema...,"[-0.02770255133509636, -0.020968427881598473, ..."


In [17]:
# Identifier of the pretrained Vertex AI text-embedding model used by get_embeddings
EMBEDDING_MODEL_NAME = "textembedding-gecko@001"
embeddings_model = TextEmbeddingModel.from_pretrained(EMBEDDING_MODEL_NAME)

# Build a cosine-similarity scorer bound to one query vector, so that it can be
# passed directly to pandas' Series.apply
def get_similarity_fn(query_vector):
    """Return a function that scores a vector by cosine similarity to ``query_vector``.

    Args:
        query_vector: Numeric vector (array-like) that every row is compared against.

    Returns:
        A one-argument callable ``fn(row)`` that returns
        ``dot(row, query_vector) / (norm(row) * norm(query_vector))``.
        When either vector has zero norm the division is undefined and NumPy
        yields ``nan`` rather than raising.
    """
    # The query norm is identical for every row, so compute it once here
    # instead of on each call made by ``apply``.
    query_norm = numpy.linalg.norm(query_vector)

    def fn(row):
        return np.dot(row, query_vector) / (numpy.linalg.norm(row) * query_norm)

    return fn

# Embed a single text with the module-level embeddings model; the call is wrapped
# in google.api_core's Retry decorator (timeout=300.0)
@retry.Retry(timeout=300.0)
def get_embeddings(text):
    """Return the embedding values for ``text``.

    The text is sent to ``embeddings_model`` as a one-element list and the
    ``values`` attribute of the first returned embedding is handed back.
    """
    batch_result = embeddings_model.get_embeddings([text])
    first_embedding = batch_result[0]
    return first_embedding.values

In [18]:
def get_context(question, vector_store, num_docs):
    """Build a context string from the stored texts most similar to ``question``.

    Args:
        question: Query text; it is embedded with ``get_embeddings``.
        vector_store: DataFrame with an ``embeddings`` column (one vector per row)
            and a ``texts`` column.
        num_docs: Upper bound used to slice the similarity ranking.

    Returns:
        The ``texts`` of the selected rows joined with single spaces. Rows are
        selected by index membership, so they appear in ``vector_store`` order,
        not in order of similarity.
    """
    # Turn the question into a vector and bind a scorer to it
    query_vector = np.array(get_embeddings(question))
    score_row = get_similarity_fn(query_vector)

    # Score every stored embedding, rank best-first, keep the leading labels
    similarities = vector_store["embeddings"].apply(score_row)
    ranked = similarities.sort_values(ascending=False)
    best_labels = ranked[:num_docs].index

    # Pick the matching rows by index membership and stitch their texts together
    keep_mask = vector_store.index.isin(best_labels)
    matched_texts = vector_store.loc[keep_mask, "texts"]
    return " ".join(matched_texts.values)

In [21]:
# Example: build the context for a sample question from up to 10 matching chunks

user_query = 'Tell me about Hyde Park.'
context = get_context(user_query, vector_store, 10)
print(context)

Hyde Park  It was the site of the Chicago World’s Fair of 1893, home to former President Barack Obama, an incubator for Nobel Prize winners — basically, history is made in Hyde Park. Get hands-on at the science museum: This culturally rich neighborhood is bookended by two of the city’s most significant institutions — the University of Chicago to the west and the Museum of Science and Industry to t  and culture: Also near Hyde Park, the DuSable Black History Museum is the country’s first institution dedicated to African American history and culture. It resides within Washington Park, home to a bird and butterfly sanctuary and the much-photographed Fountain of Time. And the Hyde Park Art Center has been a powerhouse on the art scene for more than 70 years, claiming the title of oldest alterna Theatre See all events in Hyde Park Discover more in Hyde Park With trendy restaurants, cozy coffee shops, cocktail bars and shops on just about every corner, there’s endless ways to spend a day in 