In [9]:
!pip install chromadb

Collecting chromadb
  Downloading chromadb-0.5.21-py3-none-any.whl.metadata (6.8 kB)
Collecting build>=1.0.3 (from chromadb)
  Downloading build-1.2.2.post1-py3-none-any.whl.metadata (6.5 kB)
Collecting chroma-hnswlib==0.7.6 (from chromadb)
  Downloading chroma_hnswlib-0.7.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (252 bytes)
Collecting posthog>=2.4.0 (from chromadb)
  Downloading posthog-3.7.4-py2.py3-none-any.whl.metadata (2.0 kB)
Collecting onnxruntime>=1.14.1 (from chromadb)
  Downloading onnxruntime-1.20.1-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (4.5 kB)
Collecting opentelemetry-instrumentation-fastapi>=0.41b0 (from chromadb)
  Downloading opentelemetry_instrumentation_fastapi-0.49b2-py3-none-any.whl.metadata (2.1 kB)
Collecting tokenizers<=0.20.3,>=0.13.2 (from chromadb)
  Downloading tokenizers-0.20.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)
Collecting pypika>=0.48.9 (from chromadb)
  Do

In [23]:
!pip install openai

Collecting openai
  Downloading openai-1.56.1-py3-none-any.whl.metadata (24 kB)
Collecting jiter<1,>=0.4.0 (from openai)
  Downloading jiter-0.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.2 kB)
Downloading openai-1.56.1-py3-none-any.whl (389 kB)
Downloading jiter-0.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (343 kB)
Installing collected packages: jiter, openai
Successfully installed jiter-0.8.0 openai-1.56.1


In [1]:
import getpass
import os

import chromadb
import numpy as np
import openai
from google.cloud import storage
from openai import OpenAI

In [None]:
# --- Google Cloud Storage download settings ---
bucket_name = "project-yelp"  # GCS bucket holding the persisted ChromaDB files; replace with your bucket
chromadb_folder = "reviews_rag_chroma_reviews_db/"  # object-name prefix ("folder") of the ChromaDB data
local_destination = "/home/jupyter"  # local directory the downloaded objects are written into

# Client object used for the GCS calls in this notebook
storage_client = storage.Client()

def download_folder_from_gcs(bucket_name, folder_prefix, local_destination, client=None):
    """
    Download every object under a GCS prefix ("folder") into a local directory.

    The part of each object name that follows ``folder_prefix`` is used as the
    path relative to ``local_destination``, so the folder structure below the
    prefix is preserved on disk.

    Args:
        bucket_name: Name of the GCS bucket to read from.
        folder_prefix: Object-name prefix to download, e.g. ``"my_db/"``.
        local_destination: Local directory that receives the files.
        client: Optional storage client exposing ``bucket(name)``. When omitted,
            the notebook-level ``storage_client`` is used.

    Returns:
        None. Files are written to disk as a side effect.
    """
    gcs_client = client if client is not None else storage_client
    bucket = gcs_client.bucket(bucket_name)

    for blob in bucket.list_blobs(prefix=folder_prefix):
        # Strip a leading "/" (present when folder_prefix has no trailing slash):
        # os.path.join would otherwise treat the name as absolute and drop
        # local_destination entirely.
        relative_path = blob.name[len(folder_prefix):].lstrip("/")

        # Names that are empty or end in "/" are "directory" placeholder objects;
        # there is no file to write for them, and download_to_filename would be
        # pointed at a directory path.
        if not relative_path or relative_path.endswith("/"):
            continue

        local_path = os.path.join(local_destination, relative_path)
        os.makedirs(os.path.dirname(local_path), exist_ok=True)

        blob.download_to_filename(local_path)

# Pull the ChromaDB folder from the bucket into the local destination
download_folder_from_gcs(
    bucket_name=bucket_name,
    folder_prefix=chromadb_folder,
    local_destination=local_destination,
)

In [3]:
# Open the on-disk ChromaDB store and show which collections it contains.
# NOTE(review): this relative path ("reviews_rag_chromadb_reviews_db") is spelled
# differently from the GCS prefix ("reviews_rag_chroma_reviews_db/") and is resolved
# against the current working directory, while the download cell writes under
# "/home/jupyter" — confirm the downloaded files actually end up in this directory.
client = chromadb.PersistentClient(path="./reviews_rag_chromadb_reviews_db")
collections = client.list_collections()
print(f"Collections in ChromaDB: {collections}")

Collections in ChromaDB: [Collection(name=reviews_embeddings_collection)]


In [None]:
# OpenAI credentials: the key must not be hardcoded in the notebook. Reuse
# OPENAI_API_KEY when the environment already provides it; otherwise prompt for
# it without echoing, so the secret never lands in the saved file or its output.
api_key = os.environ.get("OPENAI_API_KEY") or getpass.getpass("OpenAI API key: ")
os.environ["OPENAI_API_KEY"] = api_key

# Open the existing embeddings collection. get_collection raises when the name
# is missing, whereas get_or_create_collection would silently create an empty
# collection (e.g. when the client points at the wrong directory) and every
# query would then come back with no results.
collection = client.get_collection(name="reviews_embeddings_collection")

def recommend_places(
    query_text,
    collection,
    n_results=3,
    embedding_model="text-embedding-ada-002",
    chat_model="gpt-4o",
    openai_client=None,
):
    """
    Retrieve the documents closest to a query and print an LLM-written summary.

    Steps: embed ``query_text``, query ``collection`` for the nearest
    ``n_results`` documents, print the retrieved distances, format the
    documents and their metadata into a system prompt, send that prompt plus
    the user's query to the chat model, and print the model's reply.

    Args:
        query_text: The user's request in natural language.
        collection: Object exposing ``query(query_embeddings=..., n_results=...)``
            that returns a mapping with "distances", "documents" and "metadatas".
        n_results: Number of documents to retrieve.
        embedding_model: Model name passed to the embeddings endpoint.
        chat_model: Model name passed to the chat-completions endpoint.
        openai_client: Optional client exposing ``embeddings.create`` and
            ``chat.completions.create``. A new ``OpenAI()`` client is created
            when omitted.

    Returns:
        None. The distances and the generated answer are printed.
    """
    # Use a single client for both the embedding and the chat request.
    client_openai = openai_client if openai_client is not None else OpenAI()

    embedding_response = client_openai.embeddings.create(
        model=embedding_model,
        input=query_text,
    )
    query_embedding = np.array(embedding_response.data[0].embedding, dtype=np.float64)

    results = collection.query(
        query_embeddings=[query_embedding],
        n_results=n_results,
    )

    # One list of distances per query embedding (only one embedding is sent).
    for distances in results["distances"] or []:
        print(f"Retrieved doc distance from query -> {distances}")

    documents = (results["documents"] or [[]])[0]
    metadatas = (results["metadatas"] or [[]])[0]

    # Without retrieved context the model could only invent places, so stop here.
    if not documents:
        print("No matching documents were found for this query.")
        return

    # A document stored without metadata comes back as None; treat it as empty
    # so the "N/A" fallbacks below apply instead of raising on ``**None``.
    retrieved_data = [
        {"document": doc, **(meta or {})} for doc, meta in zip(documents, metadatas)
    ]
    formatted_data = "\n".join([
        f"Document {i+1}:\n"
        f"Business Name: {item.get('name', 'N/A')}\n"
        f"Category: {item.get('categories', 'N/A')}\n"
        f"Attributes: {item.get('attributes', 'N/A')}\n"
        f"Content: {item['document']}\n"
        for i, item in enumerate(retrieved_data)
    ])

    response = client_openai.chat.completions.create(
        model=chat_model,
        messages=[
            {
                    "role": "system", 
                    "content": f"""
                    You are a helpful assistant.\n
                    You will be provided with results for a query in the following format: {formatted_data}.\n
                    Your task:
                    \t- Summarize the results in a detailed and user-friendly way. Give the location of the place as well.\n
                    \t- Highlight key aspects of each option, such as business name, category, and notable attributes (e.g., Wi-Fi, parking).\n
                    \t- Provide actionable recommendations for when to visit or choose each option, considering the query's intent (e.g., best times to visit, suitability for specific purposes like work, dining, or casual meetups).\n
                    \t- Structure your response to make it easy for the user to compare and decide between the options.\n
                    Ensure your tone is friendly, informative, and neutral, offering a balanced perspective of each result.
                    """
                },
            # The system prompt refers to "the query's intent", so the model has
            # to actually see the user's question alongside the retrieved data.
            {"role": "user", "content": query_text},
        ],
        temperature=1,
        max_tokens=2048,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0,
        response_format={
            "type": "text"
        }
    )

    print(response.choices[0].message.content)

In [17]:
# Single example request: seafood in New Orleans, parking not required
query_text = "I want to eat good seafood restaurant in New Orleans. Recommend me some places. Parking can be optional since I'll be arriving by an Uber."
recommend_places(query_text=query_text, collection=collection)

Retrieved doc distance from query -> [0.31690114736557007, 0.3423919379711151, 0.34809622168540955]
When considering dining options for seafood and soul food in New Orleans and its surrounding areas, you have three distinct venues to choose from, each with its own unique offerings and characteristics. Here’s a detailed and user-friendly comparison:

### 1. Saints Restaurant & Bar
- **Location:** 2600 Martin Luther King Jr Blvd, New Orleans, LA
- **Categories:** Restaurants, Seafood, Cocktail Bars, Nightlife
- **Notable Attributes:**
  - Full bar, free Wi-Fi
  - Outdoor seating available
  - Wheelchair accessible
  - Delivery and takeout options
  - Bike parking not available, street parking only
- **Key Aspects:**
  - Known for its seafood, particularly the Saints Ultimate Feast which includes a variety of seafood items like crawfish and crab, praised for its flavor.
  - Mixed reviews about consistency; some praise the crawfish and garlic butter sauce, while others had issues with seaf

In [18]:
# Batch of example requests: restaurants first, then non-restaurant businesses
query_text_list = [
    "Recommend a restaurant in Indianapolis with a high user rating that accepts credit cards and has a special focus on vegan options.",
    "Find a family-friendly restaurant in Tampa that offers outdoor seating, has a good reputation for cleanliness, and accepts credit card payments.",
    "I'm looking for an Italian restaurant in New Orleans that's highly rated for ambiance, offers free Wi-Fi, and is open for late-night dining.",
    "Can you suggest a restaurant in Speedway that specializes in gluten-free dishes, has ample parking, and a rating of 4 stars or above?",
    "Find a seafood restaurant in Chalfont that offers live music on weekends, accepts digital payments, and has high customer reviews.",
    "I need a restaurant in Fenton that offers a brunch menu, has high chairs available for children, and provides quick service.",
    "Recommend a downtown Indianapolis restaurant that serves organic food, has a patio, and where reservations are not required.",
    "I'm looking for a gym in Los Angeles with flexible hours, modern equipment, and a friendly cancellation policy.",
    "Suggest a reliable plumbing service in Tampa that's available for emergency calls and has a high satisfaction rating.",
    "Find me a home service provider in Indianapolis that specializes in eco-friendly solutions and offers weekend appointments.",
]

# Run every request through the recommender, echoing the query before its results
for query in query_text_list:
    print(f"For query -> {query} -> Results -> \n")
    recommend_places(query_text=query, collection=collection)
    print("\n")

For query -> Recommend a restaurant in Indianapolis with a high user rating that accepts credit cards and has a special focus on vegan options. -> Results -> 

Retrieved doc distance from query -> [0.369920551776886, 0.4071659743785858, 0.4100884795188904]
Here is a summary of the places based on the provided documents, along with key aspects and recommendations to help you decide which option might best suit your needs:

### 1. Indianapolis Monthly - Best New Restaurants 2019 Event

- **Location:** 2820 N Meridian St, Indianapolis, IN
- **Category:** Festivals, Arts & Entertainment
- **Key Aspects:** Community-focused event showcasing various local eateries; features a wide range of food such as sushi, pasta, desserts, nachos, and beverages.
- **Notable Attributes:** Not specified.
- **Highlights:**
  - Offers a diverse taste of Indy's dining scene.
  - Opportunities for networking and community engagement.
  - Some culinary highlights included tabouli salad from Aroma Indian Cuisine 