# Step 1: Setting Up the Python Application

In [1]:
!pip install lancedb pylance langchain langchain-openai pandas


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.3.1[0m[39;49m -> [0m[32;49m25.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [2]:
import os

from langchain_openai import ChatOpenAI
from langchain.output_parsers import PydanticOutputParser
from langchain.prompts import PromptTemplate

from pydantic import BaseModel

import lancedb
from lancedb.embeddings import get_registry
from lancedb.pydantic import LanceModel, Vector
from lancedb.embeddings import get_registry

import pandas as pd

import textwrap

In [3]:
# Credentials must be supplied through the environment before the kernel is
# started; an API key must never be committed inside the notebook.
# NOTE: any key that was previously stored in this cell should be treated as
# compromised and rotated.
if not os.environ.get("OPENAI_API_KEY"):
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Export it in the environment that launches "
        "the notebook kernel before running this cell."
    )

# Default API endpoint for this project; an already-exported OPENAI_API_BASE wins.
os.environ.setdefault("OPENAI_API_BASE", "https://openai.vocareum.com/v1")

# Set to true to give custom answers to user preference questions.
INTERACTIVE = False

In [4]:
# Chat model shared by every LLM call in this notebook.
llm = ChatOpenAI(
    model="gpt-3.5-turbo",
    temperature=1.0,
    max_tokens=3000,
    openai_api_key=os.environ.get('OPENAI_API_KEY'),
)

In [5]:
# Connect to the LanceDB database stored in the local ./lancedb directory.
vector_db = lancedb.connect("./lancedb")

# Step 2: Generating Real Estate Listings

In [6]:
# Structured form of one generated real estate listing.
# NOTE: documented with comments rather than a class docstring on purpose; a
# docstring would become part of the pydantic schema and therefore of the
# format instructions sent to the LLM.
class Listing(BaseModel):
    # Name of the neighborhood the property is in.
    neighborhood: str
    # Asking price as a whole number (the example listing quotes it in dollars).
    price: int
    bedrooms: int
    bathrooms: int
    # Living area as a whole number (the example listing quotes it in sqft).
    house_size: int
    # Free-text description of the property itself.
    description: str
    # Free-text description of the surrounding neighborhood.
    neighborhood_description: str    

# Output parser for Listing; its format instructions are embedded in the
# listing-generation prompt.
parser = PydanticOutputParser(pydantic_object=Listing)

In [7]:
# Assemble the chat messages that ask the model for fake real estate listings.

# One hand-written listing shown to the model as a pattern to imitate.
example_listing = """
Neighborhood: Green Oaks
Price: $800,000
Bedrooms: 3
Bathrooms: 2
House Size: 2,000 sqft

Description: Welcome to this eco-friendly oasis nestled in the heart of Green Oaks. This charming 3-bedroom, 2-bathroom home boasts energy-efficient features such as solar panels and a well-insulated structure. Natural light floods the living spaces, highlighting the beautiful hardwood floors and eco-conscious finishes. The open-concept kitchen and dining area lead to a spacious backyard with a vegetable garden, perfect for the eco-conscious family. Embrace sustainable living without compromising on style in this Green Oaks gem.

Neighborhood Description: Green Oaks is a close-knit, environmentally-conscious community with access to organic grocery stores, community gardens, and bike paths. Take a stroll through the nearby Green Oaks Park or grab a cup of coffee at the cozy Green Bean Cafe. With easy access to public transportation and bike lanes, commuting is a breeze.
"""

# System prompt skeleton: role, example listing, then the parser's format instructions.
PROMPT_TEMPLATE = """You are a real estate agent, who wants to create compelling real estate listings, clients are not able to resist.

Here is an example of the information a listing:

{example}

---

{format_instructions}
"""

# Fill both template variables in a single format() call.
listing_prompt = PromptTemplate.from_template(PROMPT_TEMPLATE)
system_prompt = listing_prompt.format(
    example=example_listing,
    format_instructions=parser.get_format_instructions(),
)

human_prompt = """
    Create 10 listings as JSON array.
"""

messages = [("system", system_prompt), ("human", human_prompt)]

In [8]:
# Send the system and human messages to the LLM to retrieve the listings.
response = llm.invoke(messages)
# Show the raw model reply as the cell output.
response

AIMessage(content='```json\n[\n    {\n        "neighborhood": "Green Oaks",\n        "price": 800000,\n        "bedrooms": 3,\n        "bathrooms": 2,\n        "house_size": 2000,\n        "description": "Welcome to this eco-friendly oasis nestled in the heart of Green Oaks. This charming 3-bedroom, 2-bathroom home boasts energy-efficient features such as solar panels and a well-insulated structure. Natural light floods the living spaces, highlighting the beautiful hardwood floors and eco-conscious finishes. The open-concept kitchen and dining area lead to a spacious backyard with a vegetable garden, perfect for the eco-conscious family. Embrace sustainable living without compromising on style in this Green Oaks gem.",\n        "neighborhood_description": "Green Oaks is a close-knit, environmentally-conscious community with access to organic grocery stores, community gardens, and bike paths. Take a stroll through the nearby Green Oaks Park or grab a cup of coffee at the cozy Green Bean

In [9]:
def extract_json_payload(text):
    """Return the JSON text contained in an LLM reply, without Markdown fencing.

    If the reply contains a fenced code block (three backticks), only the
    contents of the first such block are kept, so prose before or after the
    block is dropped. A fence that is never closed is read to the end of the
    reply. A leading "json" language tag is removed case-insensitively.
    Replies without any fence are only stripped of surrounding backticks,
    spaces and newlines.

    Args:
        text: Raw reply text from the model.

    Returns:
        The payload with surrounding whitespace removed. The result is not
        validated as JSON.
    """
    fence = "```"
    fence_start = text.find(fence)
    if fence_start != -1:
        body_start = fence_start + len(fence)
        fence_end = text.find(fence, body_start)
        text = text[body_start:] if fence_end == -1 else text[body_start:fence_end]

    payload = text.strip('` \n')
    # Drop the language tag that follows an opening fence ("json", "JSON", ...).
    if payload[:4].lower() == 'json':
        payload = payload[4:]
    return payload.strip()


# Remove potential json markdown from listing
listing_json = extract_json_payload(response.content)

listing_json

'\n[\n    {\n        "neighborhood": "Green Oaks",\n        "price": 800000,\n        "bedrooms": 3,\n        "bathrooms": 2,\n        "house_size": 2000,\n        "description": "Welcome to this eco-friendly oasis nestled in the heart of Green Oaks. This charming 3-bedroom, 2-bathroom home boasts energy-efficient features such as solar panels and a well-insulated structure. Natural light floods the living spaces, highlighting the beautiful hardwood floors and eco-conscious finishes. The open-concept kitchen and dining area lead to a spacious backyard with a vegetable garden, perfect for the eco-conscious family. Embrace sustainable living without compromising on style in this Green Oaks gem.",\n        "neighborhood_description": "Green Oaks is a close-knit, environmentally-conscious community with access to organic grocery stores, community gardens, and bike paths. Take a stroll through the nearby Green Oaks Park or grab a cup of coffee at the cozy Green Bean Cafe. With easy access t

In [10]:
# Write listings as JSON to file.
# The context manager closes the file even if write() raises, and the explicit
# UTF-8 encoding matches what pd.read_json expects when reading it back.
with open('listings.json', 'w', encoding='utf-8') as listings_file:
    listings_file.write(listing_json.strip())

# Step 3: Storing Listings in a Vector Database

In [11]:
# Embedding function taken from LanceDB's registry under the "openai" key,
# created with the API base URL read from the OPENAI_API_BASE environment variable.
func = get_registry().get("openai").create(base_url=os.getenv('OPENAI_API_BASE'))

# Schema of the `listings` table.
class ListingVector(LanceModel):
    neighborhood: str
    price: int
    bedrooms: int
    bathrooms: int
    house_size: int
    # Declared as the embedding function's source field.
    description: str = func.SourceField()
    # Declared as the embedding function's vector field; sized by func.ndims().
    description_vector: Vector(func.ndims()) = func.VectorField()  


In [12]:
# Read the listings written to listings.json back into a DataFrame.
df = pd.read_json("listings.json")
# Display the loaded listings as the cell output.
df

Unnamed: 0,neighborhood,price,bedrooms,bathrooms,house_size,description,neighborhood_description
0,Green Oaks,800000,3,2,2000,Welcome to this eco-friendly oasis nestled in ...,"Green Oaks is a close-knit, environmentally-co..."
1,Sunnydale Estates,950000,4,3,2800,Discover luxury living in the prestigious Sunn...,Sunnydale Estates is an upscale neighborhood k...
2,Lakeview Heights,720000,3,2,2200,Immerse yourself in the beauty of Lakeview Hei...,Lakeview Heights is a picturesque community su...
3,Golden Pines,680000,3,2,1900,Experience the tranquility of Golden Pines. Th...,Golden Pines is a nature lover's paradise with...
4,Riverfront Terrace,1050000,5,4,3500,Live in luxury at Riverfront Terrace. This exp...,Riverfront Terrace is an exclusive waterfront ...
5,Mountain Ridge,890000,4,3,2700,Escape to the serenity of Mountain Ridge. This...,Mountain Ridge is a peaceful mountain communit...
6,Sunset Hills,780000,3,2,2100,Enjoy the sunset views from Sunset Hills. This...,Sunset Hills is a friendly community known for...
7,Harbor View,950000,4,3,2600,Wake up to breathtaking harbor views in Harbor...,Harbor View is a coveted seaside community kno...
8,Meadowbrook Estates,720000,3,2,2000,Step into the elegance of Meadowbrook Estates....,Meadowbrook Estates is an upscale community kn...
9,Maple Grove,670000,3,2,1900,Find your sanctuary in Maple Grove. This invit...,Maple Grove is a family-friendly neighborhood ...


In [13]:
# Write listings to database

def to_vector_record(listing):
    """Turn one DataFrame row into the record stored in the `listings` table.

    The property and neighborhood descriptions are joined with a single space
    and stored together in the "description" field.
    """
    combined_description = listing["description"] + " " + listing["neighborhood_description"]
    return {
        "neighborhood": listing["neighborhood"],
        "price": listing["price"],
        "bedrooms": listing["bedrooms"],
        "bathrooms": listing["bathrooms"],
        "house_size": listing["house_size"],
        "description": combined_description,
    }


row_records = df.apply(to_vector_record, axis=1)
vector_data = row_records.values.tolist()

# (Re)create the table with the ListingVector schema, then insert the records.
table = vector_db.create_table(
    'listings',
    schema=ListingVector,
    mode='overwrite',
    exist_ok=True,
)
table.add(vector_data)

table.to_pandas()

Unnamed: 0,neighborhood,price,bedrooms,bathrooms,house_size,description,description_vector
0,Green Oaks,800000,3,2,2000,Welcome to this eco-friendly oasis nestled in ...,"[0.012680263, -0.000397279, 0.0059024305, -0.0..."
1,Sunnydale Estates,950000,4,3,2800,Discover luxury living in the prestigious Sunn...,"[0.03086828, 0.002924329, 0.013881516, -0.0266..."
2,Lakeview Heights,720000,3,2,2200,Immerse yourself in the beauty of Lakeview Hei...,"[0.012345566, 0.006913517, 0.019489534, -0.016..."
3,Golden Pines,680000,3,2,1900,Experience the tranquility of Golden Pines. Th...,"[0.031288713, -0.005780893, 0.009240195, -0.01..."
4,Riverfront Terrace,1050000,5,4,3500,Live in luxury at Riverfront Terrace. This exp...,"[0.0037524728, 0.004762754, -0.00031550307, -0..."
5,Mountain Ridge,890000,4,3,2700,Escape to the serenity of Mountain Ridge. This...,"[0.008170463, 0.013568463, -0.015446584, -0.00..."
6,Sunset Hills,780000,3,2,2100,Enjoy the sunset views from Sunset Hills. This...,"[0.02806657, 0.0152901905, 0.0013127859, -0.02..."
7,Harbor View,950000,4,3,2600,Wake up to breathtaking harbor views in Harbor...,"[0.007002868, -0.004306897, 0.021714218, -0.01..."
8,Meadowbrook Estates,720000,3,2,2000,Step into the elegance of Meadowbrook Estates....,"[-0.0025120673, -0.00090990844, -0.00085263, -..."
9,Maple Grove,670000,3,2,1900,Find your sanctuary in Maple Grove. This invit...,"[0.015432407, 0.013707832, 0.0016072782, -0.01..."


# Step 4: Building the User Preference Interface

In [14]:
# Example questions put to the buyer, with canned answers for non-interactive runs.
questions = [
    "How big do you want your house to be?",
    "What are 3 most important things for you in choosing this property?",
    "Which amenities would you like?",
    "Which transportation options are important to you?",
    "How urban do you want your neighborhood to be?",
]
answers = [
    "A comfortable three-bedroom house with a spacious kitchen and a cozy living room.",
    "A quiet neighborhood, good local schools, and convenient shopping options.",
    "A backyard for gardening, a two-car garage, and a modern, energy-efficient heating system.",
    "Easy access to a reliable bus line, proximity to a major highway, and bike-friendly roads.",
    "A balance between suburban tranquility and access to urban amenities like restaurants and theaters.",
]

# System message, spelled out line by line so its whitespace is explicit.
system_instructions = (
    "\n"
    "    You are a real estate preference retrieving chat bot.\n"
    f"    You are asking users {len(questions)} questions and they respond to it.\n"
    "    \n"
    "    After the last question process the given preferences and return a JSON object with keys and values containing the preferences.\n"
    "    If there are concrete numbers given return them as integer field in the object, e.g. number of bedrooms = 2.\n"
)

qa_messages = [("system", system_instructions)]

# Replay the interview as alternating ai/human turns.
for idx, question in enumerate(questions):
    qa_messages.append(("ai", question))

    # When INTERACTIVE is true the user types the answer; otherwise the canned one is used.
    if INTERACTIVE:
        reply = input(question + "\n")
    else:
        reply = answers[idx]
    qa_messages.append(("human", reply))

qa_response = llm.invoke(qa_messages)

In [15]:
# Strip surrounding backticks/spaces/newlines, then a leading "json" tag if present.
raw_preferences = qa_response.content.strip('` \n')
user_preferences = raw_preferences[4:] if raw_preferences.startswith('json') else raw_preferences

print(user_preferences)

{
    "House Size": "Comfortable three-bedroom",
    "Important Factors": ["Quiet neighborhood", "Good local schools", "Convenient shopping options"],
    "Amenities": ["Backyard for gardening", "Two-car garage", "Modern, energy-efficient heating system"],
    "Transportation Options": ["Easy access to a reliable bus line", "Proximity to a major highway", "Bike-friendly roads"],
    "Urban Preference": "Balanced between suburban tranquility and access to urban amenities"
}


# Step 5: Searching Based on Preferences

In [16]:
def search_listings(user_preferences, limit):
    """Search the listings table with the preference text as the query.

    Args:
        user_preferences: Query passed to the table's search.
        limit: Maximum number of results to return.

    Returns:
        The results converted to ListingVector objects.
    """
    query = table.search(user_preferences)
    top_hits = query.limit(limit)
    return top_hits.to_pydantic(ListingVector)

# Search for the 5 best matches of listings to user preferences
matched_listings = search_listings(user_preferences, 5)

# Display the matched listings as the cell output.
matched_listings

[ListingVector(neighborhood='Maple Grove', price=670000, bedrooms=3, bathrooms=2, house_size=1900, description="Find your sanctuary in Maple Grove. This inviting 3-bedroom, 2-bathroom home features a cozy living room, a sunlit kitchen, and a landscaped backyard with mature trees. The covered porch is perfect for sipping morning coffee or enjoying the peaceful surroundings. Relax and unwind in the tranquility of Maple Grove. Maple Grove is a family-friendly neighborhood with top-rated schools, parks, and community events. Residents can enjoy picnics in the park, nature walks, and local farmers' markets. Experience the charm and sense of community in Maple Grove.", description_vector=FixedSizeList(dim=1536)),
 ListingVector(neighborhood='Green Oaks', price=800000, bedrooms=3, bathrooms=2, house_size=2000, description='Welcome to this eco-friendly oasis nestled in the heart of Green Oaks. This charming 3-bedroom, 2-bathroom home boasts energy-efficient features such as solar panels and a 

# Step 6: Personalizing Listing Descriptions

In [17]:
def personalize_listing(listing, user_preferences):
    """Return a copy of `listing` whose description is rewritten by the LLM.

    The prompt asks the model to augment the description towards the buyer
    preferences without changing factual information. Only `description` is
    replaced on the copy; every other field is carried over unchanged.

    Args:
        listing: Listing object with a `description` attribute and `model_copy()`.
        user_preferences: Buyer preferences text inserted into the prompt.

    Returns:
        The copied listing with the LLM's reply as its description.
    """
    template_text = """
    Take the following Listing:

    {listing}

    ---

    Without changing factual information, augment this real estate listing to highlight aspects relevant to the buyer preferences:

    {user_preferences}
    """

    rewrite_prompt = PromptTemplate.from_template(template_text).format(
        listing=listing.description,
        user_preferences=user_preferences,
    )
    llm_reply = llm.invoke(rewrite_prompt)

    tailored = listing.model_copy()
    tailored.description = llm_reply.content
    return tailored




In [18]:
# Personalize every matched listing for the collected user preferences.
personalized_listings = [
    personalize_listing(match, user_preferences) for match in matched_listings
]

In [19]:
# Tabulate the personalized listings (one row per listing) as the cell output.
pd.DataFrame([listing.model_dump() for listing in personalized_listings])

Unnamed: 0,neighborhood,price,bedrooms,bathrooms,house_size,description,description_vector
0,Maple Grove,670000,3,2,1900,Find your sanctuary in Maple Grove. This comfo...,"[0.015432407148182392, 0.013707832433283329, 0..."
1,Green Oaks,800000,3,2,2000,Welcome to this eco-friendly oasis nestled in ...,"[0.012680263258516788, -0.00039727898547425866..."
2,Sunset Hills,780000,3,2,2100,Enjoy the sunset views from Sunset Hills. This...,"[0.028066569939255714, 0.01529019046574831, 0...."
3,Sunnydale Estates,950000,4,3,2800,Discover luxury living in the prestigious Sunn...,"[0.030868280678987503, 0.002924328902736306, 0..."
4,Golden Pines,680000,3,2,1900,Experience the tranquility and convenience of ...,"[0.031288713216781616, -0.005780892912298441, ..."


In [None]:
# Logic for Searching and Augmenting Listing Descriptions

def print_listing(title, listing):
    """Print `title` followed by the listing's key facts and wrapped description.

    Args:
        title: Heading line printed above the listing.
        listing: Object with neighborhood, price, bedrooms, bathrooms,
            house_size and description attributes.
    """
    # The indent is kept outside the f-string: a backslash inside an f-string
    # expression is a SyntaxError on Python versions before 3.12.
    indent = "\t\t"
    wrapped_description = textwrap.fill(
        listing.description,
        width=70,
        initial_indent=indent,
        subsequent_indent=indent,
    )

    print(f"{title}\n")
    print(f"\tNeighborhood: {listing.neighborhood}")
    print(f"\tPrice: ${listing.price}")
    print(f"\tBedrooms: {listing.bedrooms}")
    print(f"\tBathrooms: {listing.bathrooms}")
    print(f"\tHouse Size: {listing.house_size} sqft")
    print(f"\tDescription:\n{wrapped_description}\n")


print("User preferences:\n")
print(user_preferences)

# Find the top listing
matched_listings = search_listings(user_preferences, 1)

if matched_listings:
    top_listing = matched_listings[0]
    print_listing("Top matching listing without augmentation:", top_listing)

    # Rewrite the description for this buyer and show the result in the same layout.
    personalized_listing = personalize_listing(top_listing, user_preferences)
    print_listing("Top matching listing with augmentation:", personalized_listing)
else:
    print("No relevant listings found.")



User preferences:

{
    "House Size": "Comfortable three-bedroom",
    "Important Factors": ["Quiet neighborhood", "Good local schools", "Convenient shopping options"],
    "Amenities": ["Backyard for gardening", "Two-car garage", "Modern, energy-efficient heating system"],
    "Transportation Options": ["Easy access to a reliable bus line", "Proximity to a major highway", "Bike-friendly roads"],
    "Urban Preference": "Balanced between suburban tranquility and access to urban amenities"
}
Top matching listing without augmentation:

	Neighborhood: Maple Grove
	Price: $670000
	Bedrooms: 3
	Bathrooms: 2
	House Size: 1900 sqft
	Description:
		Find your sanctuary in Maple Grove. This inviting 3-bedroom,
		2-bathroom home features a cozy living room, a sunlit kitchen, and a
		landscaped backyard with mature trees. The covered porch is perfect
		for sipping morning coffee or enjoying the peaceful surroundings.
		Relax and unwind in the tranquility of Maple Grove. Maple Grove is a
		family-