# Step 1: Setting Up the Python Application

In [19]:
!pip install lancedb pylance langchain langchain-openai pandas


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.3.1[0m[39;49m -> [0m[32;49m25.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [None]:
import os

from langchain_openai import ChatOpenAI
from langchain.output_parsers import PydanticOutputParser
from langchain.prompts import PromptTemplate

from pydantic import BaseModel

import lancedb
from lancedb.embeddings import get_registry
from lancedb.pydantic import LanceModel, Vector
from lancedb.embeddings import get_registry

import pandas as pd

In [None]:
os.environ["OPENAI_API_KEY"] = "YOUR_API_KEY"
os.environ["OPENAI_API_BASE"] = "https://openai.vocareum.com/v1"

# Set to true to give custom answers to user preference questions.
INTERACTIVE = False

In [22]:
# Setup LLM
llm = ChatOpenAI(model="gpt-3.5-turbo", 
             openai_api_key=os.getenv('OPENAI_API_KEY'),
             temperature=1.0, 
             max_tokens=3000)

In [23]:
# Setup Vector Database
vector_db = lancedb.connect(
  uri="./lancedb"
)

# Step 2: Generating Real Estate Listings

In [24]:
class Listing(BaseModel):
    neighborhood: str
    price: int
    bedrooms: int
    bathrooms: int
    house_size: int
    description: str
    neighborhood_description: str    

parser = PydanticOutputParser(pydantic_object=Listing)

In [25]:
# Build prompt to create fake real estate listings
example_listing = """
Neighborhood: Green Oaks
Price: $800,000
Bedrooms: 3
Bathrooms: 2
House Size: 2,000 sqft

Description: Welcome to this eco-friendly oasis nestled in the heart of Green Oaks. This charming 3-bedroom, 2-bathroom home boasts energy-efficient features such as solar panels and a well-insulated structure. Natural light floods the living spaces, highlighting the beautiful hardwood floors and eco-conscious finishes. The open-concept kitchen and dining area lead to a spacious backyard with a vegetable garden, perfect for the eco-conscious family. Embrace sustainable living without compromising on style in this Green Oaks gem.

Neighborhood Description: Green Oaks is a close-knit, environmentally-conscious community with access to organic grocery stores, community gardens, and bike paths. Take a stroll through the nearby Green Oaks Park or grab a cup of coffee at the cozy Green Bean Cafe. With easy access to public transportation and bike lanes, commuting is a breeze.
"""

PROMPT_TEMPLATE = """You are a real estate agent, who wants to create compelling real estate listings, clients are not able to resist.

Here is an example of the information a listing:

{example}

---

{format_instructions}
"""

system_prompt = PromptTemplate.from_template(PROMPT_TEMPLATE, partial_variables={'example': example_listing, 'format_instructions':parser.get_format_instructions}).format()
human_prompt = """
    Create 10 listings as JSON array.
"""

messages = [
    ("system", system_prompt),
    ("human", human_prompt),
]

In [26]:
#Send Prompt to LLM to retrieve listings.
response = llm.invoke(messages)
response

AIMessage(content='```json\n[\n    {\n        "neighborhood": "Green Oaks",\n        "price": 800000,\n        "bedrooms": 3,\n        "bathrooms": 2,\n        "house_size": 2000,\n        "description": "Welcome to this eco-friendly oasis nestled in the heart of Green Oaks. This charming 3-bedroom, 2-bathroom home boasts energy-efficient features such as solar panels and a well-insulated structure. Natural light floods the living spaces, highlighting the beautiful hardwood floors and eco-conscious finishes. The open-concept kitchen and dining area lead to a spacious backyard with a vegetable garden, perfect for the eco-conscious family. Embrace sustainable living without compromising on style in this Green Oaks gem.",\n        "neighborhood_description": "Green Oaks is a close-knit, environmentally-conscious community with access to organic grocery stores, community gardens, and bike paths. Take a stroll through the nearby Green Oaks Park or grab a cup of coffee at the cozy Green Bean

In [27]:
# Remove potential json markdown from listing
listing_json = response.content.strip('` \n')

if listing_json.startswith('json'):
    listing_json = listing_json[4:]

listing_json

'\n[\n    {\n        "neighborhood": "Green Oaks",\n        "price": 800000,\n        "bedrooms": 3,\n        "bathrooms": 2,\n        "house_size": 2000,\n        "description": "Welcome to this eco-friendly oasis nestled in the heart of Green Oaks. This charming 3-bedroom, 2-bathroom home boasts energy-efficient features such as solar panels and a well-insulated structure. Natural light floods the living spaces, highlighting the beautiful hardwood floors and eco-conscious finishes. The open-concept kitchen and dining area lead to a spacious backyard with a vegetable garden, perfect for the eco-conscious family. Embrace sustainable living without compromising on style in this Green Oaks gem.",\n        "neighborhood_description": "Green Oaks is a close-knit, environmentally-conscious community with access to organic grocery stores, community gardens, and bike paths. Take a stroll through the nearby Green Oaks Park or grab a cup of coffee at the cozy Green Bean Cafe. With easy access t

In [28]:
# Write listings as JSON to file
f = open('listings.json', 'w')
f.write(listing_json.strip())
f.close()

# Step 3: Storing Listings in a Vector Database

In [29]:
func = get_registry().get("openai").create(base_url=os.getenv('OPENAI_API_BASE'))

class ListingVector(LanceModel):
    neighborhood: str
    price: int
    bedrooms: int
    bathrooms: int
    house_size: int
    description: str = func.SourceField()
    description_vector: Vector(func.ndims()) = func.VectorField()  


In [30]:
# Read listings from file
df = pd.read_json("listings.json")
df

Unnamed: 0,neighborhood,price,bedrooms,bathrooms,house_size,description,neighborhood_description
0,Green Oaks,800000,3,2,2000,Welcome to this eco-friendly oasis nestled in ...,"Green Oaks is a close-knit, environmentally-co..."
1,Maple Grove,650000,4,3,2500,"Step into this modern 4-bedroom, 3-bathroom ho...",Maple Grove offers a family-friendly environme...
2,Riverside Heights,950000,5,4,3500,"Luxury living awaits in this 5-bedroom, 4-bath...",Riverside Heights is known for its upscale liv...
3,Sunnyvale Meadows,720000,3,2,2100,"Welcome to this charming 3-bedroom, 2-bathroom...",Sunnyvale Meadows offers a tranquil setting wi...
4,Pine Valley,550000,4,3,2200,"Discover this lovely 4-bedroom, 3-bathroom hom...",Pine Valley exudes a peaceful ambiance with sc...
5,Harbor Bay,880000,4,3,2800,Live the coastal lifestyle in this stunning 4-...,Harbor Bay offers a resort-style living experi...
6,Meadowbrook Heights,680000,3,2,1900,"Step into this charming 3-bedroom, 2-bathroom ...",Meadowbrook Heights is a nature lover's paradi...
7,Golden Hills,750000,4,3,2300,"Welcome to this beautiful 4-bedroom, 3-bathroo...",Golden Hills offers a prestigious lifestyle wi...
8,Lakefront Estates,920000,5,4,3200,Indulge in luxury living in this expansive 5-b...,Lakefront Estates is known for its upscale pro...
9,Mountain View Terrace,690000,3,2,2000,"Discover this cozy 3-bedroom, 2-bathroom home ...",Mountain View Terrace provides a serene settin...


In [31]:
# Write listings to database

vector_data = df.apply(lambda listing: {
    "neighborhood": listing["neighborhood"],
    "price": listing["price"],
    "bedrooms": listing["bedrooms"],
    "bathrooms": listing["bathrooms"],
    "house_size": listing["house_size"],
    "description": listing["description"] + " " + listing["neighborhood_description"]
}, axis=1).values.tolist()

table = vector_db.create_table('listings', mode='overwrite', exist_ok=True, schema=ListingVector)
table.add(vector_data)

table.to_pandas()

Unnamed: 0,neighborhood,price,bedrooms,bathrooms,house_size,description,description_vector
0,Green Oaks,800000,3,2,2000,Welcome to this eco-friendly oasis nestled in ...,"[0.012680263, -0.000397279, 0.0059024305, -0.0..."
1,Maple Grove,650000,4,3,2500,"Step into this modern 4-bedroom, 3-bathroom ho...","[0.013225698, 0.016146159, -0.008484774, -0.02..."
2,Riverside Heights,950000,5,4,3500,"Luxury living awaits in this 5-bedroom, 4-bath...","[0.015028327, 0.021495543, -0.0016582182, -0.0..."
3,Sunnyvale Meadows,720000,3,2,2100,"Welcome to this charming 3-bedroom, 2-bathroom...","[0.009319344, 0.013159768, 0.009921636, -0.012..."
4,Pine Valley,550000,4,3,2200,"Discover this lovely 4-bedroom, 3-bathroom hom...","[0.030138627, 0.010957135, 0.00087188603, -0.0..."
5,Harbor Bay,880000,4,3,2800,Live the coastal lifestyle in this stunning 4-...,"[0.009495488, -0.00065554626, 0.011885914, -0...."
6,Meadowbrook Heights,680000,3,2,1900,"Step into this charming 3-bedroom, 2-bathroom ...","[0.0025294574, 0.001907299, -0.003006282, -0.0..."
7,Golden Hills,750000,4,3,2300,"Welcome to this beautiful 4-bedroom, 3-bathroo...","[0.024445431, 0.020198612, 0.00418456, -0.0215..."
8,Lakefront Estates,920000,5,4,3200,Indulge in luxury living in this expansive 5-b...,"[0.009897441, 0.017221548, -0.0070997644, -0.0..."
9,Mountain View Terrace,690000,3,2,2000,"Discover this cozy 3-bedroom, 2-bathroom home ...","[0.01838561, 0.012598514, -0.0054445677, -0.01..."


# Step 4: Building the User Preference Interface

In [32]:
# Example
questions = [   
    "How big do you want your house to be?",
    "What are 3 most important things for you in choosing this property?", 
    "Which amenities would you like?", 
    "Which transportation options are important to you?",
    "How urban do you want your neighborhood to be?",   
]
answers = [
    "A comfortable three-bedroom house with a spacious kitchen and a cozy living room.",
    "A quiet neighborhood, good local schools, and convenient shopping options.",
    "A backyard for gardening, a two-car garage, and a modern, energy-efficient heating system.",
    "Easy access to a reliable bus line, proximity to a major highway, and bike-friendly roads.",
    "A balance between suburban tranquility and access to urban amenities like restaurants and theaters."
]

qa_messages = []
qa_messages.append(("system", f"""
    You are a real estate preference retrieving chat bot.
    You are asking users {len(questions)} questions and they respond to it.
    
    After the last question process the given preferences and return a JSON object with keys and values containing the preferences.
    If there are concrete numbers given return them as integer field in the object, e.g. number of bedrooms = 2.
"""))

for i in range(len(questions)):
    qa_messages.append(("ai", questions[i]))
    
    # If INTERACTIVE is true, ask the user his preferences.
    if INTERACTIVE:
        qa_messages.append(("human", input(questions[i] + "\n")))
    else:
        qa_messages.append(("human", answers[i]))

qa_response = llm.invoke(qa_messages)

In [33]:
# Remove potential JSON markdown from response
user_preferences = qa_response.content.strip('` \n')

if user_preferences.startswith('json'):
    user_preferences = user_preferences[4:]

print(user_preferences)

{
    "HouseSize": "Three-bedroom",
    "KitchenSize": "Spacious",
    "LivingRoomSize": "Cozy",
    "Neighborhood": {
        "Quiet": true,
        "GoodSchools": true,
        "ConvenientShopping": true
    },
    "Amenities": {
        "Backyard": true,
        "GarageSize": "Two-car",
        "HeatingSystem": "Modern and energy-efficient"
    },
    "Transportation": {
        "BusLine": true,
        "ProximityToHighway": true,
        "BikeFriendly": true
    },
    "UrbanFactor": "Balanced suburban tranquility with access to urban amenities"
}


# Step 5: Searching Based on Preferences

In [34]:
# Search for the 3 best matches of listings to user preferences
search_result = table.search(user_preferences).limit(3).to_pydantic(ListingVector)

search_result

[ListingVector(neighborhood='Meadowbrook Heights', price=680000, bedrooms=3, bathrooms=2, house_size=1900, description="Step into this charming 3-bedroom, 2-bathroom home in the peaceful Meadowbrook Heights neighborhood. The inviting living room features a brick fireplace and a bay window with garden views. The updated kitchen is equipped with quartz countertops and a breakfast nook. Retreat to the backyard oasis with a deck and mature trees, offering a private and serene escape. Enjoy a relaxed lifestyle in Meadowbrook Heights. Meadowbrook Heights is a nature lover's paradise with walking trails, parks, and nature reserves just steps away. Residents can unwind in the community pool or enjoy a picnic in the neighborhood park. With easy access to shopping centers and schools, this neighborhood provides a mix of tranquility and convenience.", description_vector=FixedSizeList(dim=1536)),
 ListingVector(neighborhood='Golden Hills', price=750000, bedrooms=4, bathrooms=3, house_size=2300, de

# Step 6: Personalizing Listing Descriptions

In [35]:
# Create personalized listing descriptions for the found listing.
PROMPT_TEMPLATE = """
Take the following Listing:

{listing}

---

Rewrite this real estate listing to highlight aspects relevant to the buyer preferences: {user_preferences}.
"""

personalized_listings = []
for listing in search_result:
    prompt = PromptTemplate.from_template(PROMPT_TEMPLATE).format(user_preferences=user_preferences,listing=listing.description)
    personalized_listing = listing.model_copy()
    personalized_listing.description = llm.invoke(prompt).content
    personalized_listings.append(personalized_listing)

personalized_listings

pd.DataFrame([listing.model_dump() for listing in personalized_listings])

Unnamed: 0,neighborhood,price,bedrooms,bathrooms,house_size,description,description_vector
0,Meadowbrook Heights,680000,3,2,1900,Discover this spacious three-bedroom home in a...,"[0.0025294574443250895, 0.0019072990398854017,..."
1,Golden Hills,750000,4,3,2300,"Welcome to this charming three-bedroom, two-ba...","[0.0244454313069582, 0.020198611542582512, 0.0..."
2,Maple Grove,650000,4,3,2500,Step into this cozy three-bedroom home in the ...,"[0.013225697912275791, 0.016146158799529076, -..."
