# This project showcases an AI real estate agent built with state-of-the-art LLM tools

In [154]:
import numpy as np
import pandas as pd
import random
import openai
import re
from openai import OpenAI
import sys

In [2]:
# Read the OpenAI API key from a local text file and build the shared client
# that every API call in this notebook goes through.
with open('openai_api_key.txt', 'r') as file:
    # Strip surrounding whitespace: a trailing newline left by an editor would
    # otherwise be sent as part of the key.
    openai_api_key = file.read().strip()
client = OpenAI(api_key=openai_api_key)

In [27]:
# Property categories: generate_listings cycles through them when creating
# listings and generate_buyer_preference samples one at random.
property_types = ["condo", "apartment", "house", "ranch", "mansion"]

# Prompt template for listing generation. The single `{}` placeholder is filled
# with a property type via str.format. generate_listings parses the reply by
# looking for lower-cased "<field>:" prefixes that correspond to the labels below.
agent_prompt = """
You are a real estate agent, there are many properties on the market for sale, including condo, apartment, house, ranch, and mansion.
The listing must include: neighborhood name, property type, price, size, number of bedrooms, number of bathrooms, description, neighborhood description.
Use the following format:

Neighborhood name (be creative):
Property type:
Price:
Size:
Number of bedrooms:
Number of bathrooms:
Description:
Neighborhood Description (be creative):

Generate a description for a {} listed for sale:
"""

# Prompt template used to simulate a buyer. The `{}` placeholder is filled with
# a property type; the model is asked to answer the four questions listed.
buyer_prompt = """
You are a real estate buyer, you are interetsed in buying a {}.
A real estate agent is helping you choosing a property listing that best suits your preferences.

Answer the following questions:

Questions:
What kind of property are you interested in buying?
What kind of neighborhood would you like to live in?
How many bedrooms and bathrooms do you need?
What amenities would you like?

Answers:
"""

## Step 1: Use OpenAI GPT-3.5-Turbo to generate listings of properties

In [28]:
def generate_listing_description(agent_prompt):
    """
    Send one user-role prompt to gpt-3.5-turbo and return the reply text.

    Args:
        agent_prompt (str): fully formatted prompt to send as the user message.

    Returns:
        str: the first choice's message content with surrounding whitespace
        removed, or "" if anything raised while calling the API or reading
        the response (the exception is printed, not re-raised).
    """
    chat_messages = [{"role": "user", "content": agent_prompt}]
    try:
        completion = client.chat.completions.create(
            messages=chat_messages,
            model="gpt-3.5-turbo",
            temperature=0.9,
        )
        reply_text = completion.choices[0].message.content
        return reply_text.strip()
    except Exception as err:
        # Best effort: report the failure and let the caller see an empty reply.
        print(err)
        return ""

def _parse_listing(listing_str, keys):
    """Parse a lower-cased "key: value" listing reply into a dict with one entry per key."""
    listing_dict = {key: "" for key in keys}
    current_key = None
    for raw_line in listing_str.split('\n'):
        line = raw_line.strip()
        if not line:
            # Blank separator lines carry no content.
            continue
        line_key = next((key for key in keys if line.startswith(key + ":")), None)
        if line_key:
            current_key = line_key
            # Slice past "<key>:" instead of splitting on ": " so a label whose
            # value starts on the next line does not raise IndexError.
            listing_dict[current_key] = line[len(line_key) + 1:].strip()
        elif current_key:
            # Continuation of the most recent field (multi-line descriptions).
            listing_dict[current_key] = (listing_dict[current_key] + " " + line).strip()
    return listing_dict


def generate_listings(num_listings, agent_prompt, property_types):
    """
    Generate `num_listings` listings and return them as a Pandas DataFrame.

    For each listing the property type is taken from `property_types` in
    round-robin order, `agent_prompt` is formatted with it, the reply from
    generate_listing_description is lower-cased and parsed into a dictionary,
    and the dictionaries are stored in a DataFrame.

    Args:
        num_listings (int): number of listings to generate.
        agent_prompt (str): prompt template with one `{}` placeholder for the
            property type.
        property_types (list[str]): property types to cycle through.

    Returns:
        pandas.DataFrame: one row per listing with the columns listed in
        `keys`; fields missing from a reply are left as "".

    Raises:
        ValueError: if listings are requested but `property_types` is empty.
    """
    keys = ["neighborhood name",
            "property type",
            "price",
            "size",
            "number of bedrooms",
            "number of bathrooms",
            "description",
            "neighborhood description"]
    if num_listings > 0 and not property_types:
        raise ValueError("property_types must not be empty")
    listings = []
    for i in range(num_listings):
        # Cycle over however many property types were supplied.
        property_type = property_types[i % len(property_types)]
        prompt = agent_prompt.format(property_type)
        listing_str = generate_listing_description(prompt).lower()
        listings.append(_parse_listing(listing_str, keys))
    # Passing columns keeps the schema stable even when no listings were requested.
    listings_df = pd.DataFrame(listings, columns=keys)
    return listings_df

In [120]:
# Test generate_listings: one chat-completion request is issued per listing,
# so this cell makes `num_listings` API calls.
num_listings = 20
listings_df = generate_listings(num_listings, agent_prompt, property_types)

In [121]:
# Preview the generated listings (up to the first 20 rows).
listings_df.head(20)

Unnamed: 0,neighborhood name,property type,price,size,number of bedrooms,number of bathrooms,description,neighborhood description
0,sunshine bay,condo,"$300,000",900 sq ft,2,1.0,"this charming 2 bedroom, 1 bathroom condo in s...",sunshine bay is a picturesque waterfront commu...
1,willow creek heights,apartment,"$300,000","1,000 square feet",2,2.0,this spacious and modern apartment in willow c...,willow creek heights is a charming neighborhoo...
2,sunset valley heights,house,"$500,000","2,500 sq ft",4,3.0,this charming two-story house in sunset valley...,sunset valley heights is a peaceful and family...
3,rolling hills estates,ranch,"$1,200,000",10 acres,4,3.0,this sprawling ranch offers the perfect countr...,rolling hills estates is a picturesque communi...
4,serenity heights,mansion,"$5,000,000","10,000 square feet",6,8.0,this luxurious mansion is the epitome of elega...,serenity heights is an exclusive gated communi...
5,ocean breeze heights,condo,"$350,000","1,200 sqft",2,2.0,this modern condo in ocean breeze heights offe...,ocean breeze heights is a vibrant seaside comm...
6,sunny shores,apartment,"$300,000",900 sqft,2,1.0,this charming apartment in sunny shores is per...,sunny shores is a picturesque beach community ...
7,serenity valley,house,"$500,000","2,500 sq. ft.",4,3.0,this charming house in serenity valley offers ...,serenity valley is a peaceful and family-frien...
8,sunset valley ranch,ranch,"$1,200,000",10 acres,3,2.0,this charming ranch features a spacious farmho...,sunset valley ranch is a peaceful countryside ...
9,serene pines estates,mansion,"$5,000,000","10,000 sqft",6,8.0,luxurious mansion nestled in the exclusive ser...,serene pines estates is a prestigious and priv...


In [122]:
# Persist the generated listings; the file is read back with index_col=0 in the
# commented-out reload cell and in the UI cell.
listings_df.to_csv("listings.csv")

## Step 2: Create embeddings for the listings

In [123]:
# load saved listing data
# listings_df = pd.read_csv("./listings.csv", index_col=0)

In [124]:
# Concatenate description and neighborhood description (space-separated) into
# one column; this combined text is what gets embedded below.
listings_df["combined_description"] = listings_df["description"].str.cat(listings_df["neighborhood description"], sep=" ")

In [125]:
# Using "text-embedding-ada-002" as text embedding model
def get_embedding(text):
    """
    Return the "text-embedding-ada-002" embedding for `text`.

    Newlines in `text` are replaced with spaces before the request is sent.

    Args:
        text (str): text to embed.

    Returns:
        The `embedding` attribute of the first item in the response data.
    """
    single_line = " ".join(text.split("\n"))
    response = client.embeddings.create(input=[single_line], model="text-embedding-ada-002")
    first_item = response.data[0]
    return first_item.embedding

# Add an embeddings column: get_embedding is applied row by row, i.e. one
# embeddings request per listing.
listings_df["ada_embeddings"] = listings_df["combined_description"].apply(get_embedding)

In [126]:
# Price conversion from string to float is currently disabled; note that the
# LanceDB schema defined later declares `price` as str.
# listings_df["price"] = listings_df["price"].str.replace("$", "").str.replace(",", "").astype(float)
listings_df.head()

Unnamed: 0,neighborhood name,property type,price,size,number of bedrooms,number of bathrooms,description,neighborhood description,combined_description,ada_embeddings
0,sunshine bay,condo,"$300,000",900 sq ft,2,1,"this charming 2 bedroom, 1 bathroom condo in s...",sunshine bay is a picturesque waterfront commu...,"this charming 2 bedroom, 1 bathroom condo in s...","[1.6579855582676828e-05, -0.003965419251471758..."
1,willow creek heights,apartment,"$300,000","1,000 square feet",2,2,this spacious and modern apartment in willow c...,willow creek heights is a charming neighborhoo...,this spacious and modern apartment in willow c...,"[0.015302270650863647, 3.857985939248465e-05, ..."
2,sunset valley heights,house,"$500,000","2,500 sq ft",4,3,this charming two-story house in sunset valley...,sunset valley heights is a peaceful and family...,this charming two-story house in sunset valley...,"[0.013635315001010895, 0.004286431707441807, 0..."
3,rolling hills estates,ranch,"$1,200,000",10 acres,4,3,this sprawling ranch offers the perfect countr...,rolling hills estates is a picturesque communi...,this sprawling ranch offers the perfect countr...,"[-0.01076695416122675, 0.0030821559485048056, ..."
4,serenity heights,mansion,"$5,000,000","10,000 square feet",6,8,this luxurious mansion is the epitome of elega...,serenity heights is an exclusive gated communi...,this luxurious mansion is the epitome of elega...,"[0.011287693865597248, 0.006992080714553595, -..."


In [127]:
# Save listings_df with embeddings so the embedding step does not have to be
# re-run; pickle keeps the embedding column as Python objects.
listings_df.to_pickle("./listings_with_embedding.pkl")

## Step 3: Store listings in a vector database

In [128]:
# Reload the listings (with embeddings) saved by the previous step.
# NOTE(review): unpickling can execute arbitrary code — only load a file this notebook produced.
listings_df = pd.read_pickle("./listings_with_embedding.pkl")

In [129]:
import lancedb
from lancedb.pydantic import vector, LanceModel

class PropertyListings(LanceModel):
    """Row schema for the LanceDB table that stores the generated listings.

    Field names are the snake_case versions of the listing DataFrame columns;
    the DataFrame is renamed to match before rows are added to the table.
    """
    neighborhood_name: str
    property_type: str
    # Kept as the raw string produced by the model (e.g. "$300,000").
    price: str
    # Free-form text; units are not normalised.
    size: str
    num_bedrooms: int
    # float rather than int — presumably to allow half baths such as 2.5; confirm.
    num_bathrooms: float
    description: str
    neighborhood_description: str
    # description + neighborhood_description; this is the text that was embedded.
    combined_description: str
    # Fixed-length vector column; 1536 must equal the length of the vectors
    # returned by get_embedding — TODO confirm if the embedding model changes.
    ada_embeddings: vector(1536)

# Connect to the LanceDB database stored under ./.lancedb.
db = lancedb.connect("./.lancedb")
table_name = "property_listings"
# mode="overwrite" — presumably replaces any existing table of this name so the
# cell can be re-run; confirm against the LanceDB documentation.
table = db.create_table(table_name, schema = PropertyListings, mode="overwrite")

In [130]:
# Sanity check: Python type of one stored embedding.
type(listings_df["ada_embeddings"][0])

list

In [131]:
# Rename the space-separated column names to the snake_case field names of the
# PropertyListings schema, then insert every row into the table.
# NOTE(review): the rename mutates listings_df in place, and the parsed
# bedroom/bathroom values are strings — confirm they are accepted for the
# int/float schema fields.
listings_df.rename(columns={"neighborhood name": "neighborhood_name",
                            "property type": "property_type",
                            "number of bedrooms": "num_bedrooms",
                            "number of bathrooms": "num_bathrooms",
                            "neighborhood description": "neighborhood_description"
                           }, inplace=True)
table.add(listings_df)

# Step 4: Collect buyer preferences

In [54]:
def generate_buyer_preference():
    """
    Ask gpt-3.5-turbo to play a buyer and answer the preference questions.

    A property type is picked at random from `property_types` and substituted
    into `buyer_prompt`, which is sent as a single user message.

    Returns:
        str: the first choice's message content with surrounding whitespace
        removed, or "" if anything raised (the exception is printed, not
        re-raised).
    """
    try:
        create_completion = client.chat.completions.create
        chosen_type = random.choice(property_types)
        user_message = {"role": "user", "content": buyer_prompt.format(chosen_type)}
        completion = create_completion(
            messages=[user_message],
            model="gpt-3.5-turbo",
            temperature=0.5,
        )
        return completion.choices[0].message.content.strip()
    except Exception as err:
        # Best effort: report the failure and return an empty preference.
        print(err)
        return ""
def clean_buyer_preference(response):
    """
    Flatten a buyer's free-text answers into one compact string for embedding.

    Steps:
      1. split the text into lines, trimming each and dropping blank ones;
      2. on a line containing "?", keep only the text after the last "?"
         (removes echoed questions, keeps an answer on the same line);
      3. join the remaining lines with single spaces;
      4. remove numbered-list markers ("1. "), "- " bullets and leading
         "I <word> " phrases such as "I would ".

    Args:
        response (str): raw preference text.

    Returns:
        str: the cleaned, single-line preference text ("" for empty input).
    """
    kept_lines = []
    for line in response.splitlines():
        line = line.strip()
        if "?" in line:
            # Drop the question part; whatever follows it is the buyer's answer.
            line = line.rsplit("?", 1)[1].strip()
        if line:
            kept_lines.append(line)
    # Always join a list of lines: joining the raw string would insert a space
    # between every character of a single-line response.
    result_response = " ".join(kept_lines)
    # remove numerical bullet points in result_response
    result_response = re.sub(r'\d+\. ', '', result_response)
    # remove "-" in result_response
    result_response = re.sub(r'- ', '', result_response)
    # remove "I [text] " pattern
    cleaned_response = re.sub(r'I \w+ ', '', result_response)
    return cleaned_response

def search_listings(table, preference, num:int):
    """
    Find the listings closest to a buyer's preference.

    inputs:
        table: listings table exposing search(...).limit(...).to_pandas()
        preference (str): raw customer preference text
        num (int): number of recommendations
    outputs:
        reco_df (Pandas DataFrame): recommendations

    The preference is cleaned with clean_buyer_preference and embedded with
    get_embedding before being used as the search vector.
    """
    query_vector = get_embedding(clean_buyer_preference(preference))
    nearest = table.search(query_vector).limit(num)
    return nearest.to_pandas()

In [55]:
# Generate one simulated buyer preference and display the raw reply.
preference = generate_buyer_preference()
preference

'I am interested in buying a condo.\nI would like to live in a neighborhood that is safe, has good schools, and is close to amenities such as restaurants, shopping centers, and parks.\nI would need at least 2 bedrooms and 2 bathrooms.\nI would like amenities such as a gym, swimming pool, and a community garden.'

In [56]:
# Show the preference after cleaning (the form that is embedded for search).
cleaned_preference = clean_buyer_preference(preference)
cleaned_preference

'interested in buying a condo. like to live in a neighborhood that is safe, has good schools, and is close to amenities such as restaurants, shopping centers, and parks. need at least 2 bedrooms and 2 bathrooms. like amenities such as a gym, swimming pool, and a community garden.'

# Step 5: Search listings based on buyer's preference

## Test a buyer

In [64]:
# End-to-end check: generate a fresh buyer preference, print it, and retrieve
# the 3 closest listings from the table.
sample_preference = generate_buyer_preference()
print(sample_preference)
reco_df = search_listings(table, sample_preference, 3)
reco_df

I am interested in buying a single-family home.
I would like to live in a safe and family-friendly neighborhood with good schools.
I would need at least 3 bedrooms and 2 bathrooms.
I would like amenities such as a backyard, a garage, and a modern kitchen.


Unnamed: 0,neighborhood_name,property_type,price,size,num_bedrooms,num_bathrooms,description,neighborhood_description,combined_description,ada_embeddings,_distance
0,serene meadows,house,"$500,000","2,500 sq ft",4,2.5,"this charming 4 bedroom, 2.5 bathroom house in...",serene meadows is a quiet and family-friendly ...,"this charming 4 bedroom, 2.5 bathroom house in...","[-0.0007424627, 0.027851487, -0.018284183, -0....",0.343752
1,willowbrook heights,apartment,"$300,000",900 sq. ft.,2,1.0,this cozy and modern apartment is perfect for ...,willowbrook heights is known for its tree-line...,this cozy and modern apartment is perfect for ...,"[-0.006473578, -0.0033602677, 0.016648225, -0....",0.368013
2,serene haven heights,apartment,"$300,000",900 sq ft,2,1.0,this charming apartment is perfect for those l...,serene haven heights is a quiet and family-fri...,this charming apartment is perfect for those l...,"[0.01952567, 0.0137843825, 0.010258914, -0.002...",0.369944


# Step 6: Alter the retrieved listing's description with the buyer's preferences

## Use the Retrieval-Augmented Generation (RAG) technique to alter the description of the matched property

In [65]:
# Prompt template for tailoring a listing. It has two `{}` placeholders, filled
# in order by generate_custom_listing_description: first the buyer preference
# (Context), then the matched listing's combined description (Description).
rag_prompt = """
You are a real estate agent.
Genearte a tailored description based on the context below, highlight the specific preferences in the context.
Do not change factual information including name, neighborhood, amenities, and location.

Context: 

{}

---

Description: 

{}

Tailored description:
"""

In [146]:
def generate_custom_listing_description(preference, num):
    """
    Retrieve the best-matching listings and tailor the top one to the buyer.

    Args:
        preference (str): buyer preference text.
        num (int): number of listings to retrieve from the notebook-level `table`.

    Returns:
        tuple[str, pandas.DataFrame]: the tailored description of the closest
        listing (generated from `rag_prompt`) and the retrieved listings
        without the internal columns. When the search returns no rows the
        text is "No matching listings found." and the DataFrame is empty.
    """
    hidden_columns = ['combined_description', 'ada_embeddings', '_distance']
    reco_df = search_listings(table, preference, num)
    if reco_df.empty:
        # Nothing to tailor; indexing row 0 would raise IndexError.
        return "No matching listings found.", reco_df.drop(columns=hidden_columns, errors="ignore")
    best_match = reco_df.iloc[0]["combined_description"]
    agent_reco = generate_listing_description(rag_prompt.format(preference, best_match))
    # Return a trimmed copy rather than mutating the search result in place.
    return agent_reco, reco_df.drop(columns=hidden_columns)

In [142]:
# Tailor the closest listing to the buyer preference generated earlier and
# list the top-n matches.
n = 3
# generate_custom_listing_description takes (preference, num) and reads `table`
# from the notebook scope itself; passing `table` as well raises TypeError.
agent_reco, reco_df = generate_custom_listing_description(preference, n)
print(agent_reco)
reco_df.head()

Welcome to your new home in Sunnywood Estates! This 4-bedroom, 3-bathroom house is the perfect combination of modern convenience and classic charm. With a spacious living room featuring a cozy fireplace, a gourmet kitchen with stainless steel appliances, and a backyard oasis complete with a sparkling pool and BBQ area, this home is ideal for both relaxation and entertaining. The master suite offers a private retreat with a luxurious ensuite bathroom and walk-in closet. 

Sunnywood Estates is a safe and family-friendly neighborhood known for its tree-lined streets, community parks, and top-rated schools. You'll also enjoy the convenience of shopping centers, restaurants, and outdoor recreation options nearby. With a strong sense of community and an active neighborhood association, Sunnywood Estates is the perfect place to settle down and call home. Don't miss out on this beautiful property that meets all your must-haves!


Unnamed: 0,neighborhood_name,property_type,price,size,num_bedrooms,num_bathrooms,description,neighborhood_description
0,sunnywood estates,house,"$500,000","2,500 sqft",4,3.0,"this charming 4-bedroom, 3-bathroom house in s...",sunnywood estates is a family-friendly neighbo...
1,willow grove estates,house,"$500,000","2,500 sq ft",4,2.5,this charming two-story house features a spaci...,willow grove estates is a family-friendly neig...
2,serenity valley,house,"$500,000","2,500 sq. ft.",4,3.0,this charming house in serenity valley offers ...,serenity valley is a peaceful and family-frien...


# Step 7: Implement user interface as app

In [149]:
import gradio as gr

In [152]:
def startup():
    """Return a freshly generated sample buyer preference used to pre-fill the UI."""
    return generate_buyer_preference()

def update_results_label(value):
    """Return a gr.Dataframe whose visible label reports how many results are shown.

    Args:
        value: the selected number of results (interpolated into the label).
    """
    return gr.Dataframe(label=f"Showing Top {value} Results", show_label=True)

def close_app():
    """Print a notice and close the notebook-level `demo` app (the gr.Blocks instance)."""
    print("Closing the application.")
    # `demo` is looked up at call time, so it only needs to exist once the button is clicked.
    demo.close()

# Pre-generate a sample preference for the text box (makes one chat request).
initial_pref = startup()
# Reload the listings from CSV for display; this rebinds listings_df to the
# CSV version, which does not contain the embedding columns.
listings_df = pd.read_csv("./listings.csv",index_col=0)

# Layout: preference box, control row, tailored recommendation, top matches,
# and the full list of available listings.
with gr.Blocks() as demo:
    with gr.Row():
        client_pref = gr.Textbox(value=initial_pref, label="Client Preference",show_label=True)
    with gr.Row():
        b1 = gr.Button("Generate Client Preferences")
        b2 = gr.Button("Submit")
        num = gr.Slider(minimum=1, maximum=10, label="Number of Results", step=1)
        b3 = gr.Button("Close App")
    with gr.Row():
        agent_reco = gr.Textbox(value="", label="Agent Recommendation",show_label=True)
    with gr.Row():
        reco_list = gr.Dataframe(label=f"Showing Top {num.value} Results", show_label=True)
    with gr.Row():
        avail_listings = gr.Dataframe(listings_df, label="Available Listings", show_label=True)
    # Keep the results-table label in sync with the slider value.
    num.change(update_results_label, inputs=num, outputs=reco_list)
    # b1: replace the preference text with a newly generated one.
    b1.click(generate_buyer_preference, outputs=client_pref)
    # b2: run retrieval + tailoring; the two return values fill the recommendation box and results table.
    b2.click(generate_custom_listing_description, inputs=[client_pref, num], outputs=[agent_reco, reco_list])
    b3.click(close_app)

demo.launch()

Running on local URL:  http://127.0.0.1:7861

To create a public link, set `share=True` in `launch()`.




In [153]:
# Close the app from the notebook (same call the "Close App" button makes via close_app).
demo.close()

Closing server running on port: 7861
