# This project showcases an AI real estate agent built with state-of-the-art LLM tools

## Step 1: Use OpenAI GPT-3.5-Turbo to generate listings of properties

In [1]:
import numpy as np
import pandas as pd
import random
import openai
import re
from openai import OpenAI

In [2]:
# Load the OpenAI API key from a local text file and build the shared client.
with open('openai_api_key.txt', 'r') as file:
    # Strip surrounding whitespace: a trailing newline left in the file would
    # otherwise become part of the key passed to the client.
    openai_api_key = file.read().strip()
client = OpenAI(api_key=openai_api_key)

In [3]:
# Template for the listing-generation request. The `{}` on the last line is a
# str.format placeholder that is filled with a property type before the prompt
# is sent. The field labels below match the keys that generate_listings parses
# out of the model's reply.
prompt = """
You are a real estate agent, there are many properties on the market for sale, including condo, apartment, house, ranch, and mansion.
The listing must include: neighborhood name, property type, price, size, number of bedrooms, number of bathrooms, description, neighborhood description.
Use the following format:

Neighborhood name:
Property type:
Price:
Size:
Number of bedrooms:
Number of bathrooms:
Description:
Neighborhood Description:

Generate a description for a {} listed for sale:
"""

# Property types that a listing can be generated for (one is picked at random per listing).
property_types = ["condo", "apartment", "house", "ranch", "mansion"]

def generate_listing_description(prompt, model="gpt-3.5-turbo", temperature=1.0):
    """
    Send a single-turn user prompt to an OpenAI chat model and return the reply.

    Uses the module-level `client` to issue the request.

    Args:
        prompt (str): Full text of the user message.
        model (str): Chat model name. Defaults to "gpt-3.5-turbo".
        temperature (float): Sampling temperature. Defaults to 1.0.

    Returns:
        str: The reply text with surrounding whitespace removed, or "" when
        the request fails or the reply carries no content. Failures are
        printed rather than raised so that batch generation can continue.
    """
    try:
        response = client.chat.completions.create(
            messages=[
                {
                    "role": "user",
                    "content": prompt,
                }
            ],
            model=model,
            temperature=temperature,
        )
        content = response.choices[0].message.content
        # A reply without text content is treated as an empty description.
        return content.strip() if content else ""
    except Exception as e:
        print(e)
        return ""

def generate_listings(num_listings, openai_api_key, prompt, property_types):
    """
    Generate `num_listings` property listings and return them as a DataFrame.

    For each listing a property type is picked at random, the prompt template
    is formatted with it, and the reply from generate_listing_description is
    lower-cased and parsed line by line. A line that starts with "<key>:"
    opens that field; following lines that do not start with a known key are
    appended to the field that is currently open.

    Args:
        num_listings (int): Number of listings to generate.
        openai_api_key (str): Not used by this function; kept so existing
            callers keep working.
        prompt (str): Template containing one `{}` placeholder for the
            property type.
        property_types (list[str]): Property types to choose from.

    Returns:
        pandas.DataFrame: One row per listing, with one column per parsed
        field. Fields missing from a reply are empty strings.
    """
    keys = ["neighborhood name",
            "property type",
            "price",
            "size",
            "number of bedrooms",
            "number of bathrooms",
            "description",
            "neighborhood description"]
    listings = []
    for _ in range(num_listings):
        property_type = random.choice(property_types)
        # Format into a new variable: overwriting `prompt` would consume the
        # `{}` placeholder, and every later listing would then silently reuse
        # the first property type.
        listing_prompt = prompt.format(property_type)
        listing_str = generate_listing_description(listing_prompt).lower()
        listing_dict = {key: "" for key in keys}
        current_key = None
        for raw_line in listing_str.split('\n'):
            line = raw_line.strip()
            line_key = next((key for key in keys if line.startswith(key + ":")), None)
            if line_key:
                current_key = line_key
                # Slice past "<key>:" rather than splitting on ": ", so a label
                # with nothing after the colon yields "" instead of an IndexError.
                listing_dict[current_key] = line[len(line_key) + 1:].strip()
            elif current_key and line:
                listing_dict[current_key] = (listing_dict[current_key] + " " + line).strip()
        listings.append(listing_dict)
    # Passing `columns` keeps the schema stable even when no listing was generated.
    listings_df = pd.DataFrame(listings, columns=keys)
    return listings_df

In [10]:
# Test generate_listing_description(prompt)
# Format into a separate variable: reassigning `prompt` here would consume its
# `{}` placeholder, and later calls that format the template again would all
# produce apartment listings.
test_prompt = prompt.format("apartment")
response = generate_listing_description(test_prompt)
response

'Neighborhood name: Westwood\nProperty type: Apartment\nPrice: $500,000\nSize: 1,200 square feet\nNumber of bedrooms: 2\nNumber of bathrooms: 2\nDescription: This modern and stylish apartment features an open concept living area, spacious bedrooms, and updated kitchen with stainless steel appliances. Enjoy the luxury of a balcony overlooking the city skyline and amenities such as a gym and pool.\nNeighborhood Description: Westwood is a trendy and desirable neighborhood known for its upscale dining, shopping, and entertainment options. With easy access to parks and green spaces, this area attracts young professionals and families alike.'

In [3]:
# Test generate_listings(num_listings, openai_api_key, prompt, property_types)
# NOTE: `prompt` is formatted with a property type inside generate_listings, so
# it must still contain its `{}` placeholder when this cell runs.
num_listings = 20
listings_df = generate_listings(num_listings, openai_api_key, prompt, property_types)

In [4]:
listings_df.head()

Unnamed: 0,neighborhood name,property type,price,size,number of bedrooms,number of bathrooms,description,neighborhood description
0,willow creek,ranch,"$750,000",10 acres,3,2,this charming ranch features a renovated farmh...,willow creek is a peaceful rural community kno...
1,midtown,condo,"$500,000","1,200 sqft",2,2,this modern and sleek condo in the heart of mi...,"midtown is known for its vibrant nightlife, tr..."
2,maplewood,apartment,"$300,000",900 sqft,2,1,this charming apartment in the heart of maplew...,maplewood is a vibrant and family-friendly nei...
3,rolling hills estates,ranch,"$1,500,000",20 acres,4,3,this stunning ranch property offers 20 acres o...,rolling hills estates is known for its picture...
4,oakridge heights,condo,"$300,000",900 sqft,2,1,this cozy condo in oakridge heights offers a c...,oakridge heights is a bustling neighborhood wi...


In [5]:
listings_df.to_csv("listings.csv")

## Step 2: Create embeddings for the listings

In [4]:
# load saved listing data
listings_df = pd.read_csv("./listings.csv", index_col=0)

In [5]:
# concatenate description and neighborhood description into one column
# Fields that were empty when saved come back from read_csv as NaN. Without
# fillna, str.cat would yield NaN for those rows, and the embedding step would
# then fail when it calls .replace on a float.
listings_df["combined_description"] = (
    listings_df["description"].fillna("")
    .str.cat(listings_df["neighborhood description"].fillna(""), sep=" ")
    .str.strip()
)

In [6]:
listings_df["combined_description"][0]

'this charming ranch features a renovated farmhouse with a spacious living room, cozy fireplace, and a beautiful wrap-around porch. the property includes a barn with horse stables, a riding arena, and plenty of open pastures for livestock. perfect for those looking for a peaceful country retreat. willow creek is a peaceful rural community known for its large ranch properties and picturesque landscapes. residents enjoy the tranquility of country living while still being within a short drive to nearby towns for shopping and dining.'

In [7]:
# Using "text-embedding-ada-002" as text embedding model
client = OpenAI(api_key=openai_api_key)
def get_embedding(text, model="text-embedding-ada-002"):
    """
    Return the embedding vector for `text` from the OpenAI embeddings endpoint.

    Uses the module-level `client` to issue the request.

    Args:
        text (str): Text to embed. Newlines are replaced with spaces first.
        model (str): Embedding model name. Defaults to "text-embedding-ada-002".
            The vector table in this notebook is declared with 1536-dimensional
            vectors, so any other model must produce vectors of that size.

    Returns:
        The `embedding` attribute of the first item in the response data.
    """
    text = text.replace("\n", " ")
    return client.embeddings.create(input=[text], model=model).data[0].embedding

# Add embeddings list to dataframe
listings_df["ada_embeddings"] = listings_df["combined_description"].apply(get_embedding)

In [8]:
# convert price from string to float
# listings_df["price"] = listings_df["price"].str.replace("$", "").str.replace(",", "").astype(float)
listings_df.head()

Unnamed: 0,neighborhood name,property type,price,size,number of bedrooms,number of bathrooms,description,neighborhood description,combined_description,ada_embeddings
0,willow creek,ranch,"$750,000",10 acres,3,2.0,this charming ranch features a renovated farmh...,willow creek is a peaceful rural community kno...,this charming ranch features a renovated farmh...,"[-0.00754887517541647, 0.003767798189073801, -..."
1,midtown,condo,"$500,000","1,200 sqft",2,2.0,this modern and sleek condo in the heart of mi...,"midtown is known for its vibrant nightlife, tr...",this modern and sleek condo in the heart of mi...,"[0.005038858391344547, 0.006939304526895285, -..."
2,maplewood,apartment,"$300,000",900 sqft,2,1.0,this charming apartment in the heart of maplew...,maplewood is a vibrant and family-friendly nei...,this charming apartment in the heart of maplew...,"[-0.0019746043253690004, -0.005034757778048515..."
3,rolling hills estates,ranch,"$1,500,000",20 acres,4,3.0,this stunning ranch property offers 20 acres o...,rolling hills estates is known for its picture...,this stunning ranch property offers 20 acres o...,"[-0.009719268418848515, 0.0010025061201304197,..."
4,oakridge heights,condo,"$300,000",900 sqft,2,1.0,this cozy condo in oakridge heights offers a c...,oakridge heights is a bustling neighborhood wi...,this cozy condo in oakridge heights offers a c...,"[0.01581690087914467, -0.00694337347522378, 0...."


In [9]:
# save listings_df with embeddings
listings_df.to_csv("./listings_with_embedding.csv")

## Step 3: Store listings in a vector database

In [10]:
import lancedb
from lancedb.pydantic import vector, LanceModel

class PropertyListings(LanceModel):
    """
    Schema for the LanceDB table that stores the generated property listings.

    Field names match the columns of `listings_df` after its headers are
    renamed from the space-separated labels to snake_case.
    """
    neighborhood_name: str
    property_type: str
    # Price and size are kept as the free-form text parsed from the listing.
    price: str
    size: str
    num_bedrooms: int
    # Float rather than int; presumably to allow half baths such as 2.5 — TODO confirm.
    num_bathrooms: float
    description: str
    neighborhood_description: str
    # Listing description and neighborhood description joined into one text.
    combined_description: str
    # 1536-dimensional embedding vector used for similarity search.
    ada_embeddings: vector(1536)

# Open (or create) the local LanceDB database directory.
db = lancedb.connect("./.lancedb")
table_name = "property_listings"
# mode="overwrite" replaces an existing table and also works on a fresh
# database. A separate drop_table call fails when the table does not exist yet.
table = db.create_table(table_name, schema=PropertyListings, mode="overwrite")

In [11]:
# Map the space-separated CSV headers onto the field names of the table schema,
# then load every listing into the vector table.
listings_df.rename(
    columns=dict(
        zip(
            ["neighborhood name", "property type", "number of bedrooms",
             "number of bathrooms", "neighborhood description"],
            ["neighborhood_name", "property_type", "num_bedrooms",
             "num_bathrooms", "neighborhood_description"],
        )
    ),
    inplace=True,
)
table.add(listings_df)

In [12]:
# test database search
query = np.random.randn(1536)

In [13]:
table.search(query).limit(1).to_pandas()

Unnamed: 0,neighborhood_name,property_type,price,size,num_bedrooms,num_bathrooms,description,neighborhood_description,combined_description,ada_embeddings,_distance
0,midtown,condo,"$500,000","1,200 sqft",2,2.0,this modern and sleek condo in the heart of mi...,"midtown is known for its vibrant nightlife, tr...",this modern and sleek condo in the heart of mi...,"[0.0050388584, 0.0069393045, -0.00026934137, -...",1596.897095


# Step 4: Collect buyer preferences

In [95]:
# Generate buyer preferences using gpt-3.5-turbo
buyer_prompt = """
You are a real estate buyer, you are interetsed in buying a {}.
A real estate agent is helping you choosing a property listing that best suits your preferences.

Answer the following questions:

Questions:
What kind of property are you interested in buying?
What kind of neighborhood would you like to live in?
How many bedrooms and bathrooms do you need?
What amenities would you like?

Answers:
"""

property_type = ["house", "condo", "apartment", "mansion", "ranch"]

client = OpenAI(api_key=openai_api_key)

def generate_buyer_preference():
    """
    Ask gpt-3.5-turbo to answer the buyer questionnaire for a random property type.

    One entry of the module-level `property_type` list is picked at random and
    inserted into `buyer_prompt`; the result is sent as a single user message
    through the module-level `client`.

    Returns:
        str: The reply text with surrounding whitespace removed, or "" if
        anything goes wrong (the exception is printed, not raised).
    """
    try:
        create_completion = client.chat.completions.create
        chosen_type = random.choice(property_type)
        user_message = {"role": "user", "content": buyer_prompt.format(chosen_type)}
        completion = create_completion(
            messages=[user_message],
            model="gpt-3.5-turbo",
            temperature=0.5,
        )
        reply = completion.choices[0].message.content
        return reply.strip()
    except Exception as err:
        print(err)
        return ""
def clean_buyer_preference(response):
    """
    Reduce a buyer's questionnaire reply to a single line of preference text.

    The reply is split into lines; blank lines and lines containing a question
    mark (echoed questions) are dropped, and the remaining lines are joined
    with single spaces. Numbered bullets ("1. "), dash bullets ("- ") and
    "I <word> " lead-ins (e.g. "I am ", "I would ") are then removed.

    Args:
        response (str): Raw reply text from the model.

    Returns:
        str: The cleaned preference text ("" for empty input).

    Raises:
        TypeError: If `response` is not a string.
    """
    if not isinstance(response, str):
        raise TypeError(
            "response must be a str, got {}".format(type(response).__name__)
        )
    # Work on a list of lines throughout. Mixing str and list (splitting only
    # under some conditions) turned the later "in" checks into list-membership
    # tests that never matched, and made " ".join iterate over the characters
    # of a single-line reply.
    answer_lines = []
    for raw_line in response.splitlines():
        line = raw_line.strip()
        if line and "?" not in line:
            answer_lines.append(line)
    result_response = " ".join(answer_lines)
    # remove numerical bullet points in result_response
    result_response = re.sub(r'\d+\. ', '', result_response)
    # remove "-" in result_response
    result_response = re.sub(r'- ', '', result_response)
    # remove "I [text] " pattern
    cleaned_response = re.sub(r'I \w+ ', '', result_response)
    return cleaned_response

def search_listings(table, preference, num:int):
    """
    Return the listings whose embeddings are closest to a buyer's preference.

    inputs:
        table: vector table to search; must support
            `table.search(vector).limit(n).to_pandas()`
        preference (str): raw buyer preference text; it is cleaned with
            clean_buyer_preference and embedded with get_embedding
        num (int): number of recommendations; a float (e.g. the value of a
            UI slider) is converted to int
    outputs:
        reco_df (Pandas DataFrame): recommendations

    """
    cleaned_preference = clean_buyer_preference(preference)
    embedding = get_embedding(cleaned_preference)
    # Cast so that a whole-number float is handed to the query as an int.
    reco_df = table.search(embedding).limit(int(num)).to_pandas()
    return reco_df

In [97]:
preference = generate_buyer_preference()
preference

'I am interested in buying a ranch property.\nI would like to live in a neighborhood that is peaceful and has a rural feel.\nI would need at least 3 bedrooms and 2 bathrooms.\nI would like amenities such as a barn or stable for horses, a large outdoor space for farming or gardening, and maybe a pond or lake on the property.'

In [83]:
cleaned_preference = clean_buyer_preference(preference)
cleaned_preference

'interested in buying a house or a condo.  like to live in a friendly and safe neighborhood with good schools and amenities nearby.  prefer a property with at least 3 bedrooms and 2 bathrooms.  like amenities such as a backyard, a garage, updated kitchen appliances, and maybe a pool or a gym in the community.'

In [25]:
# Generate 10 buyer preferences and store them
num_of_samples = 10
buyer_preference_records = []
for i in range(num_of_samples):
    # Call the generator: without the parentheses, the function object itself
    # would be passed on to the cleaning step instead of a reply string.
    response = generate_buyer_preference()
    cleaned_response = clean_buyer_preference(response)
    if not cleaned_response:
        # generate_buyer_preference returns "" when the request fails, and
        # there is nothing to embed in that case.
        continue
    cleaned_response_embedding = get_embedding(cleaned_response)
    buyer_preference_records.append({
        "cleaned_buyer_preference": cleaned_response,
        "embeddings": cleaned_response_embedding,
    })
# Build the frame once from the collected records instead of growing it row by row.
buyer_preference_df = pd.DataFrame(
    buyer_preference_records,
    columns=["cleaned_buyer_preference", "embeddings"],
)

In [26]:
buyer_preference_df.head()

Unnamed: 0,cleaned_buyer_preference,embeddings
0,interested in buying a house. like to live in ...,"[-0.006827543489634991, 0.025150764733552933, ..."
1,interested in buying a mansion. like to live i...,"[-0.018689924851059914, -0.0007153096375986934..."
2,interested in buying a house. like to live in ...,"[-0.00694231828674674, 0.02111886255443096, -0..."
3,interested in buying a house. like to live in ...,"[-0.005305054597556591, 0.02649988979101181, -..."
4,interested in buying a mansion. like to live i...,"[-0.013978690840303898, 0.008009166456758976, ..."


# Step 5: Search listings based on buyer's preference

## Test a buyer

In [18]:
preference = generate_buyer_preference()
print(preference)
reco_df = search_listings(table, preference, 3)
reco_df

I am interested in buying a house.
I would like to live in a family-friendly neighborhood with good schools and parks nearby.
I would need at least 3 bedrooms and 2 bathrooms.
I would like amenities such as a backyard, a garage, and maybe a pool or a deck for outdoor entertaining.


Unnamed: 0,neighborhood_name,property_type,price,size,num_bedrooms,num_bathrooms,description,neighborhood_description,combined_description,ada_embeddings,_distance
0,oakwood heights,house,"$500,000","2,500 sqft",4,3.0,"this charming 4-bedroom, 3-bathroom house is p...",oakwood heights is a quiet and family-friendly...,"this charming 4-bedroom, 3-bathroom house is p...","[0.008564916, 0.014116251, -0.0142616425, -0.0...",0.31209
1,willow creek,house,"$350,000","2,000 sq ft",4,2.5,"this charming 4 bedroom, 2.5 bathroom house in...",willow creek is a family-friendly neighborhood...,"this charming 4 bedroom, 2.5 bathroom house in...","[0.0067040585, 0.006078434, -0.0076392023, -0....",0.320997
2,riverfront estates,house,"$500,000","2,500 sq. ft.",4,3.0,this charming house in riverfront estates feat...,riverfront estates is a peaceful and picturesq...,this charming house in riverfront estates feat...,"[0.00049743586, -0.0026723682, -0.01714036, -0...",0.373045


# Step 6: Alter the retrieved listing's description with the buyer's preferences

## Use the Retrieval-Augmented Generation (RAG) technique to tailor the description of the matched property

In [19]:
rag_prompt = """
You are a real estate agent.
Genearte a tailored description based on the context below, highlight the specific preferences in the context.
Do not change factual information including name, neighborhood, amenities, and location.

Context: 

{}

---

Description: 

{}

Tailored description:
"""

In [61]:
def generate_custom_listing_description(preference, num):
    """
    Recommend listings for a buyer and write a tailored pitch for the best match.

    Searches the module-level `table` for the `num` listings closest to
    `preference`, then fills `rag_prompt` with the preference and the top
    match's combined description and asks the model to rewrite it.

    Args:
        preference (str): Buyer preference text.
        num (int): Number of recommendations to retrieve.

    Returns:
        tuple: `(agent_reco, reco_df)` — the tailored description ("" when the
        search returns no rows) and the recommendations with the
        `combined_description`, `ada_embeddings` and `_distance` columns removed.
    """
    reco_df = search_listings(table, preference, num)
    if reco_df.empty:
        # No match to describe; indexing row 0 would raise IndexError.
        agent_reco = ""
    else:
        top_description = reco_df.iloc[0]["combined_description"]
        agent_reco = generate_listing_description(rag_prompt.format(preference, top_description))
    # Return a new frame without the helper columns; errors="ignore" tolerates
    # a result that lacks some of them.
    reco_df = reco_df.drop(
        columns=['combined_description', 'ada_embeddings', '_distance'],
        errors="ignore",
    )
    return agent_reco, reco_df

In [62]:
preference

'I am interested in buying a house.\nI would like to live in a family-friendly neighborhood with good schools and parks nearby.\nI would need at least 3 bedrooms and 2 bathrooms.\nI would like amenities such as a backyard, a garage, and maybe a pool or a deck for outdoor entertaining.'

In [63]:
n = 3

In [64]:
agent_reco, reco_df = generate_custom_listing_description(preference, n)

In [65]:
agent_reco

"Welcome to your new family home in the beautiful Oakwood Heights neighborhood! This 4-bedroom, 3-bathroom house is ideal for those looking for a family-friendly community with good schools and parks. The spacious living room and updated kitchen provide ample space for family gatherings, while the backyard deck is perfect for outdoor entertaining. The master suite features a walk-in closet and en-suite bathroom, offering a private retreat. With a bonus room that can be used as an office or playroom, this house meets all your needs for space and comfort. Plus, with a garage for convenient parking and the possibility of adding a pool, this property has all the amenities you desire. Don't miss out on the opportunity to make this house your new family home!"

In [66]:
reco_df

Unnamed: 0,neighborhood_name,property_type,price,size,num_bedrooms,num_bathrooms,description,neighborhood_description
0,oakwood heights,house,"$500,000","2,500 sqft",4,3.0,"this charming 4-bedroom, 3-bathroom house is p...",oakwood heights is a quiet and family-friendly...
1,willow creek,house,"$350,000","2,000 sq ft",4,2.5,"this charming 4 bedroom, 2.5 bathroom house in...",willow creek is a family-friendly neighborhood...
2,riverfront estates,house,"$500,000","2,500 sq. ft.",4,3.0,this charming house in riverfront estates feat...,riverfront estates is a peaceful and picturesq...


# Step 7: Implement user interface as app

In [27]:
import gradio as gr

In [101]:
# Gradio front end: one button generates a sample buyer preference, the other
# runs the search and produces the tailored recommendation.
with gr.Blocks() as demo:
    with gr.Row():
        # Seed the textbox with the buyer preference generated earlier in the
        # notebook. The previous default, `sample_preference`, is not defined
        # anywhere in the notebook and fails on a fresh kernel.
        client_pref = gr.Textbox(value=preference, label="Client Preference",show_label=True)
    with gr.Row():
        b1 = gr.Button("Generate Client Preferences")
        b2 = gr.Button("Submit")
        num = gr.Slider(minimum=1, maximum=10, label="Number of Results", step= 1, randomize=True)
    with gr.Row():
        agent_reco = gr.Textbox(value="", label="Agent Recommendation",show_label=True)
    with gr.Row():
        reco_list = gr.Dataframe(reco_df, label="Showing Top {} Results".format(num.value), show_label=True)
    with gr.Row():
        avail_listings = gr.Dataframe(listings_df, label="Available Listings", show_label=True)

    b1.click(generate_buyer_preference, outputs=client_pref)
    b2.click(generate_custom_listing_description, inputs=[client_pref, num], outputs=[agent_reco, reco_list])
    # The slider value is read when "Submit" is clicked, so no separate change
    # handler is registered (the earlier `num.change()` call passed no callback).

demo.launch()

Running on local URL:  http://127.0.0.1:7862

Thanks for being a Gradio user! If you have questions or feedback, please join our Discord server and chat with us: https://discord.gg/feTf9x3ZSB

To create a public link, set `share=True` in `launch()`.




Traceback (most recent call last):
  File "/Users/wang140/workspace_torch/torch/lib/python3.11/site-packages/gradio/queueing.py", line 522, in process_events
    response = await route_utils.call_process_api(
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/wang140/workspace_torch/torch/lib/python3.11/site-packages/gradio/route_utils.py", line 260, in call_process_api
    output = await app.get_blocks().process_api(
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/wang140/workspace_torch/torch/lib/python3.11/site-packages/gradio/blocks.py", line 1689, in process_api
    result = await self.call_function(
             ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/wang140/workspace_torch/torch/lib/python3.11/site-packages/gradio/blocks.py", line 1255, in call_function
    prediction = await anyio.to_thread.run_sync(
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/wang140/workspace_torch/torch/lib/python3.11/site-packages/anyio/to_thread.py", lin

In [102]:
demo.close()

Closing server running on port: 7862
