# This project showcases an AI real estate agent built with state-of-the-art LLM tools

## Step 1: Use OpenAI GPT-3.5-Turbo to generate listings of properties

In [14]:
import numpy as np
import pandas as pd
import random
import openai
from openai import OpenAI

# Read the OpenAI API key from a local text file.
# strip() drops surrounding whitespace such as the trailing newline most
# editors append, which would otherwise be treated as part of the key.
with open('openai_api_key.txt', 'r', encoding='utf-8') as file:
    openai_api_key = file.read().strip()

# Prompt template sent to the chat model. The single "{}" placeholder is
# filled with the property type via str.format, and the field labels listed
# in the format section are the ones generate_listings looks for (after
# lower-casing the response) when it parses each listing.
prompt = """
You are a real estate agent, there are many properties on the market for sale, including condo, apartment, house, ranch, mansion, and farmland.
The listing must include: neighborhood name, property type, price, size, number of bedrooms, number of bathrooms, description, neighborhood description.
Use the following format:

Neighborhood name:
Property type:
Price:
Size:
Number of bedrooms:
Number of bathrooms:
Description:
Neighborhood Description:

Generate a description for a {} listed for sale:
"""

# Property types that generate_listings samples from at random.
# NOTE(review): the prompt text also mentions "farmland", which is not in
# this list - confirm whether the omission is intentional.
property_types = ["condo", "apartment", "house", "ranch", "mansion"]

def generate_listing_description(openai_api_key, prompt, property_type):
    """
    Ask the gpt-3.5-turbo model for one listing of the given property type.

    The prompt template is filled with property_type and sent as a single
    user message. Returns the stripped text of the first choice; if the
    request (or formatting the prompt) raises, the error is printed and an
    empty string is returned instead.
    """
    client = OpenAI(api_key=openai_api_key)
    try:
        user_message = {
            "role": "user",
            "content": prompt.format(property_type),
        }
        completion = client.chat.completions.create(
            messages=[user_message],
            model="gpt-3.5-turbo",
            temperature=1.0,
        )
        first_choice = completion.choices[0]
        return first_choice.message.content.strip()
    except Exception as err:
        # Best-effort: callers receive "" rather than an exception.
        print(err)
        return ""

def generate_listings(num_listings, openai_api_key, prompt, property_types):
    """
    Generate num_listings listings and return them as a Pandas dataframe.

    For each listing a property type is drawn at random from property_types
    and generate_listing_description is called; the lower-cased response is
    parsed into a dictionary keyed by the expected field names, and the
    dictionaries are stored as the rows of the dataframe.

    Args:
        num_listings: number of listings (rows) to generate.
        openai_api_key: key forwarded to generate_listing_description.
        prompt: prompt template forwarded to generate_listing_description.
        property_types: sequence of property type names to sample from.

    Returns:
        A dataframe with one row per listing and one column per field.
        Fields missing from a response are left as empty strings, and the
        columns are present even when no listing was generated.
    """
    keys = ["neighborhood name",
            "property type",
            "price",
            "size",
            "number of bedrooms",
            "number of bathrooms",
            "description",
            "neighborhood description"]
    listings = []
    for _ in range(num_listings):
        property_type = random.choice(property_types)
        # Lower-case the response so labels match the lower-case keys above.
        listing_str = generate_listing_description(openai_api_key, prompt, property_type).lower()
        listings.append(_parse_listing(listing_str, keys))
    # Passing columns keeps the schema stable for an empty result.
    return pd.DataFrame(listings, columns=keys)


def _parse_listing(listing_str, keys):
    """Parse one "label: value" formatted response into a dict holding every key."""
    listing_dict = {key: "" for key in keys}
    current_key = None
    for raw_line in listing_str.split('\n'):
        line = raw_line.strip()
        if not line:
            # Blank separator lines carry no content.
            continue
        line_key = next((key for key in keys if line.startswith(key + ":")), None)
        if line_key:
            current_key = line_key
            # Slice past "label:" instead of splitting on ": ", so a label
            # with no space or no value after the colon does not raise.
            listing_dict[current_key] = line[len(line_key) + 1:].strip()
        elif current_key:
            # Continuation of a multi-line value for the most recent label.
            listing_dict[current_key] = (listing_dict[current_key] + " " + line).strip()
    return listing_dict

In [2]:
# Smoke-test generate_listing_description(openai_api_key, prompt, property_type)
# with a single "apartment" request; the raw response string is displayed below.
response = generate_listing_description(openai_api_key, prompt, "apartment")
response

'Neighborhood name: Parkside Heights\nProperty type: Apartment\nPrice: $350,000\nSize: 900 sqft\nNumber of bedrooms: 2\nNumber of bathrooms: 1\nDescription: Cozy and modern apartment located in the desirable Parkside Heights neighborhood. This unit features an open floor plan, updated kitchen with stainless steel appliances, spacious bedrooms, and a private balcony with stunning views. Perfect for first-time homebuyers or investors looking for a rental property.\nNeighborhood Description: Parkside Heights is a vibrant neighborhood known for its tree-lined streets, local cafes, and easy access to parks and public transportation. Residents enjoy a sense of community and convenience, with shopping centers and schools within walking distance.'

In [3]:
# Generate 20 listings with
# generate_listings(num_listings, openai_api_key, prompt, property_types)
num_listings = 20
listings_df = generate_listings(num_listings, openai_api_key, prompt, property_types)

In [4]:
# Preview the first rows of the generated listings.
listings_df.head()

Unnamed: 0,neighborhood name,property type,price,size,number of bedrooms,number of bathrooms,description,neighborhood description
0,willow creek,ranch,"$750,000",10 acres,3,2,this charming ranch features a renovated farmh...,willow creek is a peaceful rural community kno...
1,midtown,condo,"$500,000","1,200 sqft",2,2,this modern and sleek condo in the heart of mi...,"midtown is known for its vibrant nightlife, tr..."
2,maplewood,apartment,"$300,000",900 sqft,2,1,this charming apartment in the heart of maplew...,maplewood is a vibrant and family-friendly nei...
3,rolling hills estates,ranch,"$1,500,000",20 acres,4,3,this stunning ranch property offers 20 acres o...,rolling hills estates is known for its picture...
4,oakridge heights,condo,"$300,000",900 sqft,2,1,this cozy condo in oakridge heights offers a c...,oakridge heights is a bustling neighborhood wi...


In [5]:
# Persist the generated listings; the row index is written as the first
# column and is restored with index_col=0 when the file is loaded in Step 2.
listings_df.to_csv("listings.csv")

## Step 2: Create embeddings for the listings

In [83]:
# Load the saved listing data; index_col=0 uses the first CSV column
# (the index written by to_csv) as the dataframe index.
listings_df = pd.read_csv("./listings.csv", index_col=0)

In [84]:
# Concatenate description and neighborhood description into one column.
# na_rep="" treats a missing field (an empty string is read back from CSV as
# NaN) as empty text instead of turning the whole combined value into NaN,
# which get_embedding could not process because it calls text.replace.
listings_df["combined_description"] = listings_df["description"].str.cat(
    listings_df["neighborhood description"], sep=" ", na_rep=""
)

In [85]:
# Show the combined text of the listing with index label 0.
listings_df.loc[0, "combined_description"]

'this charming ranch features a renovated farmhouse with a spacious living room, cozy fireplace, and a beautiful wrap-around porch. the property includes a barn with horse stables, a riding arena, and plenty of open pastures for livestock. perfect for those looking for a peaceful country retreat. willow creek is a peaceful rural community known for its large ranch properties and picturesque landscapes. residents enjoy the tranquility of country living while still being within a short drive to nearby towns for shopping and dining.'

In [86]:
# Using "text-embedding-ada-002" as text embedding model
# Shared OpenAI client used by get_embedding for every request.
client = OpenAI(api_key=openai_api_key)


def get_embedding(text, model="text-embedding-ada-002"):
    """
    Return the embedding of text produced by an OpenAI embedding model.

    Newlines in text are replaced with spaces before the request is sent
    through the module-level client.

    Args:
        text: the string to embed.
        model: name of the embedding model. Defaults to
            "text-embedding-ada-002", so existing single-argument calls
            behave as before. NOTE(review): the vector length depends on
            the model, so a different model may not fit a table declared
            with vector(1536) - confirm before changing it.

    Returns:
        The embedding of the first (and only) input in the response.
    """
    single_line = text.replace("\n", " ")
    response = client.embeddings.create(input=[single_line], model=model)
    return response.data[0].embedding

# Add the embeddings to the dataframe: get_embedding is applied to every
# combined description (one embeddings request per row) and the resulting
# vectors are stored in a new "ada_embeddings" column.
listings_df["ada_embeddings"] = listings_df["combined_description"].apply(get_embedding)

In [87]:
# The string-to-float price conversion below is disabled, so the price column
# keeps values such as "$750,000" (PropertyListings declares price as str).
# listings_df["price"] = listings_df["price"].str.replace("$", "").str.replace(",", "").astype(float)
listings_df.head()

Unnamed: 0,neighborhood name,property type,price,size,number of bedrooms,number of bathrooms,description,neighborhood description,combined_description,ada_embeddings
0,willow creek,ranch,"$750,000",10 acres,3,2.0,this charming ranch features a renovated farmh...,willow creek is a peaceful rural community kno...,this charming ranch features a renovated farmh...,"[-0.00754887517541647, 0.003767798189073801, -..."
1,midtown,condo,"$500,000","1,200 sqft",2,2.0,this modern and sleek condo in the heart of mi...,"midtown is known for its vibrant nightlife, tr...",this modern and sleek condo in the heart of mi...,"[0.005038858391344547, 0.006939304526895285, -..."
2,maplewood,apartment,"$300,000",900 sqft,2,1.0,this charming apartment in the heart of maplew...,maplewood is a vibrant and family-friendly nei...,this charming apartment in the heart of maplew...,"[-0.0019368311623111367, -0.005045743193477392..."
3,rolling hills estates,ranch,"$1,500,000",20 acres,4,3.0,this stunning ranch property offers 20 acres o...,rolling hills estates is known for its picture...,this stunning ranch property offers 20 acres o...,"[-0.009719268418848515, 0.0010025061201304197,..."
4,oakridge heights,condo,"$300,000",900 sqft,2,1.0,this cozy condo in oakridge heights offers a c...,oakridge heights is a bustling neighborhood wi...,this cozy condo in oakridge heights offers a c...,"[0.01581690087914467, -0.00694337347522378, 0...."


In [88]:
# Print the dimensionality of every stored embedding, one line per listing
for embedding in listings_df["ada_embeddings"]:
    print(len(embedding))

1536
1536
1536
1536
1536
1536
1536
1536
1536
1536
1536
1536
1536
1536
1536
1536
1536
1536
1536
1536


In [89]:
# Save listings_df, including the embedding column, to CSV.
# NOTE(review): each embedding list is presumably written as its string
# form, so it would need to be parsed back into a list after read_csv -
# confirm before relying on this file as the source of embeddings.
listings_df.to_csv("./listings_with_embedding.csv")

## Step 3: Store listings in a vector database

In [92]:
import lancedb
from lancedb.pydantic import vector, LanceModel

# Schema of the LanceDB table that stores the listings. Field names match the
# dataframe columns after they are renamed to snake_case before table.add.
class PropertyListings(LanceModel):
    neighborhood_name: str
    property_type: str
    price: str  # kept as text, e.g. "$750,000"
    size: str  # free text including units, e.g. "10 acres" or "1,200 sqft"
    num_bedrooms: int
    num_bathrooms: float  # the reloaded CSV shows this column as floats (e.g. 2.0)
    description: str
    neighborhood_description: str
    combined_description: str  # description and neighborhood description joined by a space
    ada_embeddings: vector(1536)  # 1536 is the length printed for every stored embedding

# Connect to the local LanceDB database stored under ./.lancedb.
db = lancedb.connect("./.lancedb")
table_name = "property_listings"
# mode="overwrite" replaces any existing table of this name, so the cell can
# be re-run, and unlike an unconditional drop_table it also works on the
# first run when the table does not exist yet.
table = db.create_table(table_name, schema=PropertyListings, mode="overwrite")

In [93]:
# Map the CSV column names onto the PropertyListings field names, rename the
# dataframe columns in place, then load every row into the table.
column_renames = {
    "neighborhood name": "neighborhood_name",
    "property type": "property_type",
    "number of bedrooms": "num_bedrooms",
    "number of bathrooms": "num_bathrooms",
    "neighborhood description": "neighborhood_description",
}
listings_df.rename(columns=column_renames, inplace=True)
table.add(listings_df)

## Step 4: Build a user interface to collect buyer preferences

## Step 3: Store listings in a vector database
## Step 4: Build a user interface to collect buyer preferences
## Step 5: Search listings based on buyer's preference
## Step 6: Alter the retrieved listing's description with the buyer's preferences