## Init

In [None]:
import os
from getpass import getpass

# Never store a real key in the notebook: reuse the key already exported in the
# environment and only prompt (without echoing the input) when it is missing.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")

# Standard library
import json
from io import StringIO  # wraps the CSV string so pandas can read it
from typing import Optional

# Data handling
import pandas as pd

# Embeddings and vector store
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma

# LLMs, prompts, and chains
from langchain.chains import LLMChain
from langchain.chat_models import ChatOpenAI
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate

# Self-querying retriever
from langchain.chains.query_constructor.schema import AttributeInfo
from langchain.retrievers.self_query.base import SelfQueryRetriever


## Generate Real Estate Listings

In [None]:
# Prompt text sent to the model; {n_listings} is substituted when the chain runs
prompt_template = """
Generate {n_listings} real estate listings. Each listing should include the following fields:
Neighborhood, Price, Bedrooms, Bathrooms, House Size, Description, and Neighborhood Description.

Please output the result in CSV format with headers.

Example listing:
Neighborhood,Price,Bedrooms,Bathrooms,House Size,Description,Neighborhood Description
"Green Oaks","800000",3,2,"2000","Welcome to this eco-friendly oasis nestled in the heart of Green Oaks. This charming 3-bedroom, 2-bathroom home boasts energy-efficient features such as solar panels and a well-insulated structure. Natural light floods the living spaces, highlighting the beautiful hardwood floors and eco-conscious finishes. The open-concept kitchen and dining area lead to a spacious backyard with a vegetable garden, perfect for the eco-conscious family. Embrace sustainable living without compromising on style in this Green Oaks gem.","Green Oaks is a close-knit, environmentally-conscious community with access to organic grocery stores, community gardens, and bike paths. Take a stroll through the nearby Green Oaks Park or grab a cup of coffee at the cozy Green Bean Cafe. With easy access to public transportation and bike lanes, commuting is a breeze."
"""

# Wrap the raw text so LangChain knows which placeholder to fill in
prompt = PromptTemplate(template=prompt_template, input_variables=["n_listings"])

# Chat model settings; temperature 0 is used for consistency between runs
model_name = "chatgpt-4o-latest"
temperature = 0
llm = ChatOpenAI(model=model_name, temperature=temperature)

# Chain = prompt + model
chain = LLMChain(prompt=prompt, llm=llm)

# Request a fixed number of listings from the model
n_listings = 10
response = chain.run(n_listings=n_listings)

# Show the CSV text, or a hint when nothing came back
if not response:
    print("No response received from the LLM. Please check your configuration.")
else:
    print(response)


Neighborhood,Price,Bedrooms,Bathrooms,House Size,Description,Neighborhood Description
"Maplewood","675000",4,3,"2400","This beautifully updated 4-bedroom, 3-bathroom home in Maplewood offers spacious living with modern finishes. The open-concept kitchen features granite countertops, stainless steel appliances, and a large island perfect for entertaining. A cozy fireplace anchors the living room, while the master suite includes a walk-in closet and spa-like bathroom. The backyard is fully fenced with a patio ideal for summer barbecues.","Maplewood is a family-friendly neighborhood known for its tree-lined streets, excellent schools, and community events. Residents enjoy weekend farmers markets, local parks, and a variety of dining options within walking distance."

"Brookside","950000",5,4,"3200","This stunning 5-bedroom, 4-bathroom home in Brookside combines classic charm with modern upgrades. Featuring hardwood floors throughout, a chef’s kitchen with double ovens, and a finished base

In [20]:
# Parse the CSV text returned by the LLM into a DataFrame and display it
csv_buffer = StringIO(response)
df_listings = pd.read_csv(csv_buffer)
df_listings

Unnamed: 0,Neighborhood,Price,Bedrooms,Bathrooms,House Size,Description,Neighborhood Description
0,Maplewood,675000,4,3.0,2400,"This beautifully updated 4-bedroom, 3-bathroom...",Maplewood is a family-friendly neighborhood kn...
1,Brookside,950000,5,4.0,3200,"This stunning 5-bedroom, 4-bathroom home in Br...",Brookside is a historic neighborhood with a vi...
2,Sunset Hills,725000,3,2.5,2100,"Enjoy panoramic views in this 3-bedroom, 2.5-b...","Sunset Hills is known for its scenic vistas, q..."
3,Riverbend,580000,3,2.0,1850,"Charming 3-bedroom, 2-bathroom ranch-style hom...",Riverbend is a picturesque neighborhood along ...
4,Highland Park,1200000,4,3.5,3500,"This luxurious 4-bedroom, 3.5-bathroom home in...",Highland Park is an upscale neighborhood known...
5,Willow Creek,495000,3,2.0,1700,"This cozy 3-bedroom, 2-bathroom home in Willow...",Willow Creek is a peaceful suburban neighborho...
6,Lakeview,875000,4,3.0,2800,"Modern 4-bedroom, 3-bathroom home in Lakeview ...",Lakeview is a scenic lakeside community offeri...
7,Old Town,650000,2,2.0,1600,Historic charm meets modern convenience in thi...,"Old Town is a vibrant, walkable neighborhood f..."
8,Meadow Ridge,715000,4,2.5,2300,"Spacious 4-bedroom, 2.5-bathroom home in Meado...",Meadow Ridge is a growing suburban community w...
9,Northgate,540000,3,2.0,1900,"This well-maintained 3-bedroom, 2-bathroom hom...","Northgate is a quiet, established neighborhood..."


In [None]:
# Write the listings to disk, leaving out the DataFrame index column
listings_csv_path = "data/real_estate_listings.csv"
df_listings.to_csv(listings_csv_path, index=False)

## Storing Listings in a Vector Database

In [100]:
def store_listings_in_vector_db(df_listing: pd.DataFrame)->Chroma:
    """
    Embed real estate listings with OpenAI and store them in a Chroma collection.

    Each row becomes one document (every field rendered as a "Field: value" line)
    and the raw row values are attached as metadata for that document.

    Args:
        df_listing: Listings with the columns Neighborhood, Price, Bedrooms,
            Bathrooms, House Size, Description and Neighborhood Description.

    Returns:
        The Chroma vector store for the "real_estate_listings" collection.

    Raises:
        ValueError: If ``df_listing`` has no rows.
    """
    # Local import: only needed here to build stable document ids.
    import hashlib

    # Fail early with a clear message instead of an obscure error further down.
    if df_listing.empty:
        raise ValueError("df_listing is empty; there are no listings to store.")

    # Combine the relevant fields into one document string for each listing
    def create_document(row):
        return (
            f"Neighborhood: {row['Neighborhood']}\n"
            f"Price: {row['Price']}\n"
            f"Bedrooms: {row['Bedrooms']}\n"
            f"Bathrooms: {row['Bathrooms']}\n"
            f"House Size: {row['House Size']}\n"
            f"Description: {row['Description']}\n"
            f"Neighborhood Description: {row['Neighborhood Description']}"
        )

    # Create a list of document texts
    documents = df_listing.apply(create_document, axis=1).tolist()

    # Use the row data as metadata for each document
    metadatas = df_listing.to_dict(orient="records")

    # Deterministic ids (row position + content hash): storing the same listings
    # again reuses the same ids, so re-running the cell should overwrite the
    # existing entries instead of piling up duplicates in the collection.
    ids = [
        f"{position}-{hashlib.sha1(text.encode('utf-8')).hexdigest()}"
        for position, text in enumerate(documents)
    ]

    # Initialize OpenAIEmbeddings from Langchain
    embeddings = OpenAIEmbeddings()

    # Build the Chroma collection. No persist_directory is passed, so nothing
    # here configures the collection to be written to disk.
    vectorstore = Chroma.from_texts(
        texts=documents,
        embedding=embeddings,
        metadatas=metadatas,
        ids=ids,
        collection_name="real_estate_listings",
    )

    return vectorstore


In [None]:
try:
    # Store the listings in the vector database
    vectorstore = store_listings_in_vector_db(df_listings)
    print("Listings have been successfully stored in the vector database.")
except Exception as e:
    print(f"An error occurred while storing listings in the vector database: {e}")
    # Re-raise so execution stops here: every later cell needs `vectorstore`, and
    # carrying on would either hit a NameError or silently reuse a stale store.
    raise

## Load the List of User Preferences

In [120]:
# Read the buyer preferences from disk and display them
df_preference = pd.read_csv(filepath_or_buffer="data/buyer_preferences.csv")
df_preference

Unnamed: 0,Preference
0,"Interested in homes under $500,000 in suburban..."
1,"Looking for properties in the $600,000 to $700..."
2,Want a family-friendly community with access t...
3,"Looking for a waterfront lifestyle, ideal for ..."
4,Looking for homes with at least 3 bedrooms and...
5,Prefer 4+ bedrooms for a larger family in Broo...
6,"Prefer modern features like updated kitchens, ..."
7,Interested in luxury features like chef's kitc...
8,Attracted to homes with 2000 to 2400 sqft for ...


In [112]:
# Plain Python list of the preference texts, one entry per buyer
buyer_preferences = df_preference["Preference"].tolist()

## Preference Search

This section uses [LangChain's self-querying retriever](https://python.langchain.com/docs/how_to/self_query/), which combines semantic search with metadata filtering.

In [None]:
def get_retrievers(vectorstore:Chroma)->SelfQueryRetriever:
    """
    Build a self-querying retriever over the real estate listings vector store.

    Args:
        vectorstore: Chroma store holding the listings and their metadata.

    Returns:
        A SelfQueryRetriever created from a ChatOpenAI model, the vector store,
        a description of the document contents, and the metadata field schema.
    """

    # Metadata fields of the real estate listings. The names must match the
    # metadata keys stored with each listing (the DataFrame column names,
    # including the spaces), otherwise a filter on that field cannot match.
    metadata_field_info = [
        AttributeInfo(name="Neighborhood", description="The neighborhood where the property is located.", type="string"),
        AttributeInfo(name="Price", description="The price of the property.", type="integer"),
        AttributeInfo(name="Bedrooms", description="The number of bedrooms in the property.", type="integer"),
        AttributeInfo(name="Bathrooms", description="The number of bathrooms in the property.", type="float"),
        AttributeInfo(name="House Size", description="The size of the house in square feet.", type="integer"),
        AttributeInfo(name="Description", description="A description of the property.", type="string"),
        AttributeInfo(name="Neighborhood Description", description="A description of the neighborhood.", type="string"),
    ]

    document_content_description = "Real Estate Listings: Each listing contains information about the property, including neighborhood, price, number of bedrooms and bathrooms, house size, description, and neighborhood description."
    # Use a lower temperature for more deterministic output
    llm = ChatOpenAI(model_name = "chatgpt-4o-latest", temperature=0.2)

    retriever = SelfQueryRetriever.from_llm(
        llm, vectorstore, document_content_description, metadata_field_info, verbose=True)

    return retriever

In [99]:
# Test the retriever with a sample buyer preference

retriever = get_retrievers(vectorstore)

i=2
print(buyer_preferences[i])
similar_listings = retriever.invoke(buyer_preferences[i])
# Retrieval can come back empty, so check before indexing into the result
# (the same guard the personalization function applies).
if similar_listings:
    print(similar_listings[0].page_content)
else:
    print("No similar listings found. Please try again with a different preference.")

Want a family-friendly community with access to parks, events, and top schools 
Neighborhood: Maplewood
Price: 675000
Bedrooms: 4
Bathrooms: 3.0
House Size: 2400
Description: This beautifully updated 4-bedroom, 3-bathroom home in Maplewood offers spacious living with modern finishes. The open-concept kitchen features granite countertops, stainless steel appliances, and a large island perfect for entertaining. A cozy fireplace anchors the living room, while the master suite includes a walk-in closet and spa-like bathroom. The backyard is fully fenced with a patio ideal for summer barbecues.
Neighborhood Description: Maplewood is a family-friendly neighborhood known for its tree-lined streets, excellent schools, and community events. Residents enjoy weekend farmers markets, local parks, and a variety of dining options within walking distance.


## Personalizing Listing Descriptions

In [None]:
def generate_personalized_listing_description(buyer_preference:str, retriever:SelfQueryRetriever)-> Optional[str]:
    """
    Generate a personalized listing description based on the buyer's preference and the listing details.

    The closest matching listing is retrieved for the preference, printed, and
    then rewritten by the LLM to emphasize what matters to this buyer.

    Args:
        buyer_preference: Free-text description of what the buyer is looking for.
        retriever: Retriever used to look up listings matching the preference.

    Returns:
        The personalized description, or None when the retriever finds no
        listing for the preference (a message is printed in that case).
    """

    # Retrieve first: when nothing matches there is no need to set up the LLM.
    similar_listings = retriever.invoke(buyer_preference)

    if not similar_listings:
        print("No similar listings found. Please try again with a different preference.")
        return None

    # Only the top-ranked listing is personalized.
    listing = similar_listings[0].page_content
    print("Cloest match listing:")
    print(listing)

    # Define the prompt template to generate a personalized listing description.
    prompt_template = """
    You are a creative real estate assistant. Given the following listing details and a buyer's preference, generate a personalized listing description that highlights the aspects of the property aligning with the buyer's preferences, while preserving all factual details about the property. Do not modify any factual information.

    Listing:
    {listing}

    Buyer Preference:
    {buyer_preference}

    Personalized Description:
    """

    # Create a PromptTemplate object with two input variables.
    prompt = PromptTemplate(
        input_variables=["listing", "buyer_preference"],
        template=prompt_template
    )

    # Initialize the OpenAI LLM (adjust the temperature as needed for creativity)
    llm = ChatOpenAI(model_name = "chatgpt-4o-latest", temperature=0.7)

    # Create an LLMChain to combine the prompt and LLM.
    chain = LLMChain(llm=llm, prompt=prompt)

    # Generate the personalized description using the LLMChain.
    personalized_description = chain.run(
        listing=listing,
        buyer_preference=buyer_preference
    )

    return personalized_description

In [119]:
# Run the personalization for every loaded buyer preference

for i, buyer_preference in enumerate(buyer_preferences):
    print("----------------------------------------------------------------------------------------------------")
    print(buyer_preference)
    personalized_description = generate_personalized_listing_description(
        buyer_preference, retriever
    )
    print("\n")
    print(personalized_description)


----------------------------------------------------------------------------------------------------
Interested in homes under $500,000 in suburban neighborhoods
Cloest match listing:
Neighborhood: Willow Creek
Price: 495000
Bedrooms: 3
Bathrooms: 2.0
House Size: 1700
Description: This cozy 3-bedroom, 2-bathroom home in Willow Creek features a newly renovated kitchen, hardwood floors, and a large backyard with mature trees. The open living and dining area is perfect for gatherings, and the primary suite includes a private bath.
Neighborhood Description: Willow Creek is a peaceful suburban neighborhood with excellent schools, parks, and a strong sense of community. It’s ideal for young families and those seeking a quiet retreat from the city.


Welcome to your perfect retreat in the heart of Willow Creek! Priced at $495,000, this charming 3-bedroom, 2-bathroom home offers 1,700 square feet of thoughtfully designed living space—ideal for creating lasting memories. Step into the newly ren

## Test a different buyer preference

In [117]:
# A free-form preference that is not part of the CSV file
buyer_preference = (
    "I am looking for a modern home with at least 4 bedrooms and a spacious backyard, "
    "preferably in a family-friendly neighborhood with good schools."
)

print(buyer_preference)
personalized_description = generate_personalized_listing_description(
    buyer_preference, retriever
)
print("\n")
print(personalized_description)

I am looking for a modern home with at least 4 bedrooms and a spacious backyard, preferably in a family-friendly neighborhood with good schools.
Cloest match listing:
Neighborhood: Maplewood
Price: 675000
Bedrooms: 4
Bathrooms: 3.0
House Size: 2400
Description: This beautifully updated 4-bedroom, 3-bathroom home in Maplewood offers spacious living with modern finishes. The open-concept kitchen features granite countertops, stainless steel appliances, and a large island perfect for entertaining. A cozy fireplace anchors the living room, while the master suite includes a walk-in closet and spa-like bathroom. The backyard is fully fenced with a patio ideal for summer barbecues.
Neighborhood Description: Maplewood is a family-friendly neighborhood known for its tree-lined streets, excellent schools, and community events. Residents enjoy weekend farmers markets, local parks, and a variety of dining options within walking distance.


Welcome to your ideal home in the heart of Maplewood—a fam