In [15]:
import os
import pandas as pd
import openai
from dotenv import load_dotenv
from tqdm.notebook import tqdm
import time

In [16]:
# Load OpenAI key
load_dotenv()
openai.api_key = os.getenv("OPENAI_API_KEY")

In [17]:
# Load the dataset
df = pd.read_csv("../data/processed/120k_properties.csv")

# Keep relevant columns
cols = ["property_id", "address", "city", "state", "postcode", "price", "property_type"]
df = df[cols].copy()

In [18]:
# define a prompt template
def format_prompt(row):
    return (
        f"Write a short and appealing real estate listing description under 100 words. "
        f"Details: A {row['property_type']} located at {row['address']}, {row['city']}, {row['state']} {row['postcode']}. "
        f"Listed for ${row['price']:,.0f}."
    )

In [19]:
def generate_descriptions(rows, max_retries=3):
    prompts = [format_prompt(row) for _, row in rows.iterrows()]
    messages = [{"role": "user", "content": p} for p in prompts]
    
    retries = 0
    while retries < max_retries:
        try:
            response = openai.ChatCompletion.create(
                model="gpt-3.5-turbo-instruct",
                messages=messages,
                temperature=0.7,
                max_tokens=100,
                n=1,
                stop=None
            )
            return [choice["message"]["content"] for choice in response["choices"]]
        except Exception as e:
            retries += 1
            time.sleep(2 ** retries)
            print(f"Retry {retries}/{max_retries} after error: {e}")
    return [""] * len(rows)  # fallback


In [20]:
descriptions = []
batch_size = 5

test_df = df.sample(100)  # Test with a small sample first

for i in tqdm(range(0, len(test_df), batch_size)):
    batch = df.iloc[i:i + batch_size]
    outputs = generate_descriptions(batch)
    descriptions.extend(outputs)
    time.sleep(0.6)  # be gentle on the rate limits

  0%|          | 0/20 [00:00<?, ?it/s]

Retry 1/3 after error: 

You tried to access openai.ChatCompletion, but this is no longer supported in openai>=1.0.0 - see the README at https://github.com/openai/openai-python for the API.

You can run `openai migrate` to automatically upgrade your codebase to use the 1.0.0 interface. 

Alternatively, you can pin your installation to the old version, e.g. `pip install openai==0.28`

A detailed migration guide is available here: https://github.com/openai/openai-python/discussions/742

Retry 2/3 after error: 

You tried to access openai.ChatCompletion, but this is no longer supported in openai>=1.0.0 - see the README at https://github.com/openai/openai-python for the API.

You can run `openai migrate` to automatically upgrade your codebase to use the 1.0.0 interface. 

Alternatively, you can pin your installation to the old version, e.g. `pip install openai==0.28`

A detailed migration guide is available here: https://github.com/openai/openai-python/discussions/742



KeyboardInterrupt: 

In [None]:
test_df["generated_description"] = descriptions
test_df.to_csv("../data/processed/100_properties_with_descriptions_test.csv", index=False)
print("✅ Descriptions saved.")