[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/weaviate/recipes/blob/main/weaviate-features/model-providers/openai/rag_gpt-5.ipynb)

# RAG with OpenAI GPT-5

In this demo notebook we will show how to use OpenAI's latest `gpt-5` model for Retrieval Augmented Generation (RAG).

## Requirements

#### Create a `.env` file with the following variables:


- `OPENAI_API_KEY=<your_openai_api_key>`
- `WCD_URL=<your_weaviate_url>`
- `WCD_API_KEY=<your_weaviate_api_key>`

#### OpenAI API Key

Get your OpenAI API key from the [OpenAI platform](https://platform.openai.com/) and add it to the `.env` file.


#### Weaviate Cluster

- You can create a 14-day free sandbox cluster on [Weaviate Cloud](https://console.weaviate.cloud/). 
- Then, get the Cluster URL and API key from the cluster details and update them in the `.env` file.
- Other options to use Weaviate can be found [here](https://docs.weaviate.io/deploy).

## Install the Weaviate and OpenAI Clients

In [None]:
%pip install weaviate-client openai

## Configure Weaviate

In [None]:
import weaviate
from weaviate import Client
from weaviate.auth import Auth
from weaviate.classes.config import Configure, Property, DataType
from dotenv import load_dotenv
import os

# Read the variables defined in the local `.env` file into the environment.
load_dotenv()

# The OpenAI key travels as an extra header on the Weaviate connection.
headers = {"X-OpenAI-Api-Key": os.environ["OPENAI_API_KEY"]}

# Look up the cluster settings; a missing variable raises KeyError here.
weaviate_url = os.environ["WCD_URL"]
weaviate_auth = Auth.api_key(os.environ["WCD_API_KEY"])

client = weaviate.connect_to_weaviate_cloud(
    cluster_url=weaviate_url,
    auth_credentials=weaviate_auth,
    headers=headers,
)

# Print the readiness flag reported by the client.
print(client.is_ready())

## Setup Products Collection

In [None]:
# Create the "Products" collection only when it is missing, so re-running
# this cell does not attempt to create it a second time.
if client.collections.exists("Products"):
    print("Products collection already exists.")
else:
    # Properties whose content is passed to the embedding model.
    embedded_fields = [
        "name",
        "description",
        "reviews",
        "collection",
        "tags",
        "category",
        "colors",
        "subcategory",
        "brand",
    ]

    # One row per property: (name, data type, description, skip_vectorization).
    field_specs = [
        ("product_id", DataType.TEXT, "The unique identifier of the product", True),
        ("name", DataType.TEXT, "The name of the product", False),
        ("description", DataType.TEXT, "A detailed description of the product", False),
        ("reviews", DataType.TEXT_ARRAY, "The reviews of the product", False),
        ("collection", DataType.TEXT, "The collection of the product", False),
        ("tags", DataType.TEXT_ARRAY, "Tags associated with the product", False),
        ("image_url", DataType.TEXT, "URL of the product image", True),
        ("rating", DataType.NUMBER, "The rating of the product", True),
        ("price", DataType.NUMBER, "The price of the product", True),
        ("category", DataType.TEXT, "The category of the product", False),
        ("colors", DataType.TEXT_ARRAY, "Colors associated with the product", False),
        ("subcategory", DataType.TEXT, "The subcategory of the product", False),
        ("brand", DataType.TEXT, "The brand of the product", False),
    ]

    schema_properties = []
    for field_name, field_type, field_doc, skip in field_specs:
        # Set skip_vectorization only where it is needed; every other
        # property keeps the library default.
        vectorization_kwargs = {"skip_vectorization": True} if skip else {}
        schema_properties.append(
            Property(
                name=field_name,
                data_type=field_type,
                description=field_doc,
                **vectorization_kwargs,
            )
        )

    # A single named vector built with OpenAI's embedding model.
    product_vector_config = Configure.Vectors.text2vec_openai(
        name="product_vector",
        model="text-embedding-3-small",
        source_properties=embedded_fields,
    )

    client.collections.create(
        "Products",
        description="A collection of ecommerce fashion products with their details",
        vector_config=[product_vector_config],
        generative_config=Configure.Generative.openai(model="gpt-5"),
        properties=schema_properties,
    )
    print("Products collection created.")

## Load the Dataset

In [None]:
import json

# Read the whole dataset first so the file handle is closed before the
# (potentially slow) import into Weaviate starts.
with open(
    "../../../datasets/ecommerce_products.json", "r", encoding="utf-8"
) as f:  # Or change the path according to your local setup
    products_data = json.load(f)

products_collection = client.collections.get("Products")

# Keys copied from each dataset record into the stored object. A record
# missing any of them raises KeyError.
product_fields = (
    "product_id",
    "name",
    "description",
    "reviews",
    "collection",
    "tags",
    "image_url",
    "rating",
    "price",
    "category",
    "colors",
    "subcategory",
    "brand",
)

with products_collection.batch.dynamic() as batch:
    for item in products_data:
        # The record's product_id is reused as the object's UUID.
        uuid = item["product_id"]
        batch.add_object(
            properties={field: item[field] for field in product_fields},
            uuid=uuid,
        )
        # NOTE(review): this prints once the object is handed to the batch;
        # whether it was stored is only known from the check below.
        print(f"Added product: {item['name']} with UUID: {uuid}")

# Per the Weaviate client documentation, batch import errors are collected
# rather than raised, so report them explicitly instead of failing silently.
failed_objects = products_collection.batch.failed_objects
if failed_objects:
    print(f"Number of failed imports: {len(failed_objects)}")
    print(f"First failed object: {failed_objects[0]}")

## Retrieval Augmented Generation w/ GPT-5

In [None]:
import os
import json
from openai import OpenAI


def rag_fashion_email(query: str, limit: int = 3) -> str:
    """Write a product-recommendation email grounded in retrieved products.

    Searches the "Products" collection for the objects closest to ``query``
    (using the ``product_vector`` named vector), serialises their properties
    as JSON into a prompt, and asks the ``gpt-5`` model to draft the email.

    Args:
        query: Natural-language description of what the customer wants.
        limit: Maximum number of products to retrieve and feature.

    Returns:
        The text of the generated email.

    Raises:
        ValueError: If the search returns no products, since an email
            written without product data would not be grounded in the
            catalogue.
    """
    collection = client.collections.get("Products")

    # Retrieval only: generation is done by the OpenAI client below, so the
    # plain query API is used rather than the generative one.
    search_results = collection.query.near_text(
        query=query,
        limit=limit,
        target_vector="product_vector",
    )
    if not search_results.objects:
        raise ValueError(f"No products found for query: {query!r}")

    product_descriptions = "\n".join(
        json.dumps(obj.properties, indent=2) for obj in search_results.objects
    )
    prompt = f"""
    Based on these products, write a personalized recommendation email for a customer interested in fashion. Include:
    1. A catchy subject line that aligns with the product theme
    2. A friendly, inviting introduction
    3. Highlights of each product, including their names and unique features or benefits
    The tone should be warm, conversational, and aligned with the product context. Keep the email around 150 words.
    Product descriptions:
    {product_descriptions}
    """

    openai_client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
    completion = openai_client.responses.create(
        model="gpt-5",
        reasoning={"effort": "low"},
        instructions="You are a fashion expert and a marketing professional.",
        input=prompt,
    )
    return completion.output_text

# Run the full retrieve-then-generate flow for a sample query and show the email.
sample_query = "Funky clothing for winter"
print(rag_fashion_email(sample_query))