# Local RAG with OpenAI gpt-oss:20b via Ollama

In this demo notebook, we will show how to use OpenAI's `gpt-oss:20b` model via Ollama for Retrieval-Augmented Generation (RAG).

## Requirements

#### Ollama 

Installation instructions can be found [here](https://ollama.com/).

- Run the local generative model: `ollama run gpt-oss:20b`
- Pull the local embedding model: `ollama pull snowflake-arctic-embed2`


#### Local Weaviate instance

- Steps for running Weaviate locally can be found [here](https://docs.weaviate.io/deploy/installation-guides/docker-installation).
- Don't forget to enable the `text2vec-ollama` and `generative-ollama` modules.


## Install the Weaviate Python Client

In [None]:
%pip install weaviate-client

## Configure Weaviate

In [None]:
import weaviate
from weaviate.classes.config import Configure, Property, DataType
from weaviate.config import Timeout, AdditionalConfig


# Client-side timeouts, in seconds, for connection init, queries and inserts.
request_timeouts = Timeout(init=30, query=60, insert=120)
connection_options = AdditionalConfig(timeout=request_timeouts)

# Connect to the locally running Weaviate instance (HTTP on 8080, gRPC on 50051).
client = weaviate.connect_to_local(
    port=8080,
    grpc_port=50051,
    additional_config=connection_options,
)

# Prints the readiness flag reported by the client.
is_weaviate_ready = client.is_ready()
print(is_weaviate_ready)

## Setup Products Collection

In [None]:
# Create the "Products" collection on the first run; later runs leave the
# existing collection untouched.
if client.collections.exists("Products"):
    print("Products collection already exists.")
else:
    # Ollama endpoint used by both the vectorizer and the generative module.
    # NOTE(review): host.docker.internal presumably resolves to the host from
    # inside the Weaviate container -- adjust if Weaviate is not run in Docker.
    ollama_endpoint = "http://host.docker.internal:11434"

    # One row per property: (name, data type, description, skip_vectorization).
    field_specs = [
        ("product_id", DataType.TEXT, "The unique identifier of the product", True),
        ("name", DataType.TEXT, "The name of the product", False),
        ("description", DataType.TEXT, "A detailed description of the product", False),
        ("reviews", DataType.TEXT_ARRAY, "The reviews of the product", False),
        ("collection", DataType.TEXT, "The collection of the product", False),
        ("tags", DataType.TEXT_ARRAY, "Tags associated with the product", False),
        ("image_url", DataType.TEXT, "URL of the product image", True),
        ("rating", DataType.NUMBER, "The rating of the product", True),
        ("price", DataType.NUMBER, "The price of the product", True),
        ("category", DataType.TEXT, "The category of the product", False),
        ("colors", DataType.TEXT_ARRAY, "Colors associated with the product", False),
        ("subcategory", DataType.TEXT, "The subcategory of the product", False),
        ("brand", DataType.TEXT, "The brand of the product", False),
    ]

    schema_properties = [
        Property(
            name=field_name,
            data_type=field_type,
            description=field_description,
            skip_vectorization=skip_field,
        )
        for field_name, field_type, field_description, skip_field in field_specs
    ]

    # The named vector is built from exactly the properties that are not
    # excluded from vectorization, in schema order.
    embedded_fields = [
        field_name
        for field_name, _, _, skip_field in field_specs
        if not skip_field
    ]

    product_vector = Configure.Vectors.text2vec_ollama(
        name="product_vector",
        source_properties=embedded_fields,
        model="snowflake-arctic-embed2",
        api_endpoint=ollama_endpoint,
    )

    generative_module = Configure.Generative.ollama(
        model="gpt-oss:20b",
        api_endpoint=ollama_endpoint,
    )

    client.collections.create(
        "Products",
        description="A collection of ecommerce fashion products with their details",
        vector_config=[product_vector],
        generative_config=generative_module,
        properties=schema_properties,
    )
    print("Products collection created.")

## Load the Dataset

In [None]:
import json

# Read the whole dataset first so the file handle is released before the
# (potentially slow) import into Weaviate starts.
with open("../../../datasets/ecommerce_products.json", "r", encoding="utf-8") as f: # Or change the path according to your local setup
    products_data = json.load(f)

products_collection = client.collections.get("Products")

# Dataset keys copied verbatim into the matching "Products" properties.
product_fields = (
    "product_id",
    "name",
    "description",
    "reviews",
    "collection",
    "tags",
    "image_url",
    "rating",
    "price",
    "category",
    "colors",
    "subcategory",
    "brand",
)

with products_collection.batch.dynamic() as batch:
    for item in products_data:
        # The dataset's product_id doubles as the object UUID.
        uuid = item["product_id"]
        batch.add_object(
            properties={field: item[field] for field in product_fields},
            uuid=uuid,
        )
        # The object has been handed to the batcher at this point; import
        # errors, if any, are reported after the batch context exits.
        print(f"Added product: {item['name']} with UUID: {uuid}")

# Batch imports do not raise on per-object errors, so surface them explicitly
# instead of letting a partial import go unnoticed.
failed_objects = products_collection.batch.failed_objects
if failed_objects:
    print(f"Failed to import {len(failed_objects)} of {len(products_data)} products.")
    print(f"First failed object: {failed_objects[0]}")

## Retrieval Augmented Generation w/ gpt-oss:20b

In [None]:
from weaviate.classes.generate import GenerativeConfig

collection = client.collections.get("Products")

# The prompt is assembled line by line so that no source-code indentation
# leaks into the text sent to the model, and so the instructions are stated
# once (the earlier wording had a dangling, duplicated "Include:" lead-in).
email_task = "\n".join(
    [
        "Based on these products, write a personalized recommendation email for a customer interested in fashion.",
        "Please generate a short, engaging email that includes:",
        "1. A catchy subject line that aligns with the product theme",
        "2. A friendly, inviting introduction",
        "3. Highlights of each product, including their names and unique features or benefits",
        "",
        "The tone should be warm, conversational, and aligned with the product context. Keep the email around 150 words.",
    ]
)

# Retrieve the 3 products nearest to the query on the "product_vector" named
# vector, then pass them together to gpt-oss:20b as a single grouped task.
response = collection.generate.near_text(
    query="sustainable eco-friendly clothing for summer",
    limit=3,
    target_vector="product_vector",
    grouped_task=email_task,
    generative_provider=GenerativeConfig.ollama(
        api_endpoint="http://host.docker.internal:11434", model="gpt-oss:20b"
    ),
)

print(response.generated)