# Personalization Agent Demo

## Connect to the Weaviate Cloud instance

> Reminder: Weaviate Agents are only available for Weaviate Cloud instances.

Connect to your Weaviate instance, using credentials from the Weaviate Cloud console. Here, they are loaded from the `.env` file.

In [None]:
from dotenv import load_dotenv
import weaviate
import os

# Load WEAVIATE_URL and WEAVIATE_API_KEY from the local .env file into the environment.
load_dotenv()

weaviate_url = os.environ.get("WEAVIATE_URL")
weaviate_api_key = os.environ.get("WEAVIATE_API_KEY")

# Open the connection to the Weaviate Cloud cluster with the credentials read above.
client = weaviate.connect_to_weaviate_cloud(cluster_url=weaviate_url, auth_credentials=weaviate_api_key)

# Stop here if the cluster is not ready to serve requests.
assert client.is_ready()

## Add data

We add a movies dataset here. It is loaded from the Hugging Face Hub and is pre-vectorized using `Snowflake/snowflake-arctic-embed-l-v2.0`.

### Load data & inspect it briefly

In [None]:
from datasets import load_dataset

# Load the "movies" configuration of the demo dataset in streaming mode.
movies_dataset = load_dataset(
    "jphwang/weaviate-demos",
    "movies",
    split="train",
    streaming=True,
)

In [None]:
# Print the first five records of each dataset as a quick sanity check.
for dataset in [movies_dataset]:
    print(f"Dataset: {dataset.config_name}")
    for position, record in enumerate(dataset):
        if position >= 5:
            break
        print(record)

### Prepare the Collections

Here we create collections and add the objects. 

In [None]:
# WARNING: destructive - run this ONLY if you want to delete the existing
# "Movie" collection together with all of its data.
client.collections.delete("Movie")

In [None]:
from weaviate.classes.config import Configure, Property, DataType

# Create the "Movie" collection only when it is missing, so re-running this
# cell does not fail on (or overwrite) an existing collection.
if not client.collections.exists("Movie"):
    client.collections.create(
        "Movie",
        description="A dataset that lists movies, their ratings, original language etc..",
        # The property descriptions are part of the schema.
        properties=[
            Property(
                name="title",
                data_type=DataType.TEXT,
                description="The title of the movie",
            ),
            Property(
                name="release_year",
                data_type=DataType.INT,
                description="The release year of the movie",
            ),
            Property(
                name="overview",
                data_type=DataType.TEXT,
                description="Short description of the movie",
            ),
            Property(
                name="genres",
                data_type=DataType.TEXT_ARRAY,
                description="The genres of the movie, in an array format",
            ),
            Property(
                name="vote_average",
                data_type=DataType.NUMBER,
                description="The average user rating of the movie; range is 0-10",
            ),
            Property(
                name="vote_count",
                data_type=DataType.INT,
                description="The number of user votes for the movie",
            ),
            Property(
                name="popularity",
                data_type=DataType.NUMBER,
                description="Calculated popularity of the movie by weighing multiple factors; range is 0-100",
            ),
            Property(
                name="poster_url",
                data_type=DataType.TEXT,
                description="A TMDB URL of the movie poster image",
            ),
            Property(
                name="original_language",
                data_type=DataType.TEXT,
                description="A two-letter code (e.g. 'en') representing the original language of the movie",
            ),
        ],
        vectorizer_config=[
            Configure.NamedVectors.text2vec_weaviate(
                name="default",
                # Only properties that exist in the schema can be vectorized:
                # the descriptive text field is "overview" (there is no
                # "description" property in this collection).
                source_properties=["title", "overview"],
                model="Snowflake/snowflake-arctic-embed-l-v2.0",
            )
        ],
    )

Import data

In [None]:
from tqdm import tqdm
from weaviate.util import generate_uuid5

movies = client.collections.get("Movie")

# Import the streamed records in fixed-size batches of 100 objects.
with movies.batch.fixed_size(batch_size=100) as batch:
    for item in tqdm(movies_dataset):
        props = item["properties"]

        # The schema stores only the year: swap release_date for release_year.
        props["release_year"] = props.pop("release_date").year

        batch.add_object(
            properties=props,
            # UUID derived from the title, so it can be regenerated from the title alone
            uuid=generate_uuid5(props["title"]),
            # Reuse the vector shipped with the dataset for the "default" named vector
            vector={"default": item["vector"]},
        )

# Report how many objects failed to import, with the first three error messages.
failed_objects = movies.batch.failed_objects
if failed_objects:
    print(f"{len(failed_objects)} objects failed during import:")
    for failure in failed_objects[:3]:
        print(failure.message)

## Connect to the Personalization Agent

You can initialize the Personalization Agent, or connect to an existing one, as shown below.

In [None]:
from weaviate.agents.personalization import PersonalizationAgent
from weaviate.classes.config import DataType

# Collection that is checked for an existing Personalization Agent.
collection_name = "Movie"

# Connect to the agent if one already exists for this collection;
# otherwise create a new one and declare the persona property schema.
if PersonalizationAgent.exists(client, collection_name):
    pa = PersonalizationAgent.connect(
        # Fill in "client", "reference_collection" and "vector_name" parameters
        # Hint: the "Movie" collection defines one named vector, called "default"
        # ADD YOUR CODE HERE
    )
else:
    pa = PersonalizationAgent.create(
        # Fill in "client", "reference_collection" and "vector_name" parameters
        # Hint: the "Movie" collection defines one named vector, called "default"
        # ADD YOUR CODE HERE
        # Property names and data types that personas of this agent will use
        user_properties={
            "age": DataType.NUMBER,
            "favorite_genres": DataType.TEXT_ARRAY,
            "favorite_years": DataType.NUMBER_ARRAY,
            "language": DataType.TEXT,
        },
    )

## Create a persona

A "persona" is where the agent stores its knowledge about a user. You can add a persona or use an existing one. 

In [None]:
from weaviate.agents.classes import Persona
from weaviate.util import generate_uuid5
from uuid import uuid4  # If you want to generate a random UUID

persona_id = generate_uuid5("jphwang")  # To generate a deterministic UUID
# persona_id = uuid4()  # To generate a random UUID

# You can delete a persona if you want to remove it from the system
# NOTE(review): this delete runs unconditionally every time the cell is run,
# so the has_persona() check below presumably never finds the persona.
# Comment the next line out to keep an existing persona - TODO confirm intent.
pa.delete_persona(persona_id)

# Add the persona only when the agent does not already have this ID.
if pa.has_persona(persona_id):
    print(f"Persona with ID {persona_id} already exists.")
else:
    print(f"Creating new persona with ID {persona_id}.")
    pa.add_persona(
        # Try adding your persona here
        Persona(
            # Add "persona_id" as above
            # Add "properties" according to the data schema defined above
            # (the schema keys are: age, favorite_genres, favorite_years, language)
            # ADD YOUR CODE HERE
        )
    )

## Add interactions

This is how the agent learns each persona's preferences.

In [None]:
from weaviate.agents.classes import PersonaInteraction
from helpers import get_movie_uuid  # Helper to get the UUID of a movie

# Record a first interaction between the persona and a single movie.
pa.add_interactions(interactions=[
    PersonaInteraction(
        # Provide the "persona_id" and "weight" parameters
        # (weight: 1 = most positive, 0 = neutral, -1 = most negative)
        # ADD YOUR CODE HERE
        # We need the item UUID - here, we'll use a helper function
        # defined in helpers.py to make the job easier
        item_id=get_movie_uuid(client, "Independence Day")
    ),
])

## Queries

We can already perform queries.

### Basic queries

Fast, most basic personalized queries

- Uses vectors of interaction history only

In [None]:
# Basic personalized query: agent ranking is switched off, up to 50 objects are requested.
response = pa.get_objects(persona_id, limit=50, use_agent_ranking=False)

Print the results out - we've defined a helper function to do this. 

Note the `ranking_rationale` field, which explains the agent's reasoning behind the ranking (if there is any), and the `objects` field, which contains the list of objects returned by the agent.

In [None]:
from weaviate.agents.classes import PersonalizationAgentGetObjectsResponse


def print_movie_response_details(
    response: "PersonalizationAgentGetObjectsResponse",
    n_objects: int = 5,
    verbose: bool = False,
) -> None:
    """Print a short, human-readable summary of a ``get_objects`` response.

    Args:
        response: Response whose ``ranking_rationale`` and ``objects``
            attributes are read.
        n_objects: Maximum number of objects to print.
        verbose: When true, also print each movie's overview, release year,
            vote average, vote count and popularity.
    """
    if response.ranking_rationale is not None:
        print(f"Ranking rationale: {response.ranking_rationale}")

    for i, obj in enumerate(response.objects[:n_objects]):
        props = obj.properties
        print(f"*****{i}*****")
        print(props["title"])
        print(props["genres"])
        if verbose:
            print(props["overview"])
            # The collection stores "release_year"; "release_date" is replaced
            # by it at import time, so reading "release_date" raised KeyError.
            print(props["release_year"])
            print(f"vote_average: {props['vote_average']}")
            print(f"vote_count: {props['vote_count']}")
            print(f"popularity: {props['popularity']}")
        # Ranks are printed only when the object carries an original rank.
        if obj.original_rank is not None:
            print(f"original rank: {obj.original_rank}, personalized rank: {obj.personalized_rank}")


In [None]:
# Show the first five results of the basic query.
print_movie_response_details(response, n_objects=5)

### Agent reranking

The agent can smartly rerank the results based on the information about the persona, as well as the interactions.

In [None]:
# Same query as before, this time letting the agent rerank the results.
response = pa.get_objects(
    persona_id,
    limit=50,
    # Set the `use_agent_ranking` parameter (actually, it's True by default - but nice to know what it does :) )
    # ADD YOUR CODE HERE
)

# Show the first five results.
print_movie_response_details(response, 5)

### With Reranker + Instruction

- Uses vectors of interaction history and AI-based reranker
- Instructions used to guide the reranker

In [None]:
# Agent-ranked query that is additionally guided by a custom instruction.
response = pa.get_objects(
    persona_id,
    limit=50,
    use_agent_ranking=True,
    # Set a custom instruction here
    # (pass it as a string through the `instruction` parameter)
    # ADD YOUR CODE HERE
)

# Show the first five results.
print_movie_response_details(response, 5)

### Add more interactions

Over time, you will add more interactions to the agent, which will help it learn more about the persona's preferences.

Note each interaction can be positive or negative. 
(1: most positive, 0: neutral, -1: most negative)

In [None]:
# (title, weight) pairs; weights run from -1 (most negative) to 1 (most positive).
# Add further interactions here
rated_titles = [
    ("Iron Man", 0.9),  # very positive
    ("The Grand Budapest Hotel", 0.9),
    ("Sleepless in Seattle", 0.8),
    ("The Mummy", 0.0),  # neutral
    ("A Nightmare on Elm Street", -0.3),  # little bit negative
    ("The Cloverfield Paradox", -0.9),  # very negative
]

# Resolve each title to its movie UUID and wrap it in an interaction for this persona.
interactions = [
    PersonaInteraction(
        persona_id=persona_id,
        item_id=get_movie_uuid(client, title),
        weight=weight,
    )
    for title, weight in rated_titles
]

pa.add_interactions(interactions=interactions)

### Retry with the updated knowledge

In [None]:
# Try getting objects again - what happens?
# NOTE: until a new `response` is assigned here, the call below prints the
# results of the previous query again.
# ADD YOUR CODE HERE

# Show the first five results.
print_movie_response_details(response, 5)

### With Reranker + Instruction + Filter

- The most complex personalized queries
- Uses vectors of interaction history and AI-based reranker
- Instructions used to guide the reranker
- Filters out items that are not relevant to the user

In [None]:
from weaviate.classes.query import Filter

# With Reranker + Instruction + Filter
response = pa.get_objects(
    persona_id,
    limit=50,
    use_agent_ranking=True,
    instruction="The user is looking for a classic drama, that is suitable for a date night.",
    # Add a filter - can you filter by movies released in 2000 or earlier?
    # Hint: the "Movie" schema stores the year in the INT property "release_year"
    # ADD YOUR CODE HERE
)

In [None]:
# Show the first ten results of the filtered query.
print_movie_response_details(response, n_objects=10)

## Combine personalization with other queries

From `pa.query`, you can perform the common Weaviate searches, such as `near_text`, `bm25` and `hybrid`

In [None]:
# Build a personalized query handle for this persona, then run a hybrid search through it.
# NOTE(review): `strength` presumably controls how strongly personalization
# influences the results - confirm against the agent documentation.
personalized_query = pa.query(persona_id=persona_id, strength=0.95)
response = personalized_query.hybrid(query="historical adventure", limit=10)

# Print the title and genres of every returned movie.
for movie in response.objects:
    movie_props = movie.properties
    print(f"Title: {movie_props['title']}")
    print(f"Genres: {movie_props['genres']}")

In [None]:
# Try a semantic (near_text) search for titles similar to the word "europe"
# Hint: `near_text` is available from `pa.query(...)`, just like `hybrid`
# NOTE: until a new `response` is assigned here, the loop below prints the
# results of the previous query again.
# ADD YOUR CODE HERE

# Print the title and genres of every returned movie.
for o in response.objects:
    print(f"Title: {o.properties['title']}")
    print(f"Genres: {o.properties['genres']}")

In [None]:
# Close the connection to the Weaviate instance now that the demo is finished.
client.close()