## Setup

In [1]:
import csv
import re
from pinecone import Pinecone

from ollama import chat
from ollama import ChatResponse

# Initialize dotenv to load environment variables.
from dotenv import load_dotenv

load_dotenv()

True

## Create Pinecone index

In [2]:
import os

pinecone_api_key = os.getenv('PINECONE_API_KEY')

pc = Pinecone(api_key=pinecone_api_key)

index_name = "rag-example"

if not pc.has_index(index_name):
    pc.create_index_for_model(
        name=index_name,
        cloud="aws",
        region="us-east-1",
        embed={
            "model": "llama-text-embed-v2",

            "field_map": {"text": "chunk_text"}
        }
    )


## Create batches

In [3]:
import json

records_batches = {}

with open('./reddit_jokes.json', 'r') as file:
    json_reader = json.load(file)
    i = 0
    batch = 0
    for joke in json_reader:
        if i % 50 == 0 and i != 0:
            batch += 1
        records_batches.setdefault(batch, [])
        i += 1
        full_joke = joke["title"] + '\n\n' + joke["body"]
        records_batches[batch].append({
            "_id": f"rec{i}",
            "chunk_text": full_joke
        })

print(f"Number of joke record batches: {len(records_batches.keys())}")

Number of joke record batches: 28


In [4]:
import time

# Retrieve the connection to the index
dense_index = pc.Index(index_name)
pinecone_index_namespace = "reddit_jokes_namespace"

# Insert records in batches, the sleep is to avoid running into rate limit errors
for batch_index, batch_data in records_batches.items():
    dense_index.upsert_records(pinecone_index_namespace, batch_data)
    time.sleep(5)

# Print index's stats to confirm that the records were created:
stats = dense_index.describe_index_stats()
print(stats)

  from .autonotebook import tqdm as notebook_tqdm


{'dimension': 1024,
 'index_fullness': 0.0,
 'metric': 'cosine',
 'namespaces': {'reddit_jokes_namespace': {'vector_count': 1395}},
 'total_vector_count': 1395,
 'vector_type': 'dense'}


## RAG Example

In [5]:
pinecone_index_namespace = "reddit_jokes_namespace"
query = "Share with me the best jokes that involve dogs."

results = dense_index.search(
    namespace=pinecone_index_namespace,
    query={
        "top_k": 3,
        "inputs": {
            'text': query
        }
    }
)

augmentation_data = []

print(f"Got {len(results['result']['hits'])} results from the Vector Database")

for hit in results['result']['hits']:
    augmentation_data.append(hit['fields']['chunk_text'])

separator = '\n\n\n====================================\n\n\n'
augmentation_data = separator.join(augmentation_data)

response: ChatResponse = chat(model='llama3.2', messages=[
    {
        'role': 'system',
        'content': 'You are a helpful assistant',
    },
    {
        'role': 'system',
        'content': """
The user's question is:
{query}

Aproach this task step-by-step, take your time and do not skip steps.

Based on the user's question and using the following data as input:

{augmentation_data}

1. Read each paragraph from the previous augmentation text.
2. Create a response that summarizes ONLY the text relevant to the user's query.
3. Create a message so the user can laugh at the jokes found in the data found.

"""
    },
    {
        'role': 'user',
        'content': query,
    }
])

print(response['message']['content'])

Got 3 results from the Vector Database
I'd be happy to share some dog-gone good jokes with you! Here are a few:

1. Why did the dog go to the vet?

Because he was feeling ruff!

2. What did the dog say when his owner asked him if he wanted to go for a walk?

"I'm paws-itive I do!"

3. Why did the dog go to the gym?

To get a paws-itive workout!

4. What do you call a dog that does magic tricks?

A labracadabrador!

5. Why did the dog go to the beauty parlor?

Because he wanted to get a paws-itively gorgeous haircut!

6. Why did the dog go to the therapist?

Because he was feeling a little ruff around the edges!

7. What do you call a dog that's a good listener?

A retriever!

8. Why did the dog become a detective?

Because he was great at sniffing out clues!

9. What do you call a dog that's a good singer?

A howl-lywood star!

10. Why did the dog go to the amusement park?

To ride the paws-coaster!

I hope these jokes made you howl with laughter!
