# Lesson 2 - Retrieval Augmented Generation (RAG)

### Import the Needed Packages

In [1]:
import warnings
warnings.filterwarnings('ignore')

from datasets import load_dataset
from openai import OpenAI
from pinecone import Pinecone, ServerlessSpec
from tqdm.auto import tqdm
from DLAIUtils import Utils

import ast
import os
import pandas as pd

In [2]:
# Create the `Utils` helper (imported from DLAIUtils) and read the Pinecone
# API key through it. `utils` is reused later to fetch the OpenAI key.
# NOTE(review): PINECONE_API_KEY is only referenced by the commented-out
# Pinecone setup cell in this notebook.
utils = Utils()
PINECONE_API_KEY = utils.get_pinecone_api_key()

### Setup Pinecone

In [3]:
# pinecone = Pinecone(api_key=PINECONE_API_KEY)

# # Create a Pinecone index (vector DB)
# utils = Utils()
# INDEX_NAME = utils.create_dlai_index_name('dl-ai')
# if INDEX_NAME in [index.name for index in pinecone.list_indexes()]:
#   pinecone.delete_index(INDEX_NAME)

# pinecone.create_index(name=INDEX_NAME, dimension=1536, metric='cosine',
#   spec=ServerlessSpec(cloud='aws', region='us-west-2'))

# index = pinecone.Index(INDEX_NAME)

### Load the Dataset

<p style="background-color:#fff1d7; padding:15px; "> <b>(Note: <code>max_articles_num = 500</code>):</b> To give the Large Language Model (LLM) a more comprehensive context, a larger number of articles is generally more beneficial. In this lab, we've initially set <code>max_articles_num</code> to 500 for faster results, allowing you to observe the outcomes sooner. Once you've done an initial run, consider increasing this value to 750 or 1,000. You'll likely notice that the context provided to the LLM becomes richer and better. You can experiment by gradually raising this variable for different queries to observe the improvements in the LLM's contextual understanding.</p>

In [4]:
# Upper bound on the number of CSV rows to read (passed as `nrows`).
max_articles_num = 500
# This corpus has already been embedded: the `metadata` and `values` columns
# hold Python literals as strings, which later cells parse with
# ast.literal_eval.
df = pd.read_csv('./data/wiki.csv', nrows=max_articles_num)
df.head()

Unnamed: 0,id,metadata,values
1,1-0,"{'chunk': 0, 'source': 'https://simple.wikiped...","[-0.011254455894231796, -0.01698738895356655, ..."
2,1-1,"{'chunk': 1, 'source': 'https://simple.wikiped...","[-0.0015197008615359664, -0.007858820259571075..."
3,1-2,"{'chunk': 2, 'source': 'https://simple.wikiped...","[-0.009930099360644817, -0.012211072258651257,..."
4,1-3,"{'chunk': 3, 'source': 'https://simple.wikiped...","[-0.011600767262279987, -0.012608098797500134,..."
5,1-4,"{'chunk': 4, 'source': 'https://simple.wikiped...","[-0.026462381705641747, -0.016362832859158516,..."


### Prepare the Embeddings and Upsert to Pinecone

In [5]:
# prepped = []
# for i, row in tqdm(df.iterrows(), total=df.shape[0]):
#     meta = ast.literal_eval(row['metadata'])
#     prepped.append({'id':row['id'], 
#                     'values':ast.literal_eval(row['values']), 
#                     'metadata':meta})
#     if len(prepped) >= 250:
#         index.upsert(prepped)
#         prepped = []

In [6]:
# index.describe_index_stats()
# # Check that the dimension is same as that of text-embedding-ada-002 (OpenAI)

### Store in local vector DB

In [7]:
# Store in the local vector DB
import shutil

import chromadb

persist_directory = '../docs/chroma-pinecone/'
# Remove old database files, if any, so the collection starts empty.
# shutil.rmtree works on every OS and reuses `persist_directory` instead of
# repeating the path; ignore_errors=True keeps a missing directory from
# raising, like `rm -rf`.
shutil.rmtree(persist_directory, ignore_errors=True)

# Persistent on-disk Chroma client and the collection the rows are added to.
client = chromadb.PersistentClient(path=persist_directory)
collection = client.get_or_create_collection(name="dl-ai")

In [8]:
# Build the four parallel lists that `collection.add` takes, parsing the
# stringified `metadata` and `values` columns of each row.
ids, embeddings, metadatas, documents = [], [], [], []
for _, record in tqdm(df.iterrows(), total=df.shape[0]):
    parsed_meta = ast.literal_eval(record['metadata'])
    parsed_vector = ast.literal_eval(record["values"])
    ids.append(record["id"])
    embeddings.append(parsed_vector)
    metadatas.append(parsed_meta)
    # The chunk text stored in the metadata doubles as the document body.
    documents.append(parsed_meta["text"])

# Insert everything in a single call.
collection.add(ids=ids, embeddings=embeddings,
               metadatas=metadatas, documents=documents)

  0%|          | 0/500 [00:00<?, ?it/s]

In [9]:
# collection.peek(1)

### Connect to OpenAI

In [10]:
# Build the OpenAI client from the key returned by the `utils` helper.
OPENAI_API_KEY = utils.get_openai_api_key()
openai_client = OpenAI(api_key=OPENAI_API_KEY)


def get_embeddings(articles, model="text-embedding-ada-002"):
    """Request embeddings for `articles` via `openai_client`.

    Args:
        articles: Passed unchanged as the `input` argument of the request.
        model: Name of the embedding model to use.

    Returns:
        Whatever `openai_client.embeddings.create` returns.
    """
    response = openai_client.embeddings.create(model=model, input=articles)
    return response

In [11]:
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings.openai import OpenAIEmbeddings

# Embedding function LangChain uses to embed queries. The key is passed
# explicitly (as is done for `openai_client`) so this cell does not depend on
# an OPENAI_API_KEY environment variable happening to be set.
embedding_function = OpenAIEmbeddings(
    model="text-embedding-ada-002",
    openai_api_key=OPENAI_API_KEY,
)

# Wrap the existing Chroma client/collection as a LangChain vector store.
vectordb = Chroma(
    client=client,
    collection_name="dl-ai",
    embedding_function=embedding_function
)

# Sanity check: report how many entries the wrapped collection holds.
print("There are", vectordb._collection.count(), "in the collection")

There are 500 in the collection


  warn_deprecated(


### Run Your Query

In [12]:
# query = "What is the Babylon gate?"

# embed = get_embeddings([query])
# res = index.query(vector=embed.data[0].embedding, top_k=3, include_metadata=True)
# text = [r['metadata']['text'] for r in res['matches']]
# print('\n'.join(text))

In [13]:
def pretty_print_docs(docs):
    """Print each document's `page_content` under a numbered heading.

    Documents are numbered from 1 and separated by a line of 100 dashes.

    Args:
        docs: Iterable of objects exposing a string `page_content` attribute.
    """
    divider = "\n" + "-" * 100 + "\n"
    sections = []
    for number, doc in enumerate(docs, start=1):
        sections.append(f"Document {number}:\n\n" + doc.page_content)
    print(divider.join(sections))

In [14]:
question = "What is the Babylon gate?"

# Ask the LangChain Chroma wrapper for the 3 most similar entries.
res = vectordb.similarity_search(question, k=3)
# Plain-text alternative to pretty_print_docs, kept for reference:
# text = [r.page_content for r in res]
# print('\n'.join(text))
pretty_print_docs(res)

Document 1:

Ezekiel 28:13-14
13. Thou hast been in Eden the garden of God; every precious stone was thy covering, the sardius, topaz, and the diamond, the beryl, the onyx, and the jasper, the sapphire, the emerald, and the carbuncle and gold: the workmanship of thy tabrets and of thy pipes was prepared in thee in the day that thou wast created.
14. Thou art the anointed cherub that covereth; and I have set thee so: thou wast upon the holy mountain of God; thou hast walked up and down in the midst of the stones of fire.

It describes the sound of their wings, "like the roar of rushing waters."

Ezekiel 10:5-7 ; Ezekiel 10:8 reveals that they have hands like a man under their wings .

Ezekiel 1:7 KJV reveals that they look like man but are different because they have "straight feet" and four wings and four faces.

Ezekiel ch 1, and 10 describe the cherubim creatures ascending and descending from the earth with wheels. Ezekiel 1:14-20 ; Ezekiel 10:16

Ezekiel 10:9-13 describes what the w

### Try to compress the contexts

In [15]:
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor
from langchain.chat_models import ChatOpenAI

# Chat model that performs the extraction. The key is passed explicitly
# (as is done for `openai_client`) so this cell does not depend on an
# OPENAI_API_KEY environment variable happening to be set.
llm_name = "gpt-3.5-turbo"
llm = ChatOpenAI(
    model_name=llm_name,
    temperature=0,
    openai_api_key=OPENAI_API_KEY,
)

# Wrap our vectorstore: documents returned by the base retriever (k=3) are
# passed through the LLM-backed extractor before being returned.
compressor = LLMChainExtractor.from_llm(llm)

compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor,
    base_retriever=vectordb.as_retriever(search_kwargs={"k": 3})
)

  warn_deprecated(


In [19]:
question = "What is the Berlin wall?"

# Query through the compression retriever, which wraps the vector store
# retriever (k=3) with the LLMChainExtractor compressor.
compressed_docs = compression_retriever.get_relevant_documents(question)

In [21]:
# compressed_docs
# Print each compressed document's page_content under a numbered heading.
pretty_print_docs(compressed_docs)

Document 1:

Berlin Wall
----------------------------------------------------------------------------------------------------
Document 2:

The city was split into West Berlin and East Berlin after World War Two. After the Berlin Wall was built in 1961 very few people were allowed to cross from East Berlin into West Berlin. The wall divided the city until 1989 when the East German government decided to allow anyone to cross, and people decided to tear down the wall.
----------------------------------------------------------------------------------------------------
Document 3:

The Berlin Wall was built by the communist government of East Germany between the two halves of Berlin.


### Build the Prompt

In [31]:
# Task for the LLM; the commented line is an alternative topic.
# query = "write an article titled: what is the Babylon gate?"
query = "Write a short article titled: What is the Berlin wall?"

# The context is taken from the compressed documents rather than from a
# Pinecone query.
contexts = [doc.page_content for doc in compressed_docs]

prompt_start = "Answer the question based on the context below.\n\nContext:\n"
prompt_end = f"\n\nQuestion: {query}\nAnswer:"

# Layout: instruction header, contexts separated by "---" rules, question.
context_section = "\n\n---\n\n".join(contexts)
prompt = "".join((prompt_start, context_section, prompt_end))

print(prompt)

Answer the question based on the context below.

Context:
Berlin Wall

---

The city was split into West Berlin and East Berlin after World War Two. After the Berlin Wall was built in 1961 very few people were allowed to cross from East Berlin into West Berlin. The wall divided the city until 1989 when the East German government decided to allow anyone to cross, and people decided to tear down the wall.

---

The Berlin Wall was built by the communist government of East Germany between the two halves of Berlin.

Question: Write a short article titled: What is the Berlin wall?
Answer:


### Get the Summary 

In [24]:
# Send the assembled prompt to the completions endpoint. temperature=0 and
# the neutral penalty/top_p values are stated explicitly.
res = openai_client.completions.create(
    prompt=prompt,
    model="gpt-3.5-turbo-instruct",
    max_tokens=636,
    temperature=0,
    top_p=1,
    presence_penalty=0,
    frequency_penalty=0,
    stop=None,
)
# Print an 80-dash rule, then the text of the first choice.
print('-' * 80, res.choices[0].text, sep='\n')

--------------------------------------------------------------------------------
 The Berlin Wall was a physical barrier that divided the city of Berlin into two halves, East Berlin and West Berlin, after World War Two. It was built in 1961 by the communist government of East Germany, in an effort to prevent people from fleeing to the democratic West. The wall was heavily guarded and very few people were allowed to cross from East Berlin into West Berlin. This division lasted for almost three decades until 1989, when the East German government made the historic decision to allow anyone to cross the wall. This led to a momentous event in history, as people from both sides came together to tear down the wall, symbolizing the end of the Cold War and the reunification of Germany. The Berlin Wall serves as a reminder of the political and social tensions that existed during the Cold War era, and its fall represents the triumph of freedom and unity over division and oppression. Today, fragmen

In [25]:
# Try on Mistral 7B model (offline)
from llama_cpp import Llama

# Location of the GGUF weights. Set the LLAMA_MODEL_PATH environment variable
# to point at your own copy; the fallback is the original local path.
model_path = os.environ.get(
    "LLAMA_MODEL_PATH",
    "/Users/trucvietle/Downloads/llm-models/mistral-7b-instruct-v0.1.Q6_K.gguf",
)
# NOTE: this rebinds `llm`, which earlier in the notebook referred to the
# ChatOpenAI instance.
llm = Llama(model_path=model_path,
            n_ctx=8192, n_batch=512,
            n_threads=7, n_gpu_layers=2,
            verbose=False, seed=42)

llama_model_loader: loaded meta data with 20 key-value pairs and 291 tensors from /Users/trucvietle/Downloads/llm-models/mistral-7b-instruct-v0.1.Q6_K.gguf (version GGUF V2)
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = mistralai_mistral-7b-instruct-v0.1
llama_model_loader: - kv   2:                       llama.context_length u32              = 32768
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   4:                          llama.block_count u32              = 32
llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 14336
llama_model_loader: - kv   6:                 llama.rope.dimension_count u32              = 128
llama_model_loader:

In [30]:
# prompt = "What is the Babylon gate?"
# output = llm(prompt, echo=True, stream=False, max_tokens=4096)

# output_str = output["choices"][0]["text"].replace(prompt, "")
# print(output_str)

In [35]:
# Run the local model on the same prompt. echo=False makes the returned text
# contain only the generation, so nothing has to be stripped afterwards
# (str.replace would also remove any repeat of the prompt inside the answer).
output = llm(prompt, echo=False, stream=False, max_tokens=4096)

output_str = output["choices"][0]["text"]
print(output_str)

 The Berlin Wall was a physical barrier that separated the city of Berlin into two halves, West Berlin and East Berlin after the end of World War Two. It was constructed by the communist government of East Germany in 1961 and remained standing for almost 30 years until it was finally torn down in 1989. For most of its existence very few people were allowed to cross from East Berlin into West Berlin, making it a symbol of division and separation. However, when the East German government decided to allow anyone to cross in 1989, thousands of people flocked to the wall and began tearing it down with their own hands. Today, the Berlin Wall stands as a reminder of the Cold War era and a testament to the power of human freedom.
