In [None]:
%pip install -qU langchain==0.0.292 openai==0.28.0 datasets==2.10.1 pinecone-client==2.2.4 tiktoken==0.5.1 python-dotenv

In [None]:
import os
from dotenv import load_dotenv
from langchain.chat_models import ChatOpenAI

# Load variables from a local .env file (if one exists) *before* reading the
# key, so a key stored there is visible to this cell.
load_dotenv()

# Fail early with a clear message instead of assigning None into os.environ
# (which raises an unhelpful "str expected, not NoneType" TypeError).
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
if not OPENAI_API_KEY:
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Export it in the environment or add it to a .env file."
    )

# Chat model client used for every conversation turn in this notebook.
chat = ChatOpenAI(
    openai_api_key=OPENAI_API_KEY,
    model='gpt-3.5-turbo'
)

In [None]:
from langchain.schema import (
    SystemMessage,
    HumanMessage,
    AIMessage
)

In [None]:
# Seed the conversation history: a system instruction, a short greeting
# exchange, and the first real question from the user.
messages = []
messages.append(SystemMessage(content="You are a helpful assistant."))
messages.append(HumanMessage(content="Hi AI, how are you today?"))
messages.append(AIMessage(content="I'm great thank you. How can I help you?"))
messages.append(HumanMessage(content="I'd like to understand string theory."))

In [None]:
# Send the conversation so far to the chat model and print the text of its reply.
res = chat(messages)
print(res.content)

In [None]:
# Follow-up question from the user.
prompt = HumanMessage(content="Why do physicists believe it can produce a unified theory?")

# Keep the history in order: the model's previous reply first, then the new question.
messages.extend([res, prompt])

In [None]:
# Ask the model again, now with the extended history.
res = chat(messages)
# Store the reply in the history right away so the next turn includes it.
messages.append(res)
print(res.content)

In [None]:
# New user turn asking about Llama 2; it is added to the shared history.
prompt = HumanMessage(
    content="What is so special about Llama 2?"
)
messages.append(prompt)

# Note: this reply is only printed; it is not appended to `messages`.
res = chat(messages)
print(res.content)

# End of the demo code that highlights that this GPT model doesn't have current information about Llama 2.
# The next section begins the RAG workflow.

In [None]:
from datasets import load_dataset

# Load the "train" split of the chunked-papers dataset.
dataset = load_dataset("jamescalam/llama-2-arxiv-papers-chunked", split="train")

# Display the first record as a quick sanity check.
dataset[0]

In [None]:
import pinecone
from dotenv import load_dotenv

# Pull PINECONE_API_KEY (and, optionally, PINECONE_ENVIRONMENT) from a local
# .env file if one exists.
load_dotenv()

PINECONE_API_KEY = os.getenv('PINECONE_API_KEY')
if not PINECONE_API_KEY:
    # Fail here with a clear message rather than passing None to the client.
    raise RuntimeError(
        "PINECONE_API_KEY is not set. Export it in the environment or add it to a .env file."
    )

# The environment is shown in the Pinecone console next to the API key you're
# using. It can be overridden via PINECONE_ENVIRONMENT; the default keeps the
# value this notebook used before.
PINECONE_ENVIRONMENT = os.getenv('PINECONE_ENVIRONMENT', 'gcp-starter')

pinecone.init(
    api_key=PINECONE_API_KEY,
    environment=PINECONE_ENVIRONMENT
)

In [None]:
import time

index_name = "llama-2-rag"

# Create the index only on the first run; later runs reuse the existing one.
index_exists = index_name in pinecone.list_indexes()
if not index_exists:
    pinecone.create_index(index_name, dimension=1536, metric='cosine')

    # Poll once per second until the new index reports that it is ready.
    while True:
        if pinecone.describe_index(index_name).status['ready']:
            break
        time.sleep(1)

# Handle used for all subsequent reads and writes against the index.
index = pinecone.Index(index_name)

In [None]:
# Test the index connection by requesting its statistics; the result is shown as the cell output.
index.describe_index_stats()

In [None]:
from langchain.embeddings.openai import OpenAIEmbeddings

# Embedding client; used below to embed the document chunks (embed_documents)
# and passed to the vector store for queries (embed_query).
embed_model = OpenAIEmbeddings(model="text-embedding-ada-002")

In [None]:
# Embed the dataset in batches and upsert the resulting vectors into Pinecone.
# tqdm renders a progress bar over the batches.

from tqdm.auto import tqdm

data = dataset.to_pandas()

batch_size = 100

for i in tqdm(range(0, len(data), batch_size)):
    i_end = min(len(data), i + batch_size)
    batch = data.iloc[i:i_end]

    # Vector ids have the form "<doi>-<chunk-id>". The columns are read
    # directly instead of making repeated iterrows() passes over the batch.
    ids = [
        f"{doi}-{chunk_id}"
        for doi, chunk_id in zip(batch['doi'], batch['chunk-id'])
    ]
    texts = batch['chunk'].tolist()
    embeds = embed_model.embed_documents(texts)

    # Metadata stored next to each vector; 'text' holds the raw chunk.
    metadata = [
        {'text': text, 'source': source, 'title': title}
        for text, source, title in zip(texts, batch['source'], batch['title'])
    ]

    # Materialise the (id, vector, metadata) tuples so the client receives a
    # concrete list rather than a one-shot iterator.
    index.upsert(vectors=list(zip(ids, embeds, metadata)))


In [None]:
# Request the index statistics again, now that the upsert loop has run.
index.describe_index_stats()

In [None]:
from langchain.vectorstores import Pinecone

# Name of the metadata field passed to the vector store; the upsert loop
# stores each chunk's text under the 'text' key.
text_field = "text"

# Wrap the Pinecone index as a LangChain vector store, passing
# embed_model.embed_query as the embedding callable.
vectorstore = Pinecone(
    index, embed_model.embed_query, text_field
)

In [None]:
# Same question text that was sent to the plain chat model earlier.
query = "What is so special about Llama 2?"

# Run a similarity search for the query with k=3; the result is shown as the cell output.
vectorstore.similarity_search(query, k=3)

In [None]:
# create function to format tuned prompt w/ context
def augment_prompt(query: str, k: int = 3) -> str:
    """Build a retrieval-augmented prompt for ``query``.

    Runs a similarity search against the module-level ``vectorstore`` and
    places the ``page_content`` of the returned documents, joined by
    newlines, ahead of the query in the prompt.

    Args:
        query: The user question; used for retrieval and repeated in the prompt.
        k: Number of documents to request from the vector store. Defaults to
            3, the value that used to be hard-coded.

    Returns:
        The prompt text containing the retrieved contexts and the query.

    Raises:
        ValueError: If ``k`` is smaller than 1.
    """
    if k < 1:
        raise ValueError(f"k must be at least 1, got {k}")

    results = vectorstore.similarity_search(query, k=k)
    source_knowledge = "\n".join(doc.page_content for doc in results)

    # Assembled line by line so the whitespace in the prompt is explicit.
    return (
        "Using the contexts below, answer the query.\n"
        "    \n"
        f"    Contexts: {source_knowledge}\n"
        f"    Query: {query}\n"
        "    "
    )

In [None]:
# Test augment_prompt: print the fully formatted prompt for the sample query.
print(augment_prompt(query))

In [None]:
# Wrap the retrieval-augmented prompt as a new human turn and add it to the history.
prompt = HumanMessage(content=augment_prompt(query))
messages.append(prompt)

# Send the augmented conversation to the foundation model and show its answer.
res = chat(messages)
print(res.content)