In [36]:
%pip install llama-index-embeddings-openai
%pip install llama-index-vector-stores-pinecone
%pip install llama-index-llms-openai

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [16]:
# pip install llama-index
# pip -q install python-dotenv pinecone-client llama-index pymupdf

# Setup

In [67]:
import pinecone
from pinecone import Pinecone, ServerlessSpec
import os
from dotenv import load_dotenv
import fitz
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.schema import TextNode
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core import VectorStoreIndex
from llama_index.core import StorageContext

In [38]:
load_dotenv(dotenv_path='secrets.env')

True

In [54]:
api_key = os.environ['PINECONE_API_KEY']
environment = os.environ['PINECONE_ENVIRONMENT']
# print(api_key)
pinecone = Pinecone(api_key=api_key, environment=environment)
index_name = "llamaindex-rag-fs"

In [55]:
if index_name not in pinecone.list_indexes().names():
    pinecone.create_index(
        index_name, dimension=1536, metric="euclidean", spec=ServerlessSpec(cloud='aws', region='us-east-1') 
    ) # text-embedding-ada-002
pinecone_index = pinecone.Index(index_name)

## Create PineconeVectorStore

In [56]:
from llama_index.vector_stores.pinecone import PineconeVectorStore
vector_store = PineconeVectorStore(pinecone_index=pinecone_index)

## Load Data

In [None]:
file_path = "./data/llama2.pdf"
doc = fitz.open(file_path)
# print(doc.get_page_text(0))

## Text splitter

In [57]:
text_parser = SentenceSplitter(
    chunk_size = 1024
)

In [58]:
text_chunks = []
doc_idxs = []
for doc_idx, page in enumerate(doc):
    page_text = page.get_text("text")
    cur_text_chunks = text_parser.split_text(page_text)
    text_chunks.extend(cur_text_chunks)               # textfrompg1 textfrompg1 textfrompg1 textfrompg2 textfrompg2
    doc_idxs.extend([doc_idx] * len(cur_text_chunks)) # 1 1 1 2 2

# Construct Nodes from Text chunks
Convert chunks into `TextNode` objects.

In [59]:
nodes = []
for idx, text_chunk in enumerate(text_chunks):
    node = TextNode(
        text = text_chunk,
    )
    src_doc_idx = doc_idxs[idx]
    src_page = doc[src_doc_idx]
    nodes.append(node)

In [61]:
from llama_index.core.extractors import (
    QuestionsAnsweredExtractor,
    TitleExtractor,
)
from llama_index.core.ingestion import IngestionPipeline
from llama_index.llms.openai import OpenAI

llm = OpenAI(model="gpt-3.5-turbo")

extractors = [
    TitleExtractor(nodes=5, llm=llm),
    QuestionsAnsweredExtractor(questions=3, llm=llm),
]

In [62]:
pipeline = IngestionPipeline(
    transformations=extractors,
)
nodes = await pipeline.arun(nodes=nodes, in_place=False)

100%|██████████| 5/5 [00:01<00:00,  4.27it/s]
100%|██████████| 107/107 [00:51<00:00,  2.08it/s]


In [63]:
print(nodes[0].metadata)


{'document_title': 'Advancements in Llama 2-Chat: Pretraining, Fine-Tuning, Safety, and Responsible Release Strategy', 'questions_this_excerpt_can_answer': '1. How does the performance of Llama 2-Chat models compare to open-source chat models on various benchmarks?\n2. What safety improvements have been implemented in Llama 2-Chat models, as evaluated through human assessments?\n3. What is the scale range of the pretrained and fine-tuned large language models (LLMs) in the Llama 2 collection, and how do they differ in terms of parameters?'}


## Generate embeddings for each node

In [64]:
embed_model = OpenAIEmbedding()

In [65]:
for node in nodes:
    node_embedding = embed_model.get_text_embedding(
        node.get_content(metadata_mode="all")
    )
    node.embedding = node_embedding

## Load nodes into Vector store

In [66]:
vector_store.add(nodes)

Upserted vectors:   0%|          | 0/107 [00:00<?, ?it/s]

['18cdea43-0844-4a87-8433-a8d9fe47ee1e',
 'd88b79ae-0bd2-4d7d-807d-20a14a6ea65b',
 '9e78f7cc-e143-4913-87ff-87621c596ba1',
 '31a652ef-65d1-41fb-b511-b0cf33b51621',
 '18a10c4a-cd40-423b-9f65-b35b0c4fe111',
 'dd11b4f5-5f52-43b3-a0d5-5d588d005d2e',
 '4bd965ca-4539-460b-a746-cbe7b4944f5e',
 '7d841bdc-5d3f-42c3-bb01-abf694c1c920',
 'f3071bf0-4952-47a4-9d69-fd1631351407',
 'b82c1188-4a65-4e2f-ba93-430018c0ae57',
 'd4df8199-9385-4ce7-81e4-df32e89bf07a',
 '408928b8-87dd-43db-a786-26d590662b42',
 'a0af831d-177f-4c73-b635-14570b4b69b3',
 'edbc57f2-138a-477c-9abf-4b2a4ffbc526',
 'e217715a-66ed-464e-bea6-d7ca8d05b915',
 '9b20d0c7-4b46-4e9f-ab95-360d8ebe27bc',
 '8ebbc4fe-0e29-4e57-acd7-19c712f6480e',
 'f37beb49-b66b-44f1-b1ff-13569a7d6b66',
 'ce8d3c95-1dfd-44f9-ae27-50861db562bc',
 'eb49430c-44dd-4dc7-b623-4694a5462ac6',
 '3bce9f19-9dd2-4c6e-b424-41fad24fab71',
 '4cf59ae2-e93e-42a3-9e92-de9ae75594d1',
 '471a7bed-dbba-45f9-8c46-929117af104e',
 'b02d5b87-e1f5-4168-93d2-820e833b4fc4',
 '8e6d8dd9-8f32-

# Retrieve and Query from Vector Store

In [73]:
index = VectorStoreIndex.from_vector_store(vector_store)
query_engine = index.as_query_engine()
query_str = "what are the sizes of the variants of Llama-2 that they are releasing"

In [74]:
response = query_engine.query(query_str)
print(str(response))

The sizes of the variants of Llama-2 that are being released are 7B, 13B, and 70B.
