In [31]:
#importing necessary libraries
import os

import nest_asyncio
from dotenv import load_dotenv
from llama_index.core import SimpleDirectoryReader, Document
from llama_index.core import StorageContext, VectorStoreIndex
from llama_index.vector_stores.pinecone import PineconeVectorStore
from llama_parse import LlamaParse
from openai import OpenAI
from pinecone import Pinecone, ServerlessSpec
# Load variables from a local .env file into the process environment so the
# os.getenv(...) lookups for the API keys in later cells can find them.
load_dotenv()

True

In [None]:
# LlamaParse setup
# Allow nested event loops first: the notebook kernel already runs one.
nest_asyncio.apply()

# Parser for PDF files; result_type may be "markdown" or "text".
parser = LlamaParse(api_key=os.getenv("LLAMA_CLOUD_API_KEY"), result_type="markdown")

# Route every ".pdf" file through the LlamaParse parser.
file_extractor = {".pdf": parser}

In [21]:
#Llama Parsing
# NOTE(review): this cell duplicates the preceding Llama Parsing cell (same
# parser configuration and the same ".pdf" extractor mapping). Running it again
# only rebinds `parser` and `file_extractor`; consider deleting one copy.
parser = LlamaParse(
    api_key=os.getenv("LLAMA_CLOUD_API_KEY"),
    result_type="markdown"  # "markdown" and "text" are available
)
# Map the ".pdf" extension to the LlamaParse parser.
file_extractor = {".pdf": parser}
# Patch asyncio so nested event loops are allowed inside the notebook kernel.
nest_asyncio.apply()

In [33]:
# OpenAI client (API key comes from the environment)
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

# Pinecone client (API key comes from the environment)
pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))

# Name of the Pinecone index that stores the embeddings
index_name = 'ai-tutor'

# Create the index only when it is missing, so that re-running this cell does
# not fail on an index that already exists.
existing_index_names = pc.list_indexes().names()
if index_name not in existing_index_names:
    serverless_spec = ServerlessSpec(cloud='aws', region='us-east-1')
    pc.create_index(
        name=index_name,
        dimension=1536,  # presumably the embedding model's vector size; TODO confirm
        metric='cosine',
        spec=serverless_spec,
    )


In [19]:
#Processing the PDF file
def process_pdf(file_path):
    """Parse one or more PDF files and index their contents in Pinecone.

    Args:
        file_path: A single path (``str`` or ``os.PathLike``) or an iterable
            of paths to the files to ingest.

    Returns:
        The ``VectorStoreIndex`` built from the parsed documents and backed by
        the Pinecone index named ``index_name``.
    """
    # SimpleDirectoryReader expects a list of files; a bare string would be
    # iterated character by character, so wrap a single path in a list.
    if isinstance(file_path, (str, os.PathLike)):
        input_files = [file_path]
    else:
        input_files = list(file_path)

    documents = SimpleDirectoryReader(
        input_files=input_files,
        file_extractor=file_extractor,
    ).load_data()

    vector_store = PineconeVectorStore(pinecone_index=pc.Index(index_name))
    # The documents must be handed to the index explicitly; routing the
    # vector store through a StorageContext makes from_documents embed them
    # and write the vectors to Pinecone.
    storage_context = StorageContext.from_defaults(vector_store=vector_store)
    index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)
    return index

#Generate a response
def generate_response(query, index, model='gpt-4o-mini', top_k=3, llm_client=None):
    """Answer ``query`` using context retrieved from ``index``.

    Args:
        query: The user's question.
        index: An index object exposing ``as_retriever(similarity_top_k=...)``.
        model: Chat model identifier sent to the completions API.
        top_k: Number of most-similar nodes to retrieve as context.
        llm_client: Optional OpenAI-compatible client; when omitted, the
            notebook-level ``client`` is used.

    Returns:
        The text content of the first completion choice.
    """
    # Resolve the client lazily so the global is only required when no
    # explicit client is supplied.
    active_client = client if llm_client is None else llm_client

    retriever = index.as_retriever(similarity_top_k=top_k)
    retrieved_nodes = retriever.retrieve(query)

    # Concatenate the text of every retrieved node into one context block.
    context = '\n'.join(item.node.text for item in retrieved_nodes)
    prompt = f"Context:\n{context}\n\nQuestion: {query}\n\nAnswer:"

    completion = active_client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": "You are a knowledgeable AI tutor. Use the provided context to answer the question."},
            {"role": "user", "content": prompt}
        ]
    )

    return completion.choices[0].message.content


#Entry point intended for a future Next.js API route that ingests a PDF
def handle_pdf(filepath):
    """Process the PDF at ``filepath`` and report success.

    Args:
        filepath: Path of the PDF, forwarded unchanged to ``process_pdf``.

    Returns:
        The string ``'PDF processed successfully'``. Any exception raised by
        ``process_pdf`` propagates to the caller.
    """
    # The index returned by process_pdf is not needed here, so it is not bound.
    process_pdf(filepath)
    return 'PDF processed successfully'


#Entry point intended for the API route that answers a user's question
def handle_query(query):
    """Answer ``query`` against the existing Pinecone index.

    Builds an index view over the Pinecone index named ``index_name`` and
    passes it, with the query, to ``generate_response``.

    Args:
        query: The user's question.

    Returns:
        Whatever ``generate_response`` returns for this query.
    """
    pinecone_index = pc.Index(index_name)
    store = PineconeVectorStore(pinecone_index=pinecone_index)
    query_index = VectorStoreIndex.from_vector_store(store)
    return generate_response(query, query_index)





