In [21]:
from dotenv import load_dotenv
import os
import openai
import time
# Read variables from a local .env file into the process environment, then
# hand the value stored under OPEN_AI to the openai client (None if unset).
load_dotenv()
openai.api_key = os.environ.get('OPEN_AI')

In [14]:
'''
This cell imports the dependencies and creates the service context.
The service context can either be OpenAI or Tiny-Vicuna-1B
'''
from llama_index import VectorStoreIndex, SimpleDirectoryReader
from llama_index import ServiceContext, llms, PromptTemplate, set_global_service_context

# OpenAI-backed service context: gpt-3.5-turbo at temperature 0.20
service_context = ServiceContext.from_defaults(
    llm=llms.OpenAI(
        model="gpt-3.5-turbo",
        temperature=0.20,
    ),
)

# For Vicuna
# service_context = ServiceContext.from_defaults(
#   llm = llms.HuggingFaceLLM(
#     model_name = "Jiayi-Pan/Tiny-Vicuna-1B",
#     tokenizer_name = "Jiayi-Pan/Tiny-Vicuna-1B",
#     query_wrapper_prompt = PromptTemplate("<|system|>\nYou are a chatbot who can help students!</s>\n<|user|>\n{query_str}</s>\n<|assistant|>\n"),
#     device_map = "balanced"
#   ),
#   # embed_model must be an embedding model, not an LLM wrapper;
#   # the "local:<model name>" form loads a HuggingFace embedding model.
#   embed_model = "local:microsoft/mpnet-base"
# )


In [15]:
# Make the chosen service context the llama_index-wide default
set_global_service_context(service_context)

# Load the files found in the "documents" directory and index them
documents = SimpleDirectoryReader("documents").load_data()
index = VectorStoreIndex.from_documents(
    documents,
    show_progress=True,
    service_context=service_context,
)

# Write the index's storage context to disk so it can be reloaded later
index.storage_context.persist('./index_storage/')

# Query engine over the index: top-3 similar nodes, tree_summarize response mode
query_engine = index.as_query_engine(
    similarity_top_k=3,
    response_mode="tree_summarize",
)

Parsing nodes: 100%|██████████| 7/7 [00:00<00:00, 631.92it/s]
Generating embeddings: 100%|██████████| 7/7 [00:00<00:00, 19.21it/s]


In [43]:
# Define a helper that puts it all together, then run it on a sample question
def get_response(query, engine=None):
    """Answer ``query`` with a query engine and report timing and sources.

    Args:
        query: Question text passed to the engine's ``query`` method.
        engine: Optional object exposing ``query(text)``. When omitted, the
            notebook-level ``query_engine`` is looked up at call time, so
            rebinding that global changes which engine is used.

    Returns:
        A tuple ``(answer, time_elapsed, page_numbers, document_labels)``:
        ``answer`` is ``response.response``; ``time_elapsed`` is the number
        of seconds spent inside the ``query`` call; ``page_numbers`` is a
        list of the ``'page_label'`` values of the source entries that have
        one; ``document_labels`` is the set of ``'file_name'`` values.
    """
    if engine is None:
        engine = query_engine
    # perf_counter is monotonic, so the interval cannot be skewed by
    # system clock adjustments the way time.time() differences can.
    st = time.perf_counter()
    response = engine.query(query)
    time_elapsed = time.perf_counter() - st
    # metadata may be None, and not every source carries every key
    # (e.g. a source without page numbers has no 'page_label').
    sources = list((response.metadata or {}).values())
    page_numbers = [meta['page_label'] for meta in sources if 'page_label' in meta]
    document_labels = {meta['file_name'] for meta in sources if 'file_name' in meta}
    return response.response, time_elapsed, page_numbers, document_labels

# Sample question; as the cell's last expression, the returned tuple is displayed
get_response('How is the class graded?')

('The class is graded based on activities and a final project. There will be a total of 12 activities throughout the semester, but only the 10 best activity grades will count towards the final grade. Each activity is worth 14 points, making up 70% of the final grade. The final project is worth 60 points, which accounts for 30% of the final grade. The grading scale is provided, ranging from A to F, with specific percentage ranges for each grade. Late work is accepted but will be worth 80% of the overall grade for the activity.',
 2.421814441680908,
 ['2', '3', '6'],
 {'PSY-GS-8875_Syllabus_S2024.pdf'})

In [41]:
# Load the vector store from file and run the query again
from llama_index import StorageContext, load_index_from_storage
storage_context = StorageContext.from_defaults(
    persist_dir='./index_storage/'
)
index = load_index_from_storage(storage_context)
# Rebuild the query engine from the reloaded index. get_response reads the
# global query_engine, which would otherwise still wrap the index built in
# memory earlier, so the persisted copy would never actually be queried.
query_engine = index.as_query_engine(
    response_mode="tree_summarize",
    similarity_top_k=3
)
get_response('How is the class graded?')