In [75]:
import os
import openai
import sys
# Add the directory two levels up to the module search path.
sys.path.append('../..')

import panel as pn  # GUI
# Must be called after importing panel; presumably loads Panel's notebook integration — confirm against Panel docs.
pn.extension()

from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv()) # read local .env file

# Indexing os.environ raises KeyError when OPENAI_API_KEY is not set, so a missing key fails here.
openai.api_key  = os.environ['OPENAI_API_KEY']

In [76]:
import datetime

# Choose the chat model name by today's date: the "-0301" name before the
# cutoff date, the plain "gpt-3.5-turbo" name from the cutoff onwards.
_cutoff = datetime.date(2023, 9, 2)
current_date = datetime.datetime.now().date()
llm_name = "gpt-3.5-turbo-0301" if current_date < _cutoff else "gpt-3.5-turbo"
print(llm_name)

gpt-3.5-turbo


In [77]:
from langchain.vectorstores import Chroma
from langchain.embeddings.openai import OpenAIEmbeddings

# Build the Chroma vector store on the ../docs/chroma/ directory, with OpenAI
# embeddings as its embedding function.
persist_directory = '../docs/chroma/'
embedding = OpenAIEmbeddings()
vectordb = Chroma(
    embedding_function=embedding,
    persist_directory=persist_directory,
)

In [78]:
# Sanity check on the store: ask for the 3 closest documents to a sample question
# and display how many came back.
question = "What are major topics for this class?"
docs = vectordb.similarity_search(query=question, k=3)
len(docs)

3

In [79]:
from langchain.chat_models import ChatOpenAI

# Chat model shared by every chain below; built with the model name chosen
# earlier and temperature=0, then smoke-tested with a trivial prompt.
llm = ChatOpenAI(
    temperature=0,
    model_name=llm_name,
)
llm.predict("Hello world!")

'Hello! How can I assist you today?'

In [80]:
# Prompt: answer from the supplied context only, in at most three sentences,
# and always close with "thanks for asking!".
from langchain.prompts import PromptTemplate
template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. Use three sentences maximum. Keep the answer as concise as possible. Always say "thanks for asking!" at the end of the answer. 
{context}
Question: {question}
Helpful Answer:"""
QA_CHAIN_PROMPT = PromptTemplate(
    template=template,
    input_variables=["context", "question"],
)

# Retrieval QA chain over the vector store, wired to the custom prompt and
# asked to return the source documents alongside the answer.
from langchain.chains import RetrievalQA
question = "Is probability a class topic?"
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=vectordb.as_retriever(),
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
    return_source_documents=True,
)

# Run the chain once; the answer text is stored under the "result" key.
result = qa_chain({"query": question})
result["result"]

'Yes, probability is a topic assumed to be familiar to students in this class, as mentioned by the instructor. Thanks for asking!'

In [81]:
# Wording test, part 1: asked with the abbreviation "TAs", the recorded answer is "I don't know".
# The next cell asks the same thing with "Teaching assistant" spelled out and does get the names.
result = qa_chain({"query": "who are the TAs"})
result["result"]

"I don't know.\nThanks for asking!"

In [82]:
# Wording test, part 2: same question as the previous cell, but with "Teaching assistant"
# spelled out instead of "TAs"; with this wording the recorded answer lists the assistants by name.
result = qa_chain({"query": "who are the Teaching assistant of the class"})
result["result"]

'The teaching assistants for the class are Paul Baumstarck, Catie Chang, Tom Do, Zico Kolter, and Daniel Ramage. Thanks for asking!'

In [83]:
# Follow-up without chat history: qa_chain was built without any memory, so it has no context
# about whose qualification is being asked for. It either answers that it does not know, or
# mentions big-O notation and data-structures knowledge, probably reading the question as
# being about the course prerequisites.
# NOTE(review): "there" is a typo for "their"; the query string is left as-is because the
# recorded answer was produced with this exact wording.
result = qa_chain({"query": "what is there qualification"})
result["result"]

"I don't know, as the context provided does not mention the qualifications of the individuals in the group. Thanks for asking!"

In [84]:
# Memory
from langchain.memory import ConversationBufferMemory

# Conversation buffer handed to the conversational chain below. The history is
# stored under the "chat_history" key; return_messages=True presumably returns it
# as message objects rather than one string — confirm against the LangChain docs.
memory = ConversationBufferMemory(return_messages=True, memory_key="chat_history")

In [85]:
# ConversationalRetrievalChain
from langchain.chains import ConversationalRetrievalChain

# Same LLM and vector-store retriever as qa_chain, but this chain is also given
# `memory`, so earlier turns are available when a follow-up question arrives.
retriever = vectordb.as_retriever()
qa = ConversationalRetrievalChain.from_llm(llm=llm, memory=memory, retriever=retriever)

In [86]:
"""
Note: the answer here differs from the one shown in the course. Unlike the course run, this one is actually correct,
possibly because the model has been updated. The TA question asked above is therefore a better test.
"""
# First turn with the memory-backed chain. Its input key is "question"
# (qa_chain above takes "query").
question = "Is probability a class topic?"
result = qa({"question": question})

In [87]:
# Show the "answer" entry of the result from the previous cell.
result['answer']

'Yes, probability is a class topic in the course being described. The instructor assumes familiarity with basic probability and statistics, so it will likely be covered in the context of machine learning and related algorithms.'

In [88]:
# Follow-up turn: the query never says which prerequisites "those" are, so a
# sensible answer depends on the earlier turn held in the chain's memory.
# NOTE(review): "prerequesites" is misspelled; left as-is because the recorded
# answer was produced with this exact query.
question = "why are those prerequesites needed?"
result = qa({"question": question})

In [89]:
# Show the answer to the follow-up question.
result['answer']

'Familiarity with basic probability and statistics is needed for the course because the material covered will involve concepts such as random variables, expectation, variance, and other statistical concepts. Understanding these basics is essential for grasping the machine learning concepts that will be taught in the course.'

In [96]:
# Same abbreviated wording ("TAs") that the memory-less qa_chain answered with
# "I don't know" earlier in the notebook.
question = "who are the TAs?"
result = qa({"question": question})

In [97]:
"""
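This time it provided a correct answer. When this note was written the answer was: 'The TAs are Teaching Assistants who assist in teaching the class and grading homework problems.' The output recorded below differs (it lists the TAs by name), presumably because the cell was re-run later.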
"""
# Show the answer to the "who are the TAs?" question.
# NOTE(review): this cell's execution count (97) is higher than that of the later cell (94)
# that rebinds `qa` with chain_type="refine", so the recorded output may come from the
# refine chain — confirm with Restart & Run All.
result['answer']

'The Teaching Assistants in the course "MachineLearning-Lecture01" are:\n1. Paul Baumstarck: Works in machine learning and computer vision.\n2. Catie Chang: A neuroscientist who applies machine learning algorithms to understand the human brain.\n3. Tom Do: Works in computational biology and the basic fundamentals of human learning.\n4. Zico Kolter: Head TA, works in machine learning and applies them to robots.\n5. Daniel Ramage: Applies learning algorithms (not present during the introduction).\n\nAdditionally, students are encouraged to form study groups to help each other understand the concepts and tackle the challenging problem sets in the course. The TAs can be reached collectively at cs229-qa@cs.stanford.edu for quick responses to questions related to assignments or course material.'

In [92]:
# Follow-up that never mentions the TAs; same wording that qa_chain (no memory)
# could not answer earlier in the notebook.
# NOTE(review): "there" is a typo for "their"; left as-is because the recorded
# answer was produced with this exact query.
question = "what is there qualification?"
result = qa({"question": question})

In [93]:
"""
When this note was written, this returned 'The qualifications of the Teaching Assistants (TAs) mentioned in the course
being described are not explicitly mentioned in the provided context.' That is a separate issue; the output recorded
below, presumably from a later re-run, does describe their backgrounds.

The important thing to note here is that the query never mentions TAs, yet the answer shows that the model used the
chat history to understand that we are talking about the TAs.
"""
# Show the answer to the qualification follow-up.
result['answer']

'The Teaching Assistants (TAs) in the course have backgrounds in machine learning, computer vision, neuroscience, computational biology, and human learning. They are all graduate students doing research in or related to machine learning.'

In [94]:
# ConversationalRetrievalChain with chain_type="refine"
#
# This chain gets its own name and its own, empty memory instead of rebinding `qa`
# and reusing `memory`:
#   * rebinding `qa` meant that re-running any earlier `qa(...)` cell silently went
#     through the refine chain (the notebook's execution counts suggest this happened
#     for the "who are the TAs?" cell);
#   * sharing `memory` fed the earlier conversation into this chain and appended this
#     turn to it, so the comparison was not only about the chain type.
# The question below names the Teaching Assistants explicitly, so it needs no history.
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
retriever=vectordb.as_retriever()
memory_refine = ConversationBufferMemory(
    memory_key="chat_history",
    return_messages=True
)
qa_refine = ConversationalRetrievalChain.from_llm(
    llm,
    retriever=retriever,
    memory=memory_refine,
    chain_type="refine"
)

# NOTE(review): "there" is a typo for "their"; the query is left as-is so the recorded
# answer below still corresponds to it.
question = "what is there qualification of Teaching Assistants?"
result = qa_refine({"question": question})


In [95]:
"""
Unlike the answer above, with the refine chain type we got a correct answer about the TAs' qualifications. This question
was added only to show the difference between the stuff and refine chain types; it has nothing to do with the
chat-history limitation.
"""
# Show the answer produced by the chain built with chain_type="refine".
result['answer']

'Based on the new context provided, the Teaching Assistants in the course have qualifications related to machine learning and various other fields. The TAs are all graduate students conducting research in or related to machine learning. Specifically, Paul Baumstarck works in machine learning and computer vision, Catie Chang is a neuroscientist applying machine learning algorithms to understand the human brain, Tom Do works in computational biology and human learning fundamentals, and Zico Kolter, the head TA, applies machine learning to robots. Daniel Ramage also applies learning algorithms, although he is not present in the current discussion.'