In [1]:
from dotenv import find_dotenv, load_dotenv

# Load variables from the .env file that find_dotenv() locates.
# Assigning the return value to `_` keeps the cell from echoing it.
# NOTE(review): presumably this supplies the OpenAI credentials used by the
# cells below — confirm against the .env file.
_ = load_dotenv(dotenv_path=find_dotenv())

In [2]:
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma

# Embedding function handed to the vector store.
openai_embedding = OpenAIEmbeddings()

# Open the Chroma store persisted under db/chroma/.
# NOTE(review): this notebook never adds documents, so the directory is
# presumably populated by an earlier step — confirm it exists before running.
vectordb = Chroma(
    persist_directory="db/chroma/",
    embedding_function=openai_embedding,
)

In [3]:
# Basic similarity search: ask the vector store for the k=4 entries that
# best match the question.
question = "What are major subjects for this class?"
similar_docs = vectordb.similarity_search(query=question, k=4)

In [4]:
# Language model shared by every chain in this notebook.
from langchain.chat_models import ChatOpenAI

# temperature=0.0 — presumably chosen to keep answers as repeatable as possible.
llm = ChatOpenAI(temperature=0.0)

# Quick smoke test; as the last expression, the reply is shown as the cell output.
llm.predict(text="hello")

'Hello! How can I assist you today?'

In [5]:
# Build the question-answering prompt from a template.
# The template exposes two placeholders, {context} and {question}, and tells
# the model to reply "Not enough information in Provided Context" rather than
# invent an answer.

from langchain.prompts import PromptTemplate

qa_template = """
Use the context delimited by ``` to answer the question at the end.
Note: If you cannot answer the question, respond with "Not enough information in Provided Context", do not make up an answer on your own.
Context : 
```{context}```
Question : {question}
Answer is : 
"""

QA_PROMPT = PromptTemplate.from_template(template=qa_template)

In [16]:
from langchain.chains import RetrievalQA

question = "Is probability a class topic ?"

# Retrieval QA chain over the vector store.
# - chain_type="stuff": the retrieved text is placed into the prompt's
#   {context} slot.
# - return_source_documents=True: the retrieved documents are returned
#   alongside the answer.
# - chain_type_kwargs: swaps in the custom QA_PROMPT defined above.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vectordb.as_retriever(),
    chain_type_kwargs={"prompt": QA_PROMPT},
    return_source_documents=True,
)


In [17]:
import langchain

# Run the retrieval QA chain once with LangChain's debug tracing enabled so
# the intermediate chain inputs (retrieved context, final prompt) are printed.
#
# `langchain.debug` is a process-wide flag. It is switched off again in a
# `finally` so that tracing is scoped to this single call: if the call raises,
# or cells are later executed out of order, subsequent chain runs are not
# flooded with debug output.
langchain.debug = True
try:
    result = qa_chain({"query": question})
finally:
    langchain.debug = False

[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA] Entering Chain run with input:
[0m{
  "query": "Is probability a class topic ?"
}
[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA > 3:chain:StuffDocumentsChain] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA > 3:chain:StuffDocumentsChain > 4:chain:LLMChain] Entering Chain run with input:
[0m{
  "question": "Is probability a class topic ?",
  "context": "of this class will not be very program ming intensive, although we will do some \nprogramming, mostly in either MATLAB or Octa ve. I'll say a bit more about that later.  \nI also assume familiarity with basic proba bility and statistics. So most undergraduate \nstatistics class, like Stat 116 taught here at Stanford, will be more than enough. I'm gonna \nassume all of you know what ra ndom variables are, that all of you know what expectation \nis, what a variance or a random variable is. And in case of some of you, it'

In [19]:
# Show only the answer text from the chain's response dict.
print(result["result"])

Yes, probability is a class topic.


Add conversation memory to the retrieval QA chain

In [20]:
from langchain.memory import ConversationBufferMemory

# Conversation memory shared with the chat chain; the history is exposed
# under the "chat_history" key.
# NOTE(review): return_messages=True presumably yields message objects
# rather than one concatenated string — confirm against the LangChain docs.
memory = ConversationBufferMemory(return_messages=True, memory_key="chat_history")

In [30]:
from langchain.chains import ConversationalRetrievalChain

retriever = vectordb.as_retriever()

# Conversational chain: takes the chat history and the new question,
# condenses them into a new question, and passes that to the vector store.
qa_chat = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    memory=memory,
)

In [23]:
# First turn of the conversation, run with debug tracing switched off.
langchain.debug = False

question = "Is probability a class topic?"
result = qa_chat({"question": question})

In [26]:
# Answer to the first conversational question.
print(result["answer"])

Yes, probability is covered in this class. The instructor assumes familiarity with basic probability and statistics.


In [28]:
# Follow-up turn: "those prerequisites" only makes sense with the previous
# exchange, so this exercises the chain's memory.
question = "Why are those prerequisites needed?"
result = qa_chat({"question": question})

In [29]:
# Answer to the follow-up question.
print(result["answer"])

The instructor assumes familiarity with basic probability and statistics because these concepts are fundamental to understanding and applying machine learning algorithms. Probability and statistics provide the foundation for understanding uncertainty, making predictions, and evaluating the performance of machine learning models. Without a basic understanding of these concepts, it would be challenging to grasp the underlying principles and techniques used in machine learning.
