### Evaluation of LLM Response

In [1]:
import os
from dotenv import load_dotenv
from langchain_openai import ChatOpenAI
from langchain.prompts import PromptTemplate

In [2]:
# Load variables from a local .env file (if any) into the process environment
# so the API key below can be read without hardcoding it in the notebook.
load_dotenv()

model_name = "gpt-3.5-turbo-0125"
# Fails fast with KeyError when the key is not configured.
openai_api_key = os.environ["OPENAI_API_KEY"]

# Collect the chat-model settings in one place; `llm` is shared by the
# retrieval chain and the grading chain further down.
llm_settings = {
    "model_name": model_name,
    "temperature": 0.1,
    "max_tokens": 2000,
    "openai_api_key": openai_api_key,
}
llm = ChatOpenAI(**llm_settings)

In [3]:
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain.document_loaders import TextLoader
from langchain.evaluation.qa import QAEvalChain
from langchain_openai import OpenAIEmbeddings

In [4]:
# Load the source text file; `document` is a sequence whose entries expose
# the raw text as `.page_content`.
loader = TextLoader("./worked.txt")
document = loader.load()

# Sanity-check what was loaded: number of entries, then size of the first one.
doc_count = len(document)
print(f"You have now {doc_count} document!")

first_page_chars = len(document[0].page_content)
print(f"Totally {first_page_chars} characters in the document!")

You have now 1 document!
Totally 75040 characters in the document!


In [5]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Break the loaded text into overlapping chunks (3000 characters each,
# 400 characters shared between neighbours) before embedding.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=3000,
    chunk_overlap=400,
)
documents = text_splitter.split_documents(documents=document)

In [6]:
# Embed each chunk with the OpenAI embedding model and index the resulting
# vectors in a FAISS store used for retrieval below.
embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)
document_search = FAISS.from_documents(
    documents=documents,
    embedding=embeddings,
)

In [7]:
# Expose the FAISS index as a retriever with its default search settings.
retriever = document_search.as_retriever()

# Retrieval-augmented QA chain. `input_key="question"` makes the chain read
# the "question" field of each example dict passed to it later.
chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    input_key="question",
)

In [None]:
# Hand-written evaluation set as (question, reference answer) pairs.
_qa_pairs = (
    (
        "which company sold the microcomputer kit that his friend built himself?",
        "Heathkit",
    ),
    (
        "what was the small city he talked about in the city that is the financial capital of USA?",
        "Yorkville, NY",
    ),
    (
        "which are the grad schools he had applied ?",
        "MIT, Yale, Harvard",
    ),
)

# Expand into the dict form consumed downstream: "question" feeds the QA
# chain, "answer" is the reference the grader compares against.
question_answers = [
    {"question": question_text, "answer": reference_answer}
    for question_text, reference_answer in _qa_pairs
]

In [20]:
# Run the retrieval-QA chain over every evaluation example.
# `Chain.apply` is deprecated since LangChain 0.1.0; `batch` is the documented
# replacement. It returns one dict per example, in input order, containing the
# input fields plus the chain's "result" field.
predictions = chain.batch(question_answers)

predictions

[{'question': 'which company sold the microcomputer kit that his friend built himself?',
  'answer': 'Heathkit',
  'result': 'The company that sold the microcomputer kit that his friend built himself was Heathkit.'},
 {'question': 'what was the small city he talked about in the city that is the financial capital of USA?',
  'answer': 'Yorkville, NY',
  'result': 'The small city mentioned in the context is Santa Cruz, located in California. The financial capital of the USA is New York City.'},
 {'question': 'which are the grad schools he had applied ?',
  'answer': 'MIT, Yale, Harva',
  'result': 'The user applied to MIT, Yale, and Harvard for grad school.'}]

In [21]:
# Build the grading chain on top of the same chat model used for answering.
evaluation_chain = QAEvalChain.from_llm(llm=llm)

In [22]:
# Grade each prediction against its reference answer. The *_key arguments
# name the dict fields holding the question, the reference answer, and the
# chain's generated answer respectively.
graded_output = evaluation_chain.evaluate(
    examples=question_answers,
    predictions=predictions,
    question_key="question",
    answer_key="answer",
    prediction_key="result",
)

In [23]:
# Display the grader's verdicts, one entry per evaluation example.
graded_output

[{'results': 'CORRECT'}, {'results': 'INCORRECT'}, {'results': 'CORRECT'}]