In [1]:
# Evaluation 
%reset -f
# Evaluating an LLM-based application using LLMs
def get_api_key(dotenv_path='.env'):
    """Load OPENAI_API_KEY from a dotenv file and return it.

    Any OPENAI_API_KEY already present in the process environment is removed
    before the file is loaded, so a stale value from the shell is never
    returned when the file does not define the variable.

    Args:
        dotenv_path: Path of the dotenv file to load. Defaults to '.env'.

    Returns:
        The value of OPENAI_API_KEY after loading the file, or None when the
        variable is not set. A warning is printed when the key is missing or
        empty; no exception is raised.
    """
    import os
    from dotenv import load_dotenv

    # Drop any inherited value so only the dotenv file can supply the key.
    os.environ.pop('OPENAI_API_KEY', None)
    load_dotenv(dotenv_path=dotenv_path, override=True)

    api_key = os.getenv('OPENAI_API_KEY')
    # An empty string (e.g. a bare `OPENAI_API_KEY=` line) is as unusable as a
    # missing variable, so warn in both cases.
    if not api_key:
        print("API_KEY not found.")
    return api_key

In [25]:
import sys

import docarray
import langchain
import langchain_community
import openai
import tiktoken

# Hand the key returned by get_api_key() to the openai module
openai.api_key = get_api_key()

# Report the interpreter and package versions in use for this run
print(f'python version = {sys.version}')
print(f'langchain version = {langchain.__version__}')
print(f'openai version = {openai.__version__}')
print(f'docarray version = {docarray.__version__}')
print(f'LangChain Community Version = {langchain_community.__version__}')

# *** Course dependencies ***
# python version = 3.11.6 (main, Feb  2 2024, 13:53:38) [GCC 11.4.0]
# langchain version = 0.1.5
# openai version = 1.10.0
# docarray version = 0.32.1
# LangChain Community Version = 0.0.17
# *** Course dependencies ***

python version = 3.11.6 (main, Feb  2 2024, 13:53:38) [GCC 11.4.0]
langchain version = 0.1.5
openai version = 1.10.0
docarray version = 0.32.1
LangChain Community Version = 0.0.17


In [3]:
# Model name passed as `model=` to the ChatOpenAI instances created below
llm_model = "gpt-3.5-turbo"

In [4]:
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import CSVLoader
from langchain.indexes import VectorstoreIndexCreator
from langchain.vectorstores import DocArrayInMemorySearch

In [5]:
# Load the product catalog; every CSV row is loaded as one Document.
# The encoding is stated explicitly because the catalog text contains
# non-ASCII characters (e.g. "®", "°") and the platform default encoding
# is not UTF-8 everywhere.
file = 'OutdoorClothingCatalog_1000.csv'
loader = CSVLoader(file_path=file, encoding='utf-8')
csv_data = loader.load()

In [6]:
# Build the vector index over the catalog loader, backed by DocArrayInMemorySearch
index_creator = VectorstoreIndexCreator(vectorstore_cls=DocArrayInMemorySearch)
index = index_creator.from_loaders([loader])

  warn_deprecated(


In [7]:
# Chat model used to answer questions (temperature 0.0)
llm = ChatOpenAI(model=llm_model, temperature=0.0)

# Retriever over the vector store built by the index creator
catalog_retriever = index.vectorstore.as_retriever()

# Extra options forwarded to the "stuff" chain; presumably the separator is
# the marker placed between retrieved documents in the prompt (confirm in docs)
stuff_chain_kwargs = {"document_separator": "<<<<>>>>>"}

retrievalQA_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=catalog_retriever,
    chain_type_kwargs=stuff_chain_kwargs,
    verbose=True,
)

  warn_deprecated(


In [8]:
# Sample document from CSV (row 10); its text is the source for the first
# manually written question about side pockets

csv_data[10]

Document(page_content=": 10\nname: Cozy Comfort Pullover Set, Stripe\ndescription: Perfect for lounging, this striped knit set lives up to its name. We used ultrasoft fabric and an easy design that's as comfortable at bedtime as it is when we have to make a quick run out.\n\nSize & Fit\n- Pants are Favorite Fit: Sits lower on the waist.\n- Relaxed Fit: Our most generous fit sits farthest from the body.\n\nFabric & Care\n- In the softest blend of 63% polyester, 35% rayon and 2% spandex.\n\nAdditional Features\n- Relaxed fit top with raglan sleeves and rounded hem.\n- Pull-on pants have a wide elastic waistband and drawstring, side pockets and a modern slim leg.\n\nImported.", metadata={'source': 'OutdoorClothingCatalog_1000.csv', 'row': 10})

In [9]:
# Sample document from CSV (row 11); its text is the source for the second
# manually written question about the DownTek collection

csv_data[11]

Document(page_content=': 11\nname: Ultra-Lofty 850 Stretch Down Hooded Jacket\ndescription: This technical stretch down jacket from our DownTek collection is sure to keep you warm and comfortable with its full-stretch construction providing exceptional range of motion. With a slightly fitted style that falls at the hip and best with a midweight layer, this jacket is suitable for light activity up to 20° and moderate activity up to -30°. The soft and durable 100% polyester shell offers complete windproof protection and is insulated with warm, lofty goose down. Other features include welded baffles for a no-stitch construction and excellent stretch, an adjustable hood, an interior media port and mesh stash pocket and a hem drawcord. Machine wash and dry. Imported.', metadata={'source': 'OutdoorClothingCatalog_1000.csv', 'row': 11})

In [10]:
# Hand-written question / answer pairs used as reference examples for evaluation
_manual_examples = (
    (
        "Do the Cozy Comfort Pullover Set have side pockets?",
        "Yes",
    ),
    (
        "What collection is the Ultra-Lofty 850 Stretch Down Hooded Jacket from?",
        "The DownTek collection",
    ),
)
evaluation_queries = [
    {"query": question, "answer": answer}
    for question, answer in _manual_examples
]

In [11]:
# Import the QA generation chain. It takes in documents and creates a
# question/answer pair from each document, using a language model to do so.

from langchain.evaluation.qa import QAGenerateChain

In [12]:
# Chain that generates question/answer pairs from the data, driven by its own
# ChatOpenAI instance
qa_generation_llm = ChatOpenAI(model=llm_model)
evalution_qa_pair_chain = QAGenerateChain.from_llm(qa_generation_llm)

In [13]:
# Each of the first five product documents is wrapped in a dict under the `doc` key
generation_inputs = [{"doc": document} for document in csv_data[:5]]
parsed_outputs = evalution_qa_pair_chain.apply_and_parse(generation_inputs)

# Show the raw parsed output, one entry per line
for parsed in parsed_outputs:
    print(parsed)

# Keep only the inner dict stored under 'qa_pairs' for each entry
evalution_qa_pairs = [parsed['qa_pairs'] for parsed in parsed_outputs]



{'qa_pairs': {'query': "What is the description of the Women's Campside Oxfords?", 'answer': "The Women's Campside Oxfords are ultracomfortable lace-to-toe Oxfords made of super-soft canvas. They have thick cushioning and quality construction, providing a broken-in feel from the first time they are worn."}}
{'qa_pairs': {'query': 'What are the dimensions of the small size of the Recycled Waterhog Dog Mat, Chevron Weave?', 'answer': 'The dimensions of the small size of the Recycled Waterhog Dog Mat, Chevron Weave are 18" x 28".'}}
{'qa_pairs': {'query': "What are some features of the Infant and Toddler Girls' Coastal Chill Swimsuit?", 'answer': "Some features of the Infant and Toddler Girls' Coastal Chill Swimsuit include bright colors, ruffles, exclusive whimsical prints, four-way-stretch and chlorine-resistant fabric, UPF 50+ rated fabric for sun protection, crossover no-slip straps, fully lined bottom for a secure fit and maximum coverage, and it can be machine washed and line dried 

In [14]:
# Print each generated query/answer dict on its own line
for qa_pair in evalution_qa_pairs:
    print(qa_pair)


{'query': "What is the description of the Women's Campside Oxfords?", 'answer': "The Women's Campside Oxfords are ultracomfortable lace-to-toe Oxfords made of super-soft canvas. They have thick cushioning and quality construction, providing a broken-in feel from the first time they are worn."}
{'query': 'What are the dimensions of the small size of the Recycled Waterhog Dog Mat, Chevron Weave?', 'answer': 'The dimensions of the small size of the Recycled Waterhog Dog Mat, Chevron Weave are 18" x 28".'}
{'query': "What are some features of the Infant and Toddler Girls' Coastal Chill Swimsuit?", 'answer': "Some features of the Infant and Toddler Girls' Coastal Chill Swimsuit include bright colors, ruffles, exclusive whimsical prints, four-way-stretch and chlorine-resistant fabric, UPF 50+ rated fabric for sun protection, crossover no-slip straps, fully lined bottom for a secure fit and maximum coverage, and it can be machine washed and line dried for best results."}
{'query': 'What is th

In [15]:
# First catalog document (row 0), one of the documents passed to the QA generation chain
csv_data[0]

Document(page_content=": 0\nname: Women's Campside Oxfords\ndescription: This ultracomfortable lace-to-toe Oxford boasts a super-soft canvas, thick cushioning, and quality construction for a broken-in feel from the first time you put them on. \n\nSize & Fit: Order regular shoe size. For half sizes not offered, order up to next whole size. \n\nSpecs: Approx. weight: 1 lb.1 oz. per pair. \n\nConstruction: Soft canvas material for a broken-in feel and look. Comfortable EVA innersole with Cleansport NXT® antimicrobial odor control. Vintage hunt, fish and camping motif on innersole. Moderate arch contour of innersole. EVA foam midsole for cushioning and support. Chain-tread-inspired molded rubber outsole with modified chain-tread pattern. Imported. \n\nQuestions? Please contact us for any inquiries.", metadata={'source': 'OutdoorClothingCatalog_1000.csv', 'row': 0})

In [16]:
# Combine manually created QA pairs with generated pairs.
# Only pairs that are not already in the list are appended, so re-running this
# cell does not duplicate examples in the evaluation set.
new_qa_pairs = [
    pair for pair in evalution_qa_pairs if pair not in evaluation_queries
]
evaluation_queries += new_qa_pairs

In [17]:
print(evaluation_queries[0])
# Execute the chain on the first example. `invoke` replaces the deprecated
# `run` convenience method; the answer text is stored under the 'result' key.
retrievalQA_chain.invoke({"query": evaluation_queries[0]["query"]})["result"]

{'query': 'Do the Cozy Comfort Pullover Set have side pockets?', 'answer': 'Yes'}


[1m> Entering new RetrievalQA chain...[0m


  warn_deprecated(



[1m> Finished chain.[0m


'Yes, the Cozy Comfort Pullover Set does have side pockets.'

In [18]:
# Debug mode is left switched off for this run.
# Set `langchain.debug = True` here to get LangChain's debug output while
# investigating a result.

import langchain
langchain.debug = False

print(evaluation_queries)

[{'query': 'Do the Cozy Comfort Pullover Set have side pockets?', 'answer': 'Yes'}, {'query': 'What collection is the Ultra-Lofty 850 Stretch Down Hooded Jacket from?', 'answer': 'The DownTek collection'}, {'query': "What is the description of the Women's Campside Oxfords?", 'answer': "The Women's Campside Oxfords are ultracomfortable lace-to-toe Oxfords made of super-soft canvas. They have thick cushioning and quality construction, providing a broken-in feel from the first time they are worn."}, {'query': 'What are the dimensions of the small size of the Recycled Waterhog Dog Mat, Chevron Weave?', 'answer': 'The dimensions of the small size of the Recycled Waterhog Dog Mat, Chevron Weave are 18" x 28".'}, {'query': "What are some features of the Infant and Toddler Girls' Coastal Chill Swimsuit?", 'answer': "Some features of the Infant and Toddler Girls' Coastal Chill Swimsuit include bright colors, ruffles, exclusive whimsical prints, four-way-stretch and chlorine-resistant fabric, UP

In [19]:
for example in evaluation_queries:
    print(example)

# Run the chain on every example, one at a time and in list order.
# `Chain.apply` is deprecated since version 0.1.0; calling `invoke` per example
# keeps the sequential behaviour and yields the same dicts (the example's own
# keys plus the chain's 'result').
# https://api.python.langchain.com/en/latest/chains/langchain.chains.retrieval_qa.base.RetrievalQA.html
predictions = [retrievalQA_chain.invoke(example) for example in evaluation_queries]
print(evaluation_queries)
print("*******\n")
print(predictions)

# Spot-check the first example: reference fields vs. what the chain returned
print(f"evaluation_queries[0]['query'] = {evaluation_queries[0]['query']}")
print(f"evaluation_queries[0]['answer'] = {evaluation_queries[0]['answer']}")

print(f"predictions[0]['query'] = {predictions[0]['query']}")
print(f"predictions[0]['answer'] = {predictions[0]['answer']}")
print(f"predictions[0]['result'] = {predictions[0]['result']}")


{'query': 'Do the Cozy Comfort Pullover Set have side pockets?', 'answer': 'Yes'}
{'query': 'What collection is the Ultra-Lofty 850 Stretch Down Hooded Jacket from?', 'answer': 'The DownTek collection'}
{'query': "What is the description of the Women's Campside Oxfords?", 'answer': "The Women's Campside Oxfords are ultracomfortable lace-to-toe Oxfords made of super-soft canvas. They have thick cushioning and quality construction, providing a broken-in feel from the first time they are worn."}
{'query': 'What are the dimensions of the small size of the Recycled Waterhog Dog Mat, Chevron Weave?', 'answer': 'The dimensions of the small size of the Recycled Waterhog Dog Mat, Chevron Weave are 18" x 28".'}
{'query': "What are some features of the Infant and Toddler Girls' Coastal Chill Swimsuit?", 'answer': "Some features of the Infant and Toddler Girls' Coastal Chill Swimsuit include bright colors, ruffles, exclusive whimsical prints, four-way-stretch and chlorine-resistant fabric, UPF 50+

  warn_deprecated(



[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m
[{'query': 'Do the Cozy Comfort Pullover Set have side pockets?', 'answer': 'Yes'}, {'query': 'What collection is the Ultra-Lofty 850 Stretch Down Hooded Jacket from?', 'answer': 'The DownTek collection'}, {'query': "What is the description of the Women's Campside Oxfords?", 'answer': "The Women's Campside Oxfords are ultracomfortable lace-to-toe Oxfords made of super-soft canvas. They have thick cushioning and quality construction, providing a broken-in feel from the first time they are worn."}, {'query': 'What are the dimension

In [20]:
from langchain.evaluation.qa import QAEvalChain

In [21]:
# Grading chain: a ChatOpenAI model (temperature 0) wrapped by QAEvalChain
llm = ChatOpenAI(model=llm_model, temperature=0)
eval_chain = QAEvalChain.from_llm(llm=llm)

In [22]:
# Grade every prediction against its reference example
graded_outputs = eval_chain.evaluate(
    examples=evaluation_queries,
    predictions=predictions,
)
print(graded_outputs)

[{'results': 'CORRECT'}, {'results': 'CORRECT'}, {'results': 'CORRECT'}, {'results': 'CORRECT'}, {'results': 'CORRECT'}, {'results': 'CORRECT'}, {'results': 'CORRECT'}]


In [23]:
# Print the reference 'answer' next to the chain's 'result' and its grade,
# one block per evaluation example.
# NOTE(review): the original kept a commented-out variant that read the grade
# from a 'text' key; presumably an older output format. Confirm before relying on it.
for idx in range(len(evaluation_queries)):
    print(f"Example {idx}:")
    prediction = predictions[idx]
    print("Question: " + prediction['query'])
    print("Real Answer: " + prediction['answer'])
    print("Predicted Answer: " + prediction['result'])
    print("Predicted Grade: " + graded_outputs[idx]['results'])
    print()

Example 0:
Question: Do the Cozy Comfort Pullover Set have side pockets?
Real Answer: Yes
Predicted Answer: Yes, the Cozy Comfort Pullover Set does have side pockets.
Predicted Grade: CORRECT

Example 1:
Question: What collection is the Ultra-Lofty 850 Stretch Down Hooded Jacket from?
Real Answer: The DownTek collection
Predicted Answer: The Ultra-Lofty 850 Stretch Down Hooded Jacket is from the DownTek collection.
Predicted Grade: CORRECT

Example 2:
Question: What is the description of the Women's Campside Oxfords?
Real Answer: The Women's Campside Oxfords are ultracomfortable lace-to-toe Oxfords made of super-soft canvas. They have thick cushioning and quality construction, providing a broken-in feel from the first time they are worn.
Predicted Answer: The description of the Women's Campside Oxfords is: "This ultracomfortable lace-to-toe Oxford boasts a super-soft canvas, thick cushioning, and quality construction for a broken-in feel from the first time you put them on."
Predicted 