In [1]:
from langchain.prompts import PromptTemplate
from dotenv import load_dotenv
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser

# Load settings from a local .env file before any client is created.
# NOTE(review): presumably this is where the OpenAI API key comes from — confirm.
load_dotenv()

In [3]:
# LLM-as-judge prompt. The text is assembled from adjacent literals so that every
# space and line break sent to the model is visible in the source (note the single
# space that precedes the first line break).
template = (
    "Your task is to evaluate the correctness of the response of another language model "
    "to a specific question related to customer service for an ecommerce bookstore. "
    "You are provided with the question, the gold standard answer and the response of "
    "the language model. Output your response as either 'CORRECT' or 'INCORRECT' based "
    "on the similarity of the response to the gold standard answer. \n"
    "\n"
    "Question: {question}\n"
    "\n"
    "Gold standard answer: {answer}\n"
    "\n"
    "Response to evaluate: {response}"
)

# Declares the three placeholders that appear in the template above.
prompt = PromptTemplate(
    input_variables=["question", "answer", "response"],
    template=template,
)

# Judge model, requested with temperature=0.
llm = ChatOpenAI(temperature=0, model="gpt-3.5-turbo")

str_parser = StrOutputParser()

# Pipeline: fill the prompt -> call the chat model -> return the reply as a str.
chain = prompt | llm | str_parser

In [11]:
# Evaluation samples for the judge chain. Each entry supplies the three prompt
# variables: the customer's message, the gold standard answer, and the response
# that is to be graded.
predictions = [
    {
        "question": 'What are your shipping options for international orders?',
        "answer": 'We offer various shipping options for international orders to cater to our customers worldwide. You can select your preferred shipping method during the checkout process. Our system will provide you with estimated delivery times and costs based on your location.',
        "response": 'Thank you for your inquiry! We offer various shipping options for international orders to cater to our customers worldwide. You can select your preferred shipping method during the checkout process. Our system will provide you with estimated delivery times and costs based on your location. If you have any further questions, feel free to ask.',
    },
    {
        "question": 'The book I bought yesterday arrived in perfect condition. Recommended!',
        "answer": "We're happy to hear that you're satisfied with your recent order. Providing excellent service and ensuring your books arrive in perfect condition is always our priority.",
        "response": "Thank you for your feedback! We're glad to hear that the book you purchased is working wonderfully. We appreciate your recommendation and hope you continue to enjoy our products. If you have any further questions or need assistance, please don't hesitate to reach out.",
    },
    {
        "question": 'What books do you have on self-help rated at least 4.7 stars?',
        "answer": 'We have "12 Rules for Life: An Antidote to Chaos" by Jordan B. Peterson, which is rated 4.7 stars and is priced at $20.0, and "Atomic Habits: An Easy & Proven Way to Build Good Habits & Break Bad Ones" by James Clear, which is rated 4.8 stars and is priced at $25.99.',
        "response": 'Thank you for your inquiry! We have two self-help books that are rated at least 4.7 stars. The first one is "12 Rules for Life: An Antidote to Chaos" by Jordan B. Peterson, which is rated 4.7 stars and is priced at $20.0. The second book is "Atomic Habits: An Easy & Proven Way to Build Good Habits & Break Bad Ones" by James Clear, which is rated 4.8 stars and is priced at $25.99. Let me know if you would like more information on either of these books or if you have any other questions.',
    },
    {
        "question": 'I am billed an incorrect amount for my purchase. Can I get help to have the bill corrected?',
        "answer": 'We are sorry to hear about that. Our billing team will look into this and be in contact with you shortly.',
        "response": 'We apologize for the billing error on your purchase. Please provide us with your order details and the correct amount you should have been billed. Our customer service team will investigate this issue and work to correct the billing error promptly. Thank you for bringing this to our attention.',
    },
    {
        "question": 'What is the rating and selling price of the book ‘Holly’ by Stephen King?',
        "answer": 'We do not have the book ‘Holly’ by Stephen King in our store.',
        "response": "Thank you for your inquiry. Unfortunately, we do not have a book titled 'Holly' by Stephen King in our current inventory. If you are interested in other books by Stephen King, please let us know, and we would be happy to provide you with more information.",
    },
]

In [5]:
# Grade every sample with one batched call instead of one copy-pasted cell per
# hard-coded index, so new entries in `predictions` are evaluated automatically.
# `batch` returns the verdicts in the same order as the inputs.
verdicts = chain.batch(predictions)
verdicts

['CORRECT', 'INCORRECT', 'CORRECT', 'CORRECT', 'CORRECT']