In [2]:
from dotenv import load_dotenv
load_dotenv(r'C:\Users\DELL\OneDrive\Desktop\chatbot\env')

True

In [6]:
# Required Libraries
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import CSVLoader
from langchain.indexes import VectorstoreIndexCreator
from langchain.vectorstores import DocArrayInMemorySearch
from langchain.embeddings import OpenAIEmbeddings
from langchain.evaluation.qa import QAGenerateChain, QAEvalChain
import langchain

# Step 1: Load CSV Data
# Define the path to the CSV file and load the data using CSVLoader
file_path = 'OutdoorClothingCatalog_1000.csv'  # Specify the file path of your CSV
loader = CSVLoader(file_path=file_path)  # Load the CSV data using the CSVLoader
data = loader.load()  # Load the data into a variable `data`

# Step 2: Set Up the Embedding Model
# Create the embedding model using OpenAI's embeddings
embeddings = OpenAIEmbeddings()  # Using OpenAI's pre-trained embeddings for vectorizing text

# Step 3: Create Index for Retrieval
# Use the VectorstoreIndexCreator to create an index from the loaded data
index = VectorstoreIndexCreator(
    vectorstore_cls=DocArrayInMemorySearch,  # Use in-memory search for indexing
    embedding=embeddings  # Set the embedding model for vectorizing the data
).from_loaders([loader])  # Load the index using the data from the CSV

# Step 4: Initialize Chat Model (LLM)
# Create the ChatOpenAI model with a specific temperature (0.0 for deterministic answers)
llm = ChatOpenAI(temperature=0.0, model='gpt-3.5-turbo')  # Using GPT-3.5-turbo for language generation

# Step 5: Create Retrieval QA Chain
# Set up the RetrievalQA chain using the ChatOpenAI model and the created index
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,  # Language model (LLM) for question answering
    chain_type="stuff",  # Type of chain for handling the document retrieval and answering
    retriever=index.vectorstore.as_retriever(),  # Use the vectorstore's retriever for fetching relevant documents
    verbose=True,  # Enable verbose output to see additional debug information
    chain_type_kwargs={"document_separator": "<<<<>>>>>"}
)

# Step 6: Example Queries for Testing
# Define a list of example queries and expected answers for testing
examples = [
    {"query": "Do the Cozy Comfort Pullover Set have side pockets?", "answer": "Yes"},
    {"query": "What collection is the Ultra-Lofty 850 Stretch Down Hooded Jacket from?", "answer": "The DownTek collection"}
]

# Step 7: Generate Additional Examples Using LLM
# Generate new question-answer examples from the first few rows of the data
example_gen_chain = QAGenerateChain.from_llm(ChatOpenAI(model='gpt-3.5-turbo'))  # Create a chain to generate QA pairs
new_examples = example_gen_chain.apply_and_parse([{"doc": t} for t in data[:5]])  # Apply the chain to the first 5 rows of data

# Ensure new examples have the correct structure
# We will now iterate over the newly generated examples to check for the 'query' key.
for ex in new_examples:
    if 'query' not in ex:
        print(f"Skipping malformed example: {ex}")
    else:
        examples.append(ex)  # Only add valid examples that contain 'query'

# Step 8: Testing a Query
# Test one of the queries from the examples list
print("Testing Query:", examples[0]["query"])  # Print the query being tested
print("Response:", qa_chain.run(examples[0]["query"]))  # Get and print the response to the query

# Step 9: LLM-Assisted Evaluation (Optional)
# Ensure the examples are formatted correctly before applying the QA chain
formatted_examples = [{"query": example["query"]} for example in examples if 'query' in example]  # Filter out examples without 'query'

# Now we apply the `qa_chain` to the formatted examples
# The `apply()` method requires input in the form of a list of dictionaries containing at least the 'query' key
predictions = qa_chain.apply(formatted_examples)  # Apply the QA chain to the formatted examples

# Step 10: Initialize Evaluation Chain
# Create the QAEvalChain to evaluate the predictions generated by the QA chain
eval_chain = QAEvalChain.from_llm(ChatOpenAI(temperature=0, model='gpt-3.5-turbo'))

# Step 11: Evaluate Predictions and Compare to Expected Answers
# Compare the predicted answers to the expected answers and grade them
graded_outputs = eval_chain.evaluate(examples, predictions)

# Step 12: Display Evaluation Results
# Loop through each example to display the results of the evaluation
for i, example in enumerate(examples):
    print(f"\nExample {i + 1}:")
    print(f"Question: {predictions[i]['query']}")  # Print the predicted question
    print(f"Expected Answer: {example['answer']}")  # Print the expected answer
    print(f"Predicted Answer: {predictions[i]['result']}")  # Print the predicted answer from the model
    grade = graded_outputs[i].get('text', graded_outputs[i].get('evaluation', 'No grade available'))  # Get the grade for the prediction
    print(f"Predicted Grade: {grade}")  # Print the grade for the predicted answer

# Step 13: Access LangChain Plus (Optional)
# Print LangChain Plus details for evaluation and further improvements
print("\nAccess LangChain Plus Evaluation Platform:")
print("URL: https://www.langchain.plus/")  # LangChain Plus platform URL
print("Invite Code: lang_learners_2023")  # Example invite code for LangChain Plus




Skipping malformed example: {'qa_pairs': {'query': "What materials are the Women's Campside Oxfords made of, and what features contribute to their comfort?", 'answer': "The Women's Campside Oxfords are made of soft canvas material for a broken-in feel and look. They also feature a comfortable EVA innersole with Cleansport NXT® antimicrobial odor control, a moderate arch contour, EVA foam midsole for cushioning and support, and a chain-tread-inspired molded rubber outsole with a modified chain-tread pattern."}}
Skipping malformed example: {'qa_pairs': {'query': 'What are the dimensions of the small and medium sizes of the Recycled Waterhog Dog Mat?', 'answer': 'The small size has dimensions of 18" x 28" and the medium size has dimensions of 22.5" x 34.5".'}}
Skipping malformed example: {'qa_pairs': {'query': "What are some key features of the Infant and Toddler Girls' Coastal Chill Swimsuit, Two-Piece as described in the document?", 'answer': 'Some key features of the swimsuit include b