In [3]:
from dotenv import load_dotenv
load_dotenv(r'C:\Users\DELL\OneDrive\Desktop\chatbot\env')  # Load environment variables, such as API keys

True

In [6]:
# Import necessary modules
from langchain.chains import RetrievalQA  # For setting up a retrieval-based QA system
from langchain.chat_models import ChatOpenAI  # For using OpenAI's chat models for generating responses
from langchain.document_loaders import CSVLoader  # For loading CSV files as documents
from langchain.vectorstores import DocArrayInMemorySearch  # In-memory search for document similarity search
from IPython.display import display, Markdown  # For displaying markdown output in Jupyter notebooks
from langchain.llms import OpenAI  # For integrating OpenAI's language models
import pandas as pd  # For reading and manipulating data with DataFrames

# Import VectorstoreIndexCreator from langchain.indexes
from langchain.indexes import VectorstoreIndexCreator  # Import the missing class

# Set up OpenAI model (GPT-3.5 turbo with instruction fine-tuning)
llm = OpenAI(model='gpt-3.5-turbo-instruct', temperature=0.9)  # Set up the language model with specific parameters

# Load the dataset into a pandas dataframe
df = pd.read_csv('OutdoorClothingCatalog_1000.csv')  # Load the catalog CSV file into a DataFrame
df  # Display the DataFrame to inspect the content

# Initialize a document loader for the CSV file (used in langchain for loading documents)
file = 'OutdoorClothingCatalog_1000.csv'  # File path to the CSV
loader = CSVLoader(file_path=file)  # Use CSVLoader to load the CSV as documents

# Embedding model setup using OpenAI Embeddings for semantic search
from langchain.embeddings import OpenAIEmbeddings  # Import OpenAI's embedding model

embedding = OpenAIEmbeddings()  # Initialize the embedding model

# Create a vector store using the loaded documents and embeddings
index = VectorstoreIndexCreator(
    vectorstore_cls=DocArrayInMemorySearch,  # Choose in-memory search for vector storage
    embedding=embedding  # Use the defined embedding model
).from_loaders([loader])  # Load documents via the CSV loader into the vector store

# Query to search for shirts with sun protection from the catalog using the index
query = "Please list all your shirts with sun protection in a table in markdown and summarize each one."  # Query to the model
response = index.query(query, llm=llm)  # Query the index for relevant documents and generate a response using the model
display(Markdown(response))  # Display the generated response as markdown



|Name| Summary|
|---|---|
|Men's Tropical Plaid Short-Sleeve Shirt| Rated UPF 50+ for superior protection from the sun's UV rays. Made of 100% polyester, wrinkle-resistant with front and back cape venting and two front bellows pockets. Provides highest rated sun protection possible. |
|Men's Plaid Tropic Shirt, Short-Sleeve| Rated UPF 50+ and great for extended travel. SunSmart technology blocks 98% of the sun's harmful UV rays. Made with 52% polyester and 48% nylon, wrinkle-free and quick-drying. Front and back cape venting, two front bellows pockets. |
|Men's TropicVibe Shirt, Short-Sleeve| Built-in UPF 50+ coverage with a lightweight feel. Traditional fit with front and back cape venting and two front bellows pockets. Made with 71% nylon and 29% polyester. |
|Sun Shield Shirt by| High-performance sun shirt guaranteed to protect from harmful UV rays. Made with 78% nylon and 22% Lycra Xtra Life fiber, rated UPF 50+. Moisture-wicking and abrasion-resistant. Fits comfortably over

In [7]:
# Reload documents and initialize embeddings again (this section is redundant in the context of the previous code)
from langchain.document_loaders import CSVLoader  # Re-import CSVLoader (this is already imported earlier)
from langchain.embeddings import OpenAIEmbeddings  # Re-import OpenAI embeddings (already done earlier)
from langchain.llms import OpenAI  # Re-import OpenAI LLM (already done earlier)
from langchain.vectorstores import DocArrayInMemorySearch  # Re-import vectorstore (already done earlier)

# Load documents from the CSV again
file = 'OutdoorClothingCatalog_1000.csv'  # File path to the CSV
loader = CSVLoader(file_path=file)  # Use CSVLoader to load the CSV file into documents
docs = loader.load()  # Load documents into a list

# Generate query embeddings for semantic search
embeddings = OpenAIEmbeddings()  # Initialize the embedding model
embed = embeddings.embed_query("Hi my name is Harrison")  # Embed a sample query ("Hi my name is Harrison") into vector space
print(len(embed))  # Print the length of the embedded vector (how many dimensions it has)
print(embed[:5])  # Print the first 5 values of the embedding vector for inspection

# Create the vector store from the documents using the generated embeddings
db = DocArrayInMemorySearch.from_documents(docs, embeddings)  # Create an in-memory vector store from documents using embeddings

# Perform a similarity search with a query to find the most relevant documents
query = "Please suggest a shirt with sunblocking"  # Query asking for a shirt with sun protection
docs = db.similarity_search(query)  # Search for similar documents in the vector store
qdocs = "".join([doc.page_content for doc in docs])  # Combine the contents of the retrieved documents into a single string

# LLM setup for generating responses
llm = OpenAI(temperature=0)  # Initialize OpenAI model again, this time with lower temperature for deterministic responses

# Generate response using the retrieved documents
response = llm(f"{qdocs} Question: Please list all your shirts with sun protection in a table in markdown and summarize each one.")  
# Generate a response from the model using the retrieved documents and query, formatted with additional context
print(response)  # Print the response to the console

# Create a retriever from the vector store to be used in the QA system
retriever = db.as_retriever()  # Create a retriever object from the vector store to query it

# Initialize a RetrievalQA system using the created retriever and language model
qa_stuff = RetrievalQA.from_chain_type(
    llm=llm,  # Language model to use for answering the query
    chain_type="stuff",  # Specify the chain type (stuff means the model is given all the retrieved documents)
    retriever=retriever,  # The retriever that will get the relevant documents from the vector store
    verbose=True  # Set verbose to True to see the steps the system is performing
)

# Query for retrieving information on shirts with sun protection
query =  "Please list all your shirts with sun protection in a table in markdown and summarize each one."  # Query asking for shirts with sun protection
response = qa_stuff.run(query)  # Run the QA system to get a response based on the query and retriever

# Display the final response as markdown in the Jupyter notebook
display(Markdown(response))  # Display the final markdown response

# Query the index again using the initial query and generate a response (duplicate of previous code)
response = index.query(query, llm=llm)  # Query the index again for the same question
display(Markdown(response))  # Display the response as markdown

# Create a vector store from the CSV data and embeddings again (redundant setup, already done earlier)
index = VectorstoreIndexCreator(
    vectorstore_cls=DocArrayInMemorySearch,  # Choose in-memory search for vector storage
    embedding=embeddings,  # Use the defined embedding model
).from_loaders([loader])  # Load documents into the vector store

# Display the final response again as markdown
display(Markdown(response))  # Display the response as markdown

1536
[-0.021990482046978697, 0.006746508733548099, -0.018174780766530476, -0.039186236021381875, -0.014045289898302837]


  response = llm(f"{qdocs} Question: Please list all your shirts with sun protection in a table in markdown and summarize each one.")




| Name | Description | SPF Rating |
|------|-------------|------------|
| Sun Shield Shirt | High-performance sun shirt with UPF 50+ protection. Moisture-wicking and abrasion resistant. | 374 |
| Men's Plaid Tropic Shirt | Ultracomfortable shirt with UPF 50+ coverage. Wrinkle-free and quick-drying. | 535 |
| Men's TropicVibe Shirt | Lightweight shirt with built-in UPF 50+ protection. Front and back cape venting for cool breezes. | 618 |
| Men's Tropical Plaid Short-Sleeve Shirt | Lightest hot-weather shirt with UPF 50+ rating. Wrinkle-resistant and features front and back cape venting. | 98% |


[1m> Entering new RetrievalQA chain...[0m


  response = qa_stuff.run(query)  # Run the QA system to get a response based on the query and retriever



[1m> Finished chain.[0m




| Name | Description | Sun Protection Rating |
| --- | --- | --- |
| Men's Tropical Plaid Short-Sleeve Shirt | Made of 100% polyester, UPF 50+ rated, wrinkle-resistant, front and back cape venting, two front bellows pockets, imported | SPF 50+, blocks 98% of harmful UV rays |
| Men's Plaid Tropic Shirt, Short-Sleeve | Made of 52% polyester and 48% nylon, UPF 50+ rated, SunSmart technology, wrinkle-free, front and back cape venting, two front bellows pockets, imported | SPF 50+, blocks 98% of harmful UV rays |
| Men's TropicVibe Shirt, Short-Sleeve | Made of 71% nylon and 29% polyester, UPF 50+ rated, wrinkle-resistant, front and back cape venting, two front bellows pockets, imported | SPF 50+, blocks 98% of harmful UV rays |
| Sun Shield Shirt | Made of 78% nylon and 22% Lycra Xtra Life fiber, UPF 50+ rated, moisture-wicking, fits comfortably over swimsuit, abrasion-resistant, imported | SPF



| Name | Description | Sun Protection Rating |
| --- | --- | --- |
| Men's Tropical Plaid Short-Sleeve Shirt | Made of 100% polyester, UPF 50+ rating, wrinkle-resistant, front and back cape venting, two front bellows pockets | SPF 50+, blocks 98% of harmful UV rays |
| Men's Plaid Tropic Shirt, Short-Sleeve | Made of 52% polyester and 48% nylon, UPF 50+ rating, SunSmart technology, wrinkle-free, front and back cape venting, two front bellows pockets | SPF 50+, blocks 98% of harmful UV rays |
| Men's TropicVibe Shirt, Short-Sleeve | Made of 71% nylon and 29% polyester, UPF 50+ rating, front and back cape venting, two front bellows pockets | SPF 50+, blocks 98% of harmful UV rays |
| Sun Shield Shirt | Made of 78% nylon and 22% Lycra Xtra Life fiber, UPF 50+ rating, moisture-wicking, abrasion-resistant, fits over swimsuit | SPF 50+, blocks 98% of harmful UV rays |



| Name | Description | Sun Protection Rating |
| --- | --- | --- |
| Men's Tropical Plaid Short-Sleeve Shirt | Made of 100% polyester, UPF 50+ rating, wrinkle-resistant, front and back cape venting, two front bellows pockets | SPF 50+, blocks 98% of harmful UV rays |
| Men's Plaid Tropic Shirt, Short-Sleeve | Made of 52% polyester and 48% nylon, UPF 50+ rating, SunSmart technology, wrinkle-free, front and back cape venting, two front bellows pockets | SPF 50+, blocks 98% of harmful UV rays |
| Men's TropicVibe Shirt, Short-Sleeve | Made of 71% nylon and 29% polyester, UPF 50+ rating, front and back cape venting, two front bellows pockets | SPF 50+, blocks 98% of harmful UV rays |
| Sun Shield Shirt | Made of 78% nylon and 22% Lycra Xtra Life fiber, UPF 50+ rating, moisture-wicking, abrasion-resistant, fits over swimsuit | SPF 50+, blocks 98% of harmful UV rays |