In [1]:
pip install langchain langchain-community langchain-openai chromadb jq langchainhub

Collecting langchain
  Downloading langchain-0.1.9-py3-none-any.whl.metadata (13 kB)
Collecting langchain-community
  Downloading langchain_community-0.0.24-py3-none-any.whl.metadata (8.1 kB)
Collecting langchain-openai
  Downloading langchain_openai-0.0.8-py3-none-any.whl.metadata (2.5 kB)
Collecting chromadb
  Downloading chromadb-0.4.24-py3-none-any.whl.metadata (7.3 kB)
Collecting jq
  Downloading jq-1.6.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.8 kB)
Collecting langchainhub
  Downloading langchainhub-0.1.14-py3-none-any.whl.metadata (478 bytes)
Collecting langchain-core<0.2,>=0.1.26 (from langchain)
  Downloading langchain_core-0.1.28-py3-none-any.whl.metadata (6.0 kB)
Collecting langsmith<0.2.0,>=0.1.0 (from langchain)
  Downloading langsmith-0.1.10-py3-none-any.whl.metadata (13 kB)
Collecting openai<2.0.0,>=1.10.0 (from langchain-openai)
  Downloading openai-1.13.3-py3-none-any.whl.metadata (18 kB)
Collecting tiktoken<1,>=0.5.2 (from langchain-open

# ChatBot Without Memory

## Code to Initialize Chatbot

In [None]:
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_community.vectorstores import Chroma
from langchain_community.document_loaders import JSONLoader
from langchain_community.embeddings import OpenAIEmbeddings
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnableLambda, RunnablePassthrough
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.output_parsers import StrOutputParser
from langchain import hub
from langchain.retrievers.multi_query import MultiQueryRetriever
import os

# Never store the API key in the notebook. Use the key already present in the
# environment; otherwise prompt for it without echoing, so it is not saved in
# the file or in any cell output.
# NOTE(review): the key that used to be hard-coded here has been published with
# the notebook and should be revoked.
if not os.environ.get("OPENAI_API_KEY"):
    from getpass import getpass

    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")

def split_docs(documents, chunk_size=1000, chunk_overlap=100):
    """Split documents into smaller chunks with RecursiveCharacterTextSplitter.

    Args:
        documents: Documents to split, as accepted by ``split_documents``.
        chunk_size: Forwarded to the splitter as ``chunk_size``.
        chunk_overlap: Forwarded to the splitter as ``chunk_overlap``.

    Returns:
        The chunked documents produced by the splitter.
    """
    splitter_options = {"chunk_size": chunk_size, "chunk_overlap": chunk_overlap}
    return RecursiveCharacterTextSplitter(**splitter_options).split_documents(documents)

# Embedding model used to vectorise the document chunks.
embedding_function = OpenAIEmbeddings()

# Location of the article data; kept in one variable so it is easy to change.
file_path = "/kaggle/input/data-articles-qa/data.json"

# jq_schema ".[]" yields one document per element of the top-level JSON value.
loader = JSONLoader(file_path=file_path, jq_schema=".[]", text_content=False)
documents = loader.load()
docs = split_docs(documents)

llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)

# Chroma's default in-process collection lives as long as the kernel, and
# from_documents assigns random ids unless told otherwise. Re-running this cell
# (or indexing the same data again in a later cell) would therefore store every
# chunk a second time and the retriever would return duplicate passages.
# Position-based ids make the insert an overwrite of the same entries instead.
db = Chroma.from_documents(
    documents=docs,
    embedding=embedding_function,
    ids=[f"chunk-{index}" for index in range(len(docs))],
)

retriever = db.as_retriever()

# Wrap the base retriever in a MultiQueryRetriever driven by the same chat model.
retriever_from_llm = MultiQueryRetriever.from_llm(
    retriever=retriever, llm=llm
)

# Prompt template pulled from LangChain Hub under the name "rlm/rag-prompt".
prompt = hub.pull("rlm/rag-prompt")


def format_docs(docs):
    """Merge the text of the given documents into a single string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The ``page_content`` values joined with a blank line between them
        (an empty string when ``docs`` is empty).
    """
    separator = "\n\n"
    contents = [document.page_content for document in docs]
    return separator.join(contents)


# Retrieval branch: fetch documents for the question and flatten them to text.
context_branch = retriever_from_llm | format_docs

# The incoming question string feeds both the retrieval branch and, unchanged,
# the "question" slot of the prompt.
chain_inputs = {"context": context_branch, "question": RunnablePassthrough()}

# Prompt -> chat model -> plain string answer.
rag_chain = chain_inputs | prompt | llm | StrOutputParser()


## Code to send User query to ChatBot

In [10]:
# The memory-less chain takes the raw question string and returns a string.
user_query = "What are the most sustainable fabric options available for clothing?"
answer = rag_chain.invoke(user_query)
print(answer)

The most sustainable fabric options available for clothing include organic hemp, organic linen, recycled cotton, recycled wool, organic cotton, TENCEL, and Monocel. These materials have a lower environmental impact compared to conventional materials like polyester and acrylic. It is important to prioritize biodegradable fabrics and materials when choosing clothing to reduce environmental harm.


# ChatBot with Memory Storage

## Code to Initialize Chatbot

In [27]:
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_community.vectorstores import Chroma
from langchain_community.document_loaders import JSONLoader
from langchain_community.embeddings import OpenAIEmbeddings
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnableLambda, RunnablePassthrough
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.output_parsers import StrOutputParser
from langchain import hub
from langchain.retrievers.multi_query import MultiQueryRetriever
from langchain_core.messages import AIMessage, HumanMessage
import os

# Never store the API key in the notebook. Use the key already present in the
# environment; otherwise prompt for it without echoing, so it is not saved in
# the file or in any cell output.
# NOTE(review): the key that used to be hard-coded here has been published with
# the notebook and should be revoked.
if not os.environ.get("OPENAI_API_KEY"):
    from getpass import getpass

    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")

# Path to the JSON file with the articles to index; change it to your own data.
file_path = "/kaggle/input/data-articles-qa/data.json"

def split_docs(documents, chunk_size=1000, chunk_overlap=100):
    """Split documents into smaller chunks with RecursiveCharacterTextSplitter.

    Args:
        documents: Documents to split, as accepted by ``split_documents``.
        chunk_size: Forwarded to the splitter as ``chunk_size``.
        chunk_overlap: Forwarded to the splitter as ``chunk_overlap``.

    Returns:
        The chunked documents produced by the splitter.
    """
    splitter_options = {"chunk_size": chunk_size, "chunk_overlap": chunk_overlap}
    return RecursiveCharacterTextSplitter(**splitter_options).split_documents(documents)

# Embedding model used to vectorise the document chunks.
embedding_function = OpenAIEmbeddings()

# jq_schema ".[]" yields one document per element of the top-level JSON value.
loader = JSONLoader(file_path=file_path, jq_schema=".[]", text_content=False)
documents = loader.load()
docs = split_docs(documents)

llm = ChatOpenAI(model_name="gpt-3.5-turbo-0125", temperature=0)

# Chroma's default in-process collection lives as long as the kernel, and
# from_documents assigns random ids unless told otherwise. Re-running this cell
# (or having indexed the same data in an earlier cell) would therefore store
# every chunk again and the retriever would return duplicate passages.
# Position-based ids make the insert an overwrite of the same entries instead.
db = Chroma.from_documents(
    documents=docs,
    embedding=embedding_function,
    ids=[f"chunk-{index}" for index in range(len(docs))],
)

retriever = db.as_retriever()

# Wrap the base retriever in a MultiQueryRetriever driven by the same chat model.
retriever_from_llm = MultiQueryRetriever.from_llm(
    retriever=retriever, llm=llm
)


# System instructions for the answering step. "{context}" is replaced with the
# retrieved passages; adjacent literals below form one single string.
qa_system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer the question. "
    "If you don't know the answer, just say that you don't know. "
    "Use three sentences maximum and keep the answer concise."
    "\n"
    "{context}"
)

# Answering prompt: system instructions, then the prior turns, then the new question.
qa_messages = [
    ("system", qa_system_prompt),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{question}"),
]
qa_prompt = ChatPromptTemplate.from_messages(qa_messages)

# System instructions for rewriting a follow-up into a self-contained question.
contextualize_q_system_prompt = (
    "Given a chat history and the latest user question "
    "which might reference context in the chat history, formulate a standalone question "
    "which can be understood without the chat history. Do NOT answer the question, "
    "just reformulate it if needed and otherwise return it as is."
)

# Rewriting prompt: same message layout as the answering prompt.
contextualize_q_messages = [
    ("system", contextualize_q_system_prompt),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{question}"),
]
contextualize_q_prompt = ChatPromptTemplate.from_messages(contextualize_q_messages)

# Rewriting chain: prompt -> chat model -> plain string.
contextualize_q_chain = contextualize_q_prompt | llm | StrOutputParser()

def format_docs(docs):
    """Merge the text of the given documents into a single string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The ``page_content`` values joined with a blank line between them
        (an empty string when ``docs`` is empty).
    """
    separator = "\n\n"
    contents = [document.page_content for document in docs]
    return separator.join(contents)

def contextualized_question(input: dict):
    """Choose what to hand to the retriever for the current turn.

    Args:
        input: Chain input; ``"question"`` is read when there is no history,
            and ``"chat_history"`` is optional.

    Returns:
        ``input["question"]`` when ``"chat_history"`` is missing or empty;
        otherwise the ``contextualize_q_chain`` runnable, which rewrites the
        question using the history.
    """
    has_history = bool(input.get("chat_history"))
    if not has_history:
        # First turn: nothing to resolve, so use the question verbatim.
        return input["question"]
    return contextualize_q_chain


# Context branch: pick (or rewrite) the question, retrieve documents for it,
# and flatten them to a single text block.
context_chain = contextualized_question | retriever_from_llm | format_docs

# Add "context" to the incoming dict, then prompt the chat model. There is no
# output parser here, so the chain returns the model's message object.
rag_chain = RunnablePassthrough.assign(context=context_chain) | qa_prompt | llm

## Code to send User query to ChatBot

In [31]:
from langchain_core.messages import AIMessage, HumanMessage
# Start a fresh conversation; the completed turn is appended at the end.
chat_history = []
question = "What are the most sustainable fabric options available for clothing?"

first_turn_input = {"question": question, "chat_history": chat_history}
ai_msg = rag_chain.invoke(first_turn_input)

# Print the answer followed by one blank line.
print(ai_msg.content, end="\n\n")

# Record both sides of the turn so a follow-up question can refer back to it.
chat_history += [HumanMessage(content=question), ai_msg]

The most sustainable fabric options for clothing include organic hemp, organic linen, recycled cotton, recycled wool, organic cotton, TENCEL, and Monocel. These materials have a lower environmental impact compared to synthetic materials like polyester and acrylic, which are not biodegradable and release microfibres into waterways. Linen, for example, is grown from flax plants, requires little water, and is biodegradable.



In [32]:
# Follow-up that only makes sense together with the previous turn.
second_question = "can you mention some companies that use these materials"
follow_up_input = {"question": second_question, "chat_history": chat_history}
ai_msg2 = rag_chain.invoke(follow_up_input)
print(ai_msg2.content)

Some companies that use sustainable materials like organic cotton, recycled wool, and organic hemp in their clothing production include Patagonia, Eileen Fisher, Reformation, Mara Hoffman, and Outerknown. These brands prioritize sustainability and transparency in their supply chains, offering eco-friendly options for conscious consumers looking to support ethical fashion practices.
