In [1]:
pip install langchain langchain-community langchain-openai chromadb jq langchainhub

Collecting langchain
  Downloading langchain-0.1.10-py3-none-any.whl.metadata (13 kB)
Collecting langchain-community
  Downloading langchain_community-0.0.25-py3-none-any.whl.metadata (8.1 kB)
Collecting langchain-openai
  Downloading langchain_openai-0.0.8-py3-none-any.whl.metadata (2.5 kB)
Collecting chromadb
  Downloading chromadb-0.4.24-py3-none-any.whl.metadata (7.3 kB)
Collecting jq
  Downloading jq-1.6.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.8 kB)
Collecting langchainhub
  Downloading langchainhub-0.1.14-py3-none-any.whl.metadata (478 bytes)
Collecting langchain-core<0.2,>=0.1.28 (from langchain)
  Downloading langchain_core-0.1.28-py3-none-any.whl.metadata (6.0 kB)
Collecting langchain-text-splitters<0.1,>=0.0.1 (from langchain)
  Downloading langchain_text_splitters-0.0.1-py3-none-any.whl.metadata (2.0 kB)
Collecting langsmith<0.2.0,>=0.1.0 (from langchain)
  Downloading langsmith-0.1.13-py3-none-any.whl.metadata (13 kB)
Collecting openai<2.0.0

# ChatBot Without Memory

## Code to Initialize Chatbot

In [2]:
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_community.vectorstores import Chroma
from langchain_community.document_loaders import JSONLoader
from langchain_community.embeddings import OpenAIEmbeddings
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnableLambda, RunnablePassthrough
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.output_parsers import StrOutputParser
from langchain import hub
from langchain.retrievers.multi_query import MultiQueryRetriever
import os

# SECURITY: never commit an API key to a notebook. The key that used to be hardcoded
# here has been exposed and must be revoked in the OpenAI dashboard.
# Use the key already present in the environment; otherwise ask for it interactively
# so it never ends up in the saved notebook.
if not os.environ.get("OPENAI_API_KEY"):
    import getpass

    os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API key: ")

def split_docs(documents,chunk_size=1000,chunk_overlap=100):
    """Split ``documents`` into chunks with a RecursiveCharacterTextSplitter.

    Args:
        documents: Documents to hand to the splitter's ``split_documents``.
        chunk_size: Forwarded to the splitter as ``chunk_size``.
        chunk_overlap: Forwarded to the splitter as ``chunk_overlap``.

    Returns:
        Whatever the splitter's ``split_documents`` returns for ``documents``.
    """
    splitter_options = {"chunk_size": chunk_size, "chunk_overlap": chunk_overlap}
    return RecursiveCharacterTextSplitter(**splitter_options).split_documents(documents)

# Embedding model used to index the chunks. NOTE(review): OpenAIEmbeddings is imported
# twice in this cell (langchain_community, then langchain_openai); the later import is
# the one bound here.
embedding_function = OpenAIEmbeddings()


# Load the article data from the Kaggle input file. The jq schema ".[]" selects each
# element of the top-level JSON value; text_content=False presumably lets non-string
# elements through — TODO confirm against the JSONLoader docs.
loader = JSONLoader(file_path="/kaggle/input/data-articles-qa/data.json", jq_schema=".[]", text_content=False)

documents = loader.load()

# Chunk with split_docs' defaults (chunk_size=1000, chunk_overlap=100).
docs = split_docs(documents)

# Chat model shared by the multi-query retriever and the answer chain; temperature 0.
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature = 0)

# Build a Chroma vector store from the chunks (no persist directory is passed here).
db = Chroma.from_documents(documents=docs, embedding=embedding_function)

retriever = db.as_retriever()

# Wrap the base retriever with MultiQueryRetriever, which is given the same llm.
retriever_from_llm = MultiQueryRetriever.from_llm(
    retriever=retriever, llm=llm
)


# RAG prompt fetched by name from the LangChain Hub (presumably needs network access).
prompt = hub.pull("rlm/rag-prompt")


def format_docs(docs):
    """Concatenate the ``page_content`` of every document, separated by a blank line."""
    page_texts = [doc.page_content for doc in docs]
    return "\n\n".join(page_texts)


# Retrieval branch: the incoming question goes through the multi-query retriever and
# the resulting documents are flattened into one context string.
_context_branch = retriever_from_llm | format_docs

# Memory-less RAG chain: the raw input string is used both to fetch context and as the
# "question" prompt variable; the model reply is parsed down to a plain string.
rag_chain = (
    {"context": _context_branch, "question": RunnablePassthrough()}
    | prompt | llm | StrOutputParser()
)


# Validation and Feedback Loop

In [36]:
def validate_response(question, response, trusted_sources):
    """Ask an LLM whether ``response`` is supported by context from ``trusted_sources``.

    The context is retrieved with ``response`` as the query (the same retrieval key the
    previous implementation used), but from the retriever passed by the caller rather
    than from a notebook-global one.

    Args:
        question: The user's question. Kept for interface compatibility; it is not
            used for retrieval.
        response: The chatbot answer to validate.
        trusted_sources: Retriever exposing ``get_relevant_documents(query)`` whose
            documents carry a ``page_content`` attribute.

    Returns:
        bool: True when the first "true"/"false" word in the validator's reply is
        "true" (case-insensitive). False otherwise, including when the reply contains
        neither word, so an unparseable verdict is treated as "not validated".
    """
    import re  # local import so this cell does not depend on the notebook's import cell

    validation_prompt = ChatPromptTemplate.from_template(""" You are good at validating a response by comparing it\
    to the context provided. Validate the response by comparing it to the provided context and return 'True'\
    if it is valid otherwise return 'False'.
    
    context: {context}
    
    response: {response}
    
    """)
    # temperature=0 keeps the verdict as repeatable as the API allows.
    model = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)
    output_parser = StrOutputParser()

    # Retrieve once, from the caller-supplied retriever, and flatten to plain text.
    context_docs = trusted_sources.get_relevant_documents(response)
    context_text = "\n\n".join(doc.page_content for doc in context_docs)

    chain = validation_prompt | model | output_parser
    result = chain.invoke({"context": context_text, "response": response})

    # The first explicit verdict word decides; no verdict at all means "not validated"
    # instead of raising UnboundLocalError.
    verdict = re.search(r"\b(true|false)\b", result, flags=re.IGNORECASE)
    return verdict is not None and verdict.group(1).lower() == "true"


## Code to Send a User Query to the ChatBot

In [5]:
# Ask the memory-less RAG chain one question and print the generated answer.
# `query` and `answer` are reused by the validation cell that follows.
query = "What are the most sustainable fabric options available for clothing?"
answer = rag_chain.invoke(query)
print(answer)

The most sustainable fabric options available for clothing include organic hemp, organic linen, recycled cotton, recycled wool, organic cotton, TENCEL, and Monocel. Lower-impact materials are recommended, such as recycled cotton, recycled wool, organic hemp, or organic linen. Choosing biodegradable fabrics like linen and avoiding synthetic materials like polyester can also contribute to sustainability in clothing choices.


In [30]:
# Validate the answer printed above against the retrieved sources.
Validation_result = validate_response(query,answer,retriever_from_llm)

if not Validation_result:
    # Show the fallback instead of silently storing it in a variable nobody reads.
    response = "I'm not sure about that. Let me get more information and get back to you"
    print(response)
else:
    response = answer

page_content='natural fibres. The claim that \\u201cnatural\\u201d fabrics are always best for the environment is questionable at best, as every fabric has its pros and cons, though there are of course better options.\\nBeyond that, there\\u2019s little chance that brands and shoppers are going to abandon synthetics anytime soon. For some products, like swimwear and rainproof outerwear, synthetic material is just way more practical and the best option we currently have.\\nSo what can we do to reduce microfibre pollution in the ocean and the air?\\nNow that we\\u2019ve covered the background, it\\u2019s time to get practical. Let\\u2019s look at what to do about microfibres in clothing on a case by case basis.\\nBuy less (new) stuff\\nThe number one way to reduce the environmental impact of our clothing choices is to buy less stuff, especially less new stuff. Consider spending (less) of your hard-earned dollars on second hand clothing to extend the life of fabrics already in existence.\

# ChatBot with Memory Storage

## Code to Initialize Chatbot

In [32]:
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_community.vectorstores import Chroma
from langchain_community.document_loaders import JSONLoader
from langchain_community.embeddings import OpenAIEmbeddings
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnableLambda, RunnablePassthrough
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.output_parsers import StrOutputParser
from langchain import hub
from langchain.retrievers.multi_query import MultiQueryRetriever
from langchain_core.messages import AIMessage, HumanMessage
import os

# OpenAI key. SECURITY: never commit an API key to a notebook. The key that used to be
# hardcoded here has been exposed and must be revoked in the OpenAI dashboard.
# Use the key already present in the environment; otherwise ask for it interactively.
if not os.environ.get("OPENAI_API_KEY"):
    import getpass

    os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API key: ")
#Add your filepath
file_path = "/kaggle/input/data-articles-qa/data.json"

def split_docs(documents,chunk_size=1000,chunk_overlap=100):
    """Split ``documents`` into chunks with a RecursiveCharacterTextSplitter.

    Args:
        documents: Documents to hand to the splitter's ``split_documents``.
        chunk_size: Forwarded to the splitter as ``chunk_size``.
        chunk_overlap: Forwarded to the splitter as ``chunk_overlap``.

    Returns:
        Whatever the splitter's ``split_documents`` returns for ``documents``.
    """
    splitter_options = {"chunk_size": chunk_size, "chunk_overlap": chunk_overlap}
    return RecursiveCharacterTextSplitter(**splitter_options).split_documents(documents)

# Embedding model used to index the chunks. NOTE(review): OpenAIEmbeddings is imported
# twice in this cell (langchain_community, then langchain_openai); the later import is
# the one bound here.
embedding_function = OpenAIEmbeddings()


# Load the article data from `file_path`. The jq schema ".[]" selects each element of
# the top-level JSON value; text_content=False presumably lets non-string elements
# through — TODO confirm against the JSONLoader docs.
loader = JSONLoader(file_path=file_path, jq_schema=".[]", text_content=False)

documents = loader.load()

# Chunk with split_docs' defaults (chunk_size=1000, chunk_overlap=100).
docs = split_docs(documents)

# Chat model shared by the retriever, the question rewriter and the answer chain.
llm = ChatOpenAI(model_name="gpt-3.5-turbo-0125", temperature = 0)

# Build a Chroma vector store from the chunks (no persist directory is passed here).
db = Chroma.from_documents(documents=docs, embedding=embedding_function)

retriever = db.as_retriever()

# Wrap the base retriever with MultiQueryRetriever, which is given the same llm.
retriever_from_llm = MultiQueryRetriever.from_llm(
    retriever=retriever, llm=llm
)


# System instructions for the answering step. The trailing backslashes join the lines
# into a single paragraph; "{context}" is filled with the formatted retrieved documents.
qa_system_prompt = """You are an assistant for question-answering tasks. \
Use the following pieces of retrieved context to answer the question. \
If you don't know the answer, just say that you don't know. \
Use three sentences maximum and keep the answer concise.\

{context}"""
# Answer prompt: system instructions, then the prior turns supplied under
# "chat_history", then the current user "question".
qa_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", qa_system_prompt),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{question}"),
    ]
)

# System instructions for rewriting a follow-up question into a standalone one.
contextualize_q_system_prompt = """Given a chat history and the latest user question \
which might reference context in the chat history, formulate a standalone question \
which can be understood without the chat history. Do NOT answer the question, \
just reformulate it if needed and otherwise return it as is."""
# Rewrite prompt: same message layout as the answer prompt (system, "chat_history",
# then the human "question").
contextualize_q_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", contextualize_q_system_prompt),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{question}"),
    ]
)
# Prompt -> llm -> string parser, so the chain yields the rewritten question as text.
contextualize_q_chain = contextualize_q_prompt | llm | StrOutputParser()

def format_docs(docs):
    """Concatenate the ``page_content`` of every document, separated by a blank line."""
    page_texts = [doc.page_content for doc in docs]
    return "\n\n".join(page_texts)

def contextualized_question(input: dict):
    """Choose what to feed the retriever for this turn.

    Returns ``input["question"]`` unchanged when ``input`` has no (or an empty)
    "chat_history"; otherwise returns ``contextualize_q_chain`` itself.
    """
    has_history = bool(input.get("chat_history"))
    if not has_history:
        return input["question"]
    return contextualize_q_chain


# Context branch: pick the query via contextualized_question, retrieve with the
# multi-query retriever, then flatten the documents into one string.
_context_chain = contextualized_question | retriever_from_llm | format_docs

# Memory-aware RAG chain: add "context" to the incoming dict, fill the QA prompt and
# call the model. No output parser is attached, so the chain ends with the llm step.
rag_chain = RunnablePassthrough.assign(context=_context_chain) | qa_prompt | llm

## Code to Send a User Query to the ChatBot

In [38]:
from langchain_core.messages import AIMessage, HumanMessage
# Start a fresh conversation.
chat_history = []

query = "What are the most sustainable fabric options available for clothing?"
ai_msg = rag_chain.invoke({"question": query, "chat_history": chat_history})


# Check the generated answer against the retrieved sources before showing it.
Validation_result = validate_response(query,ai_msg.content,retriever_from_llm)

if Validation_result:
    response = ai_msg.content
    # Only validated turns are remembered, so a rejected answer cannot steer later turns.
    chat_history.extend([HumanMessage(content=query), ai_msg])
else:
    response = "I'm not sure about that. Let me get more information and get back to you"

# Always show the user something: the validated answer or the fallback message.
print(response)

The most sustainable fabric options for clothing include recycled cotton, recycled wool, organic hemp, and organic linen. These materials have lower environmental impacts compared to conventional options like conventional cotton or virgin polyester. Choosing biodegradable fabrics like linen can also contribute to sustainability in fashion.


In [40]:
# Follow-up question that only makes sense together with the previous turn.
second_question = "can you mention some companies that use these materials"
ai_msg2 = rag_chain.invoke({"question": second_question, "chat_history": chat_history})

# Check the generated answer against the retrieved sources before showing it.
Validation_result = validate_response(second_question,ai_msg2.content,retriever_from_llm)

if Validation_result:
    response = ai_msg2.content
    # Record the validated turn, as the first turn does, so later questions can refer to it.
    chat_history.extend([HumanMessage(content=second_question), ai_msg2])
else:
    response = "I'm not sure about that. Let me get more information and get back to you"

# Always show the user something: the validated answer or the fallback message.
print(response)

Major brands like Adidas, ASOS, H&M, and Burberry have pledged to use 100% sustainable cotton by 2025. New technologies like blockchain are also being used to trace cotton supply chains and ensure ethical and sustainable practices. Additionally, brands like Afends, Mila.Vert, and Natasha Tonic are known for making sustainable hemp clothing that is on-trend and eco-friendly.
