<a href="https://colab.research.google.com/github/viniasbr/chatbot-vestibulando/blob/main/colab-notebooks/Responsive_ChatBot.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [34]:
%%capture --no-stderr
%pip install --upgrade --quiet  langchain langchain-community langchain-chroma bs4

## Imports

In [131]:
from google.colab import userdata

import os
import bs4

from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_community.document_loaders import TextLoader
from langchain_chroma import Chroma
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_core.messages import HumanMessage
from langchain_core.messages import AIMessage
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain.chains import create_retrieval_chain, create_history_aware_retriever
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_text_splitters import RecursiveCharacterTextSplitter

## API Key and LLM selection

In [6]:
os.environ["OPENAI_API_KEY"] = userdata.get('OpenAIKey')

model = ChatOpenAI(model="gpt-3.5-turbo")

## Function Definitions

In [7]:
#This is the volatile memory storage. We do not intend to save conversations in a file.

store = {}

In [8]:
def get_session_history(session_id: str) -> BaseChatMessageHistory:
    if session_id not in store:
        store[session_id] = ChatMessageHistory()
    return store[session_id]

In [20]:
def filter_history(history, k=10):
    return history[-k:]

In [86]:
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

## Creating Base Prompt and Defining Runner

In [10]:
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You're an assistant who speaks in {language}. Respond in 20 words or fewer",
        ),
        MessagesPlaceholder(variable_name="history"),
        ("human", "{input}"),
    ]
)

In [16]:
runnable = (
    RunnablePassthrough.assign(history=lambda x: filter_history(x["history"]))
    | prompt
    | model
)

In [18]:
runnable_with_history = RunnableWithMessageHistory(
    runnable,
    get_session_history,
    input_messages_key="input",
    history_messages_key="history",
)

## Input Test

In [None]:
runnable_with_history.invoke(
    {"language": "portuguese", "input": "hi im vinicius!"},
    config={"configurable": {"session_id": "2"}},
)

In [25]:
for r in runnable_with_history.stream(
    {"language": "portuguese", "input": "Qual é meu nome?"},
    config={"configurable": {"session_id": "2"}},):
  print(r.content, end = "")

Seu nome é Vinicius. Como posso ajudar você hoje?

# Main Simple ChatBot Loop
This is the simplest implementation of the ChatBot. It is nice to see it working!

In [32]:
user_input = ""
config = {"configurable": {"session_id": "abc3"}}

while user_input != "Fim.":
  user_input = input("Usuário: ")
  print("ChatBot:",end=" ")
  for r in runnable_with_history.stream(
      {"language": "portuguese", "input": user_input},
    config=config,
    ):
    print(r.content, end="")
  print("")

Usuário: oi
ChatBot: Olá! Como posso ajudar você hoje?
Usuário: eu quero aprender ingles
ChatBot: Posso te ajudar com isso! Vamos praticar juntos? Estou aqui para responder suas perguntas.
Usuário: como se diz brigadeiro em ingles
ChatBot: Brigadeiro em inglês é "brigadeiro".
Usuário: como que monta um cubo m[agico
ChatBot: Para montar um cubo mágico, siga tutoriais online ou use métodos de resolução passo a passo. Boa sorte!
Usuário: eu gosto de voce
ChatBot: Fico feliz em saber! Estou aqui para ajudar no que precisar. Obrigado pelo carinho.
Usuário: o que eu te perguntei em ingles mesmo?
ChatBot: Você perguntou "como se diz brigadeiro em inglês?"
Usuário: Fim.
ChatBot: Se precisar de mais alguma coisa, estou à disposição. Até mais!


##Document Parsing

In [56]:
#Loading already parsed data, extracted from https://www.pg.unicamp.br/norma/31594/0

loader = TextLoader("clean_data.txt")
docs = loader.load()

In [51]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000, chunk_overlap=200, add_start_index=True
)
all_splits = text_splitter.split_documents(docs)

len(all_splits)

278

In [59]:
for i in all_splits:
  i.metadata['source'] = 'https://www.pg.unicamp.br/norma/31594/0'

In [62]:
vectorstore = Chroma.from_documents(documents=all_splits, embedding=OpenAIEmbeddings())

In [126]:
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 6})

In [64]:
retrieved_docs = retriever.invoke("Quantas vagas de ampla concorrência tem o curso de Ciência de Computação na Unicamp?")

In [65]:
len(retrieved_docs)

6

In [68]:
print(retrieved_docs[2].page_content)

Cursos: Engenharia de Computação (Integral), Total Vagas Regulares: 90, Total Vagas VU: 63, Ampla Concorrência (Mínimo): 39, Ampla Concorrência (Máximo): 49, Reserva de Vagas para PP (15%*): 14, Reserva de Vagas para PP (27,2%*): 24
Cursos: Engenharia de Controle e Automação (Noturno), Total Vagas Regulares: 50, Total Vagas VU: 33, Ampla Concorrência (Mínimo): 19, Ampla Concorrência (Máximo): 25, Reserva de Vagas para PP (15%*): 8, Reserva de Vagas para PP (27,2%*): 14
Cursos: Engenharia de Manufatura (Integral), Total Vagas Regulares: 60, Total Vagas VU: 45, Ampla Concorrência (Mínimo): 29, Ampla Concorrência (Máximo): 36, Reserva de Vagas para PP (15%*): 9, Reserva de Vagas para PP (27,2%*): 16
Cursos: Engenharia de Produção (Integral), Total Vagas Regulares: 60, Total Vagas VU: 45, Ampla Concorrência (Mínimo): 29, Ampla Concorrência (Máximo): 36, Reserva de Vagas para PP (15%*): 9, Reserva de Vagas para PP (27,2%*): 16


In [97]:
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You're an assistant for question-answering tasks who speaks in portuguese. Use the following pieces of retrieved context to answer questions about vestibular da Unicamp.If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\n\n {context}",
        ),
        ("human", "{input}"),
    ]
)

In [122]:
rag_chain = (
    {"context": retriever | format_docs, "input": RunnablePassthrough()}
    | prompt
    | model
    | StrOutputParser()
)

In [129]:
rag_chain.invoke("Quais são as matérias que pesam mais em engenharia de computação?")

'As matérias que mais pesam em Engenharia de Computação na Unicamp são MAT, LPL e FIS, com pesos 3, 2 e 1, respectivamente.'

In [132]:
contextualize_q_system_prompt = """Given a chat history and the latest user question \
which might reference context in the chat history, formulate a standalone question \
which can be understood without the chat history. Do NOT answer the question, \
just reformulate it if needed and otherwise return it as is."""

contextualize_q_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", contextualize_q_system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)

In [133]:
history_aware_retriever = create_history_aware_retriever(
    model, retriever, contextualize_q_prompt
)

In [134]:
qa_system_prompt = """You're an assistant for question-answering tasks who speaks in portuguese. Use the following pieces of retrieved context to answer questions about vestibular da Unicamp.If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\n\n {context}"""

In [135]:
qa_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", qa_system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)

In [136]:
question_answer_chain = create_stuff_documents_chain(model, qa_prompt)

rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)

In [141]:
conversational_rag_chain = RunnableWithMessageHistory(
    (RunnablePassthrough.assign(chat_history=lambda x: filter_history(x["chat_history"])) | rag_chain),
    get_session_history,
    input_messages_key="input",
    history_messages_key="chat_history",
    output_messages_key="answer",
)

In [None]:
user_input = ""
config = {"configurable": {"session_id": "abc5"}}

while user_input != "Fim.":
  user_input = input("Usuário: ")
  print("ChatBot:",end=" ")
  for r in conversational_rag_chain.stream(
      {"input": user_input},
    config=config,
    ):
    if('answer' in r.keys()):
      print(r['answer'], end="")
  print("")