# **Chat with Document**

In [1]:
# Install the notebook's dependencies; all but the last line are pinned to exact versions.
# NOTE(review): the last install line (langchain_community, langchain_aws) carries no
# version pins, so pip may resolve versions that conflict with or replace the
# langchain==0.0.184 pin on the first install line — TODO confirm and pin them.
!pip install langchain==0.0.184
!pip install PyPDF2==3.0.1
!pip install python-dotenv==1.0.0
!pip install faiss-cpu==1.7.4
!pip install altair==4
!pip install tiktoken==0.4.0
!pip install langchain_community langchain_aws

Collecting PyPDF2==3.0.1
  Using cached pypdf2-3.0.1-py3-none-any.whl.metadata (6.8 kB)
Using cached pypdf2-3.0.1-py3-none-any.whl (232 kB)
Installing collected packages: PyPDF2
Successfully installed PyPDF2-3.0.1
Collecting faiss-cpu==1.7.4
  Downloading faiss_cpu-1.7.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.3 kB)
Downloading faiss_cpu-1.7.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.6/17.6 MB[0m [31m43.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.7.4
Collecting altair==4
  Downloading altair-4.0.0-py2.py3-none-any.whl.metadata (12 kB)
Downloading altair-4.0.0-py2.py3-none-any.whl (709 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m709.0/709.0 kB[0m [31m32.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: altair
  Attempting uninstall: altair
    Found ex

In [2]:
import os
from google.colab import userdata
# Copy each AWS setting from the Colab secret store into the process
# environment under the same name, so no credential is hardcoded here.
for secret_name in ("AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY", "AWS_DEFAULT_REGION"):
    os.environ[secret_name] = userdata.get(secret_name)

In [3]:
from PyPDF2 import PdfReader
from langchain.text_splitter import CharacterTextSplitter
from langchain_aws import BedrockEmbeddings
from langchain.vectorstores import FAISS
from langchain_aws import ChatBedrock
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain

In [4]:
def get_pdf_text(pdf):
    """Extract the text of every page of a single PDF as one string.

    Args:
        pdf: Whatever ``PdfReader`` accepts as its first argument (this
            notebook passes a file path).

    Returns:
        str: The text of all pages concatenated in page order, with no
        separator inserted between pages.
    """
    pdf_reader = PdfReader(pdf)
    # ``or ""`` guards against a page whose extraction yields None/empty, so a
    # single unreadable page cannot abort the whole document with a TypeError.
    # Joining once also avoids repeated string concatenation in a loop.
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)

In [5]:
def get_text_chunks(text, chunk_size=1000, chunk_overlap=200, separator="\n"):
    """Split text into overlapping chunks using langchain's CharacterTextSplitter.

    Args:
        text: The string to split.
        chunk_size: Maximum chunk length, measured with ``len`` (characters).
            Defaults to 1000.
        chunk_overlap: Number of characters shared between neighbouring
            chunks. Defaults to 200.
        separator: String on which the text is split before pieces are merged
            back into chunks. Defaults to a newline.

    Returns:
        The list of chunks produced by ``CharacterTextSplitter.split_text``.
    """
    text_splitter = CharacterTextSplitter(
        separator=separator,
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
    )
    return text_splitter.split_text(text)

In [6]:
def get_vectorstore(text_chunks, model_id="amazon.titan-embed-text-v2:0"):
    """Embed text chunks with Bedrock and index them in a FAISS vector store.

    Args:
        text_chunks: Iterable of strings to embed.
        model_id: Bedrock embedding model identifier. Defaults to
            ``"amazon.titan-embed-text-v2:0"``.

    Returns:
        The FAISS vector store built by ``FAISS.from_texts``.

    Raises:
        ValueError: If ``text_chunks`` contains no items.
    """
    # Materialize first so generators can be checked for emptiness too.
    texts = list(text_chunks)
    if not texts:
        # Fail before creating the embeddings client: there is nothing to
        # index, and a clear message beats an obscure downstream failure.
        raise ValueError(
            "text_chunks is empty: no text to embed (did the PDF contain extractable text?)"
        )
    embeddings = BedrockEmbeddings(model_id=model_id)
    return FAISS.from_texts(texts=texts, embedding=embeddings)

In [7]:
def get_conversation_chain(vectorstore):
    """Build a conversational retrieval chain on top of a vector store.

    The chain combines a Bedrock chat model, a buffer memory stored under the
    ``chat_history`` key (returned as message objects), and the retriever
    obtained from ``vectorstore.as_retriever()``.

    Args:
        vectorstore: Object exposing ``as_retriever()``.

    Returns:
        The chain created by ``ConversationalRetrievalChain.from_llm``.
    """
    chat_model = ChatBedrock(model_id="mistral.mistral-7b-instruct-v0:2", temperature=0.5)
    history = ConversationBufferMemory(memory_key='chat_history', return_messages=True)
    doc_retriever = vectorstore.as_retriever()
    return ConversationalRetrievalChain.from_llm(
        llm=chat_model, retriever=doc_retriever, memory=history
    )

In [9]:
# Location of the PDF to chat with; edit this single constant to use another document.
PDF_PATH = "/content/cwd_data/IIHT-Vijay-Chatbots.pdf"

# Extract the raw text from the PDF.
raw_text = get_pdf_text(PDF_PATH)

# Fail fast with a clear message when nothing could be extracted, instead of
# letting the embedding / indexing steps fail on empty input.
if not raw_text.strip():
    raise ValueError(f"No extractable text found in {PDF_PATH!r}")

# Split the text into overlapping chunks.
text_chunks = get_text_chunks(raw_text)

# Embed the chunks and index them in a vector store.
vectorstore = get_vectorstore(text_chunks)

# Build the conversation chain that answers questions from the indexed text.
conversation_chain = get_conversation_chain(vectorstore)



  memory = ConversationBufferMemory(


In [10]:
# Ask the first question. ``invoke`` replaces the deprecated direct call on the
# chain; the returned dict still carries the memory's 'chat_history' entry.
response = conversation_chain.invoke({'question': "How many companies did vijay work in?"})
chat_history = response['chat_history']

  response = conversation_chain({'question': "How many companies did vijay work in?"})


In [11]:
# Ask a follow-up question on the same chain. ``invoke`` replaces the
# deprecated direct call; 'chat_history' is read back from the returned dict.
response = conversation_chain.invoke({'question': "List out all companies"})
chat_history = response['chat_history']

In [12]:
# Print the transcript: even positions are labelled as the user's turns and
# odd positions as the agent's replies.
speaker_prefixes = ("User: ", "Agent: ")
for turn, message in enumerate(chat_history):
    print(speaker_prefixes[turn % 2] + message.content)

User: How many companies did vijay work in?
Agent:  Vijay has worked in a total of 5 companies as per the given context:

1. Katra Soft Pvt Ltd
2. Focus Softek Pvt Ltd
3. Nuance Transcription Services Pvt Ltd
4. Thomson Reuters India Pvt Ltd
5. Swayaan Digital Solutions Pvt Ltd (as a co-founder)

Therefore, the answer is 5.
User: List out all companies
Agent:  Based on the context provided, Vijay has worked for Swayaan Digital Solutions Pvt Ltd and Focus EduVation Pvt. Ltd.
