In [1]:
!pip install streamlit PyPDF2 python-dotenv langchain faiss-cpu sentence-transformers

Collecting streamlit
  Downloading streamlit-1.39.0-py2.py3-none-any.whl.metadata (8.5 kB)
Collecting PyPDF2
  Downloading pypdf2-3.0.1-py3-none-any.whl.metadata (6.8 kB)
Collecting python-dotenv
  Downloading python_dotenv-1.0.1-py3-none-any.whl.metadata (23 kB)
Collecting langchain
  Downloading langchain-0.3.4-py3-none-any.whl.metadata (7.1 kB)
Collecting faiss-cpu
  Downloading faiss_cpu-1.9.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.4 kB)
Collecting sentence-transformers
  Downloading sentence_transformers-3.2.1-py3-none-any.whl.metadata (10 kB)
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Collecting watchdog<6,>=2.1.5 (from streamlit)
  Downloading watchdog-5.0.3-py3-none-manylinux2014_x86_64.whl.metadata (41 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.9/41.9 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
Collecting langchain-core<0.4.0,>=0.3.12 (from langch

In [4]:
pip install -q mistralai

In [5]:
pip install -qU langchain-mistralai


In [104]:
%%writefile app.py

# Import necessary libraries
import streamlit as st
from PyPDF2 import PdfReader
from langchain.text_splitter import CharacterTextSplitter
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_mistralai.chat_models import ChatMistralAI
from langchain_mistralai import MistralAIEmbeddings
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
import os

# Directly set the Mistral API key (use asterisks for security in the example)
api_key = "**********"
os.environ["MISTRALAI_API_KEY"] = api_key

# Define CSS and HTML templates for the chat interface styling
css = '''
<style>
    body {
        font-family: Arial, sans-serif;
        background-color: #f4f4f4;
        padding: 20px;
    }
    .chat-container {
        display: flex;
        flex-direction: column;
        gap: 10px;
    }
    .chat-message {
        padding: 15px;
        border-radius: 20px;
        margin-bottom: 1rem;
        display: flex;
        width: fit-content;
        max-width: 80%;
        box-sizing: border-box;
    }
    .chat-message.bot {
        background-color: #6c757d;
        color: #ffffff;
        margin-right: auto;
        flex-direction: row;
    }
    .chat-message.user {
        background-color: #007bff;
        color: #ffffff;
        margin-left: auto;
        flex-direction: row-reverse;
    }
    .chat-message .avatar {
        width: 40px;
        min-width: 40px;
        margin: 0 8px;
    }
    .chat-message .avatar img {
        max-width: 40px;
        max-height: 40px;
        border-radius: 50%;
        object-fit: cover;
    }
    .chat-message .message {
        padding: 0 12px;
        text-align: left;
    }
</style>
'''

bot_template = '''
<div class="chat-message bot">
    <div class="avatar">
        <img src="https://i.ibb.co/cN0nmSj/Screenshot-2023-05-28-at-02-37-21.png">
    </div>
    <div class="message">{{MSG}}</div>
</div>
'''

user_template = '''
<div class="chat-message user">
    <div class="avatar">
        <img src="https://i.ibb.co/dMQH1Bx/obito-uchiha-by-zetsumaro-dedj9u4-pre.jpg">
    </div>
    <div class="message">{{MSG}}</div>
</div>
'''

# Instantiate the embeddings model for text processing
embeddings = MistralAIEmbeddings(model="mistral-embed", api_key=api_key)

# Function to extract text from uploaded PDF files
def get_pdf_text(pdf_docs):
    text = ""
    for pdf in pdf_docs:
        pdf_reader = PdfReader(pdf)
        for page in pdf_reader.pages:
            text += page.extract_text()
    return text

# Function to split text into smaller chunks for embedding
def get_text_chunk(text):
    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len
    )
    chunk = text_splitter.split_text(text)
    return chunk

# Function to create an in-memory vector store from text chunks
def get_VectorStore(text_chunks):
    try:
        vectorstore = InMemoryVectorStore.from_texts(
            text_chunks,
            embedding=embeddings,
        )
        return vectorstore
    except Exception as e:
        st.error(f"Error creating vector store: {str(e)}")
        return None

# Set up conversation retrieval chain using vectorstore and language model
def get_conversation_chain(vectorstore):
    llm = ChatMistralAI(model="mistral-small", mistral_api_key=api_key, temperature=0.7)
    memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)
    conversation_chain = ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=vectorstore.as_retriever(),
        memory=memory
    )
    return conversation_chain

# Handle user questions and display chat history in interface
def handle_user_input(question):
    if st.session_state.conversation is None:
        st.warning("Please upload a PDF first before asking questions.")
        return

    response = st.session_state.conversation({'question': question})
    st.session_state.chat_history = response['chat_history']
    for i, message in enumerate(st.session_state.chat_history):
        if i % 2 == 0:  # User message
            st.write(user_template.replace("{{MSG}}", message.content), unsafe_allow_html=True)
        else:  # Bot message
            st.write(bot_template.replace("{{MSG}}", message.content), unsafe_allow_html=True)

# Main application function to define Streamlit layout and logic
def main():
    st.set_page_config(page_title="Ask your PDF", page_icon=":books:")
    st.markdown(css, unsafe_allow_html=True)  # Apply CSS styles
    st.header("Ask your PDF 💬")

    question = st.text_input('Ask any question about your PDF')
    if question:
       handle_user_input(question)

    if 'conversation' not in st.session_state:
        st.session_state.conversation = None
    if 'chat_history' not in st.session_state:
        st.session_state.chat_history = []

    with st.sidebar:
        st.subheader("Your PDF")
        pdf_doc = st.file_uploader("Upload your PDF", type='pdf', accept_multiple_files=True)

        if st.button('Process'):
            with st.spinner('Processing'):
                raw_text = get_pdf_text(pdf_doc)
                if not raw_text:
                    st.error("No text found in the PDF.")
                    return

                text_chunk = get_text_chunk(raw_text)
                vectorstore = get_VectorStore(text_chunk)

                if vectorstore is not None:
                    st.session_state.conversation = get_conversation_chain(vectorstore)
                    st.success("PDF processed successfully!")

if __name__ == '__main__':
    main()


Overwriting app.py


In [None]:
import urllib
print("Password/Enpoint IP for localtunnel is:",urllib.request.urlopen('https://ipv4.icanhazip.com').read().decode('utf8').strip("\n"))

In [106]:
!streamlit run app.py & npx localtunnel --port 8501


Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://35.237.64.37:8501[0m
[0m
your url is: https://flat-apes-check.loca.lt
  memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)
  response = st.session_state.conversation({'question': question})
[34m  Stopping...[0m
^C
