In [None]:
# Install necessary libraries
!pip install streamlit PyPDF2 langchain-google-genai faiss-cpu langchain_community
!pip install ngrok
!pip install -q pyngrok


# Set your Google API Key (replace with your actual key)
import os
os.environ["GOOGLE_API_KEY"] = "google api key"
os.environ["NGROK_AUTH_TOKEN"] = "ngrok auth token"#--- IMPORTANT: REPLACE THIS!


In [None]:

### `app.py` Code
%%writefile app.py
import streamlit as st
import os
import PyPDF2
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain_huggingface import HuggingFaceEmbeddings  # Add this import
import asyncio

# Helper Functions
def get_pdf_text(pdf_docs):
    """Extract the text of every page of the uploaded PDF files.

    Args:
        pdf_docs: Iterable of file-like objects accepted by
            ``PyPDF2.PdfReader`` (the objects returned by the Streamlit
            file uploader in ``main``).

    Returns:
        One string containing the text of all non-empty pages, in order,
        separated by newlines. Empty string when nothing was extracted.

    Raises:
        Exception: Any error raised while reading a PDF is reported with
            ``st.error`` and then re-raised unchanged.
    """
    page_texts = []
    for pdf in pdf_docs:
        try:
            pdf_reader = PyPDF2.PdfReader(pdf)
            for page in pdf_reader.pages:
                page_text = page.extract_text()
                # Pages without extractable text yield None or "".
                if page_text:
                    page_texts.append(page_text)
        except Exception as e:
            # Fall back to a generic label so a missing `.name` attribute
            # cannot mask the original error.
            st.error(f"Error reading file {getattr(pdf, 'name', '<unknown>')}: {e}")
            raise
    # Join with a newline so the last word of one page does not fuse with the
    # first word of the next page (or of the next document).
    return "\n".join(page_texts)

def get_text_chunks(text, chunk_size=10000, chunk_overlap=1000):
    """Split text into overlapping chunks with RecursiveCharacterTextSplitter.

    Args:
        text: The full document text to split.
        chunk_size: Maximum size of each chunk, forwarded to the splitter.
            Defaults to 10000, the value that was previously hard-coded.
        chunk_overlap: Overlap between consecutive chunks, forwarded to the
            splitter. Defaults to 1000, the value that was previously
            hard-coded.

    Returns:
        The list of chunks returned by ``splitter.split_text(text)``.
    """
    # NOTE(review): the embedding model configured in get_vector_store may
    # truncate long inputs; confirm that 10000-character chunks are fully
    # represented by it, or pass a smaller chunk_size.
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_text(text)

def get_vector_store(text_chunks):
    """Create a FAISS vector store from text chunks using Hugging Face embeddings.

    Args:
        text_chunks: Non-empty list of strings to embed and index.

    Returns:
        The FAISS vector store built by ``FAISS.from_texts``.

    Raises:
        ValueError: If ``text_chunks`` is empty.
        Exception: Any error raised while loading the embedding model or
            building the index is reported with ``st.error`` and re-raised.
    """
    # Fail early with a clear message instead of letting an empty input
    # surface as an opaque error from inside the index construction.
    if not text_chunks:
        raise ValueError("Cannot build a vector store from an empty list of text chunks.")

    try:
        embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2"
        )
        return FAISS.from_texts(text_chunks, embedding=embeddings)
    except Exception as e:
        st.error(f"Error creating vector store: {e}")
        raise

def get_conversational_chain(vector_store):
    """Build the retrieval chat chain: Gemini model, retriever and buffer memory.

    Args:
        vector_store: Store whose ``as_retriever()`` supplies the retriever.

    Returns:
        The chain produced by ``ConversationalRetrievalChain.from_llm``.

    Raises:
        Exception: Any construction error is shown with ``st.error`` and
            then re-raised.
    """
    try:
        # Only model, temperature and API key are passed on purpose; extra
        # parameters such as max_retries are deliberately left out.
        gemini = ChatGoogleGenerativeAI(
            model="gemini-2.5-flash",
            temperature=0.5,
            google_api_key=os.getenv("GOOGLE_API_KEY"),
        )
        chat_memory = ConversationBufferMemory(
            memory_key="chat_history",
            return_messages=True,
        )
        doc_retriever = vector_store.as_retriever()
        return ConversationalRetrievalChain.from_llm(
            llm=gemini,
            retriever=doc_retriever,
            memory=chat_memory,
        )
    except Exception as err:
        st.error(f"Error initializing conversational chain: {err}")
        raise

# Main Streamlit Application Logic
def main():
    """Render the DocuQuery page and handle a single Streamlit script run.

    The sidebar lets the user upload PDFs and build the conversation chain;
    the main area replays the stored chat history and, when a question is
    submitted, appends the question and its answer to the history and
    reruns the script so the new messages are displayed.
    """
    st.set_page_config(page_title="DocuQuery: PDF AI Assistant", page_icon="📄")
    st.title("📄 DocuQuery: AI-Powered PDF Knowledge Assistant")
    st.markdown("Upload your PDFs and ask questions about their content!")

    # Initialize Streamlit session state variables on the first run.
    if "chat_history" not in st.session_state:
        st.session_state.chat_history = []
    if "conversation_chain" not in st.session_state:
        st.session_state.conversation_chain = None

    with st.sidebar:
        st.header("1. Upload Documents")
        pdf_docs = st.file_uploader(
            "Upload your PDF files here",
            accept_multiple_files=True,
            type="pdf"
        )
        if st.button("Process Documents"):
            if not pdf_docs:
                st.warning("Please upload PDF files first.")
            else:
                with st.spinner("Processing PDFs..."):
                    try:
                        raw_text = get_pdf_text(pdf_docs)
                        if not raw_text.strip():
                            st.warning("No extractable text found in the PDFs.")
                        else:
                            text_chunks = get_text_chunks(raw_text)
                            vector_store = get_vector_store(text_chunks)
                            st.session_state.conversation_chain = get_conversational_chain(vector_store)
                            # A new document set starts a fresh conversation.
                            st.session_state.chat_history = []
                            st.success("Documents processed! You can now ask questions.")
                    except Exception as e:
                        st.error(f"Failed to process documents: {e}")
        st.markdown("---")
        st.caption("Powered by Vamsi(Data Science Student)")

    # The history is a flat list appended in (question, answer) pairs, so
    # even indices are user messages and odd indices are assistant messages.
    for i, message in enumerate(st.session_state.chat_history):
        role = "user" if i % 2 == 0 else "assistant"
        with st.chat_message(role):
            st.write(message)

    user_question = st.chat_input("Ask a question about your documents...")
    if not user_question:
        return

    chain = st.session_state.conversation_chain
    if not chain:
        answer = "Please upload and process PDF documents first."
    else:
        with st.spinner("Thinking..."):
            try:
                # Use invoke(); calling the chain object directly is the
                # deprecated entry point.
                answer = chain.invoke({"question": user_question})["answer"]
            except Exception as e:
                answer = f"Sorry, an error occurred: {e}"

    # Every outcome (answer, error, or missing documents) is recorded in the
    # history rather than shown with st.warning/st.error, because the rerun
    # below would discard any element drawn during this run.
    st.session_state.chat_history.append(user_question)
    st.session_state.chat_history.append(answer)
    st.rerun()  # Rerun to update chat display

# Start the app only when this file is executed as a script
# (e.g. `streamlit run app.py`), not when it is imported.
if __name__ == "__main__":
    main()

In [None]:
from pyngrok import ngrok, conf
from pyngrok.exception import PyngrokNgrokError

ngrok.kill()
# --- Configuration ---
STREAMLIT_PORT = 8000 # The port your Streamlit app is running on

# --- ngrok setup ---
if NGROK_AUTH_TOKEN == "YOUR_NGROK_AUTH_TOKEN":
    print("WARNING: Please replace 'YOUR_NGROK_AUTH_TOKEN' with your actual ngrok auth token.")
    print("You can get it from https://dashboard.ngrok.com/get-started/your-authtoken")
else:
    ngrok.set_auth_token(NGROK_AUTH_TOKEN)

# 1. Kill any existing ngrok processes
print("Attempting to kill existing ngrok processes...")


# 2. Start Streamlit app in the background (if not already running)
print(f"Starting Streamlit app on port {STREAMLIT_PORT}...")
# This assumes your 'app.py' is in the current directory.
# If your Streamlit app is already guaranteed to be running, you can comment this out.
!nohup streamlit run app.py --server.port {STREAMLIT_PORT} &

# 3. Create a new ngrok tunnel
print(f"Creating ngrok tunnel for port {STREAMLIT_PORT}...")
try:
    public_url = ngrok.connect(STREAMLIT_PORT)
    print("🚀 Streamlit App Public URL:", public_url)
    print("Remember to shut down the ngrok tunnel and Streamlit app when done.")
except PyngrokNgrokError as e:
    print(f"❌ Error creating ngrok tunnel: {e}")
    print("Ensure ngrok auth token is correct and Streamlit is running on the specified port.")
except Exception as e:
    print(f"❌ An unexpected error occurred: {e}")