<a href="https://colab.research.google.com/github/vamsi8394/Precision-medicine-AI/blob/main/Medical%20Chatbot.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import streamlit as st
import os
from dotenv import load_dotenv
from PyPDF2 import PdfReader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory

# Load environment variables (via python-dotenv) and read the Google API key.
load_dotenv()
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")

# --- Configuration ---
# Nothing below can work without the key: report the problem in the UI and
# halt this script run.
if not GOOGLE_API_KEY:
    st.error("GOOGLE_API_KEY not found in .env file. Please set it up.")
    st.stop()

# Module-level Google Generative AI components shared by the helper functions.
embeddings = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001",
    google_api_key=GOOGLE_API_KEY,
)
llm = ChatGoogleGenerativeAI(
    model="gemini-pro",
    temperature=0.3,
    google_api_key=GOOGLE_API_KEY,
)

# --- Helper Functions ---

def get_pdf_text(pdf_docs):
    """Extract and concatenate the text of every page in the given PDFs.

    Args:
        pdf_docs: Iterable of PDF sources, each passed as-is to ``PdfReader``.

    Returns:
        One string holding the text of all pages, in document order and then
        page order, with no separator inserted between pages. A page whose
        ``extract_text()`` returns ``None`` or ``""`` contributes nothing.
        An empty ``pdf_docs`` yields ``""``.
    """
    # Join once at the end instead of growing a string with ``+=`` per page;
    # the ``or ""`` guards against ``extract_text()`` returning None.
    return "".join(
        page.extract_text() or ""
        for pdf in pdf_docs
        for page in PdfReader(pdf).pages
    )

def get_text_chunks(text):
    """Break the extracted text into overlapping chunks.

    The text is split with a ``RecursiveCharacterTextSplitter`` configured
    with ``chunk_size=1000`` and ``chunk_overlap=200``, both measured with
    ``len``.

    Args:
        text: The full text to split.

    Returns:
        Whatever ``split_text`` returns for ``text`` (the chunks).
    """
    splitter_options = {
        "chunk_size": 1000,
        "chunk_overlap": 200,
        "length_function": len,
    }
    return RecursiveCharacterTextSplitter(**splitter_options).split_text(text)

def get_vector_store(text_chunks):
    """Create a FAISS vector store from text chunks using Google embeddings.

    The store is built with the module-level ``embeddings`` object.

    Args:
        text_chunks: Non-empty collection of strings to embed and index.

    Returns:
        The vector store returned by ``FAISS.from_texts``.

    Raises:
        ValueError: If ``text_chunks`` is empty (for example when no text
            could be extracted from the uploaded PDFs).
    """
    # Fail early with a clear message: with no chunks there is nothing to
    # embed or index, and the failure would otherwise surface as an opaque
    # error from inside the vector-store library.
    if not text_chunks:
        raise ValueError(
            "Cannot build a vector store: no text chunks were provided."
        )

    # Note: For a real application, you might want to save/load this to disk
    # using FAISS.save_local and FAISS.load_local for persistence.
    # For this demo, it's in-memory.
    return FAISS.from_texts(text_chunks, embedding=embeddings)

def get_conversation_chain(vector_store):
    """Build the conversational retrieval chain over the indexed documents.

    Combines the module-level ``llm``, a retriever obtained from
    ``vector_store.as_retriever()``, and a ``ConversationBufferMemory`` that
    stores the dialogue under the ``chat_history`` key with
    ``return_messages=True``.

    Args:
        vector_store: Object exposing ``as_retriever()``.

    Returns:
        The chain produced by ``ConversationalRetrievalChain.from_llm``.
    """
    chat_memory = ConversationBufferMemory(
        memory_key='chat_history',
        return_messages=True,
    )
    doc_retriever = vector_store.as_retriever()
    return ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=doc_retriever,
        memory=chat_memory,
    )

def handle_userinput(user_question):
    """Send a question to the conversation chain and render the new messages.

    The chain stored in ``st.session_state.conversation`` is called with the
    question, ``st.session_state.chat_history`` is replaced by the
    ``chat_history`` of the response, and the messages added by this turn are
    drawn as chat bubbles. If no chain exists yet, a warning is shown instead.

    Only the messages beyond those that were in ``st.session_state.chat_history``
    when this function was entered are rendered: ``main()`` draws the stored
    history before calling this function, so re-drawing the whole history here
    would show every earlier message twice.

    Args:
        user_question: The question text entered by the user.
    """
    conversation = st.session_state.conversation
    if not conversation:
        st.warning("Please upload PDF documents first to start the conversation.")
        return

    # Count the already-displayed messages *before* calling the chain. The
    # stored history may be the same list object the chain's memory appends
    # to, in which case its length changes during the call.
    already_shown = len(st.session_state.chat_history)

    with st.spinner("Thinking..."):
        response = conversation({'question': user_question})
    st.session_state.chat_history = response['chat_history']

    # Display only this turn's messages. Roles are derived from the absolute
    # position in the history: even index = user, odd index = assistant.
    new_messages = st.session_state.chat_history[already_shown:]
    for index, message in enumerate(new_messages, start=already_shown):
        role = "user" if index % 2 == 0 else "assistant"
        with st.chat_message(role):
            st.write(message.content)

# --- Streamlit UI ---

def main():
    """Render the Streamlit page: PDF upload sidebar plus chat interface.

    Session state keys used:
        conversation: The conversation chain, or ``None`` until documents
            have been processed.
        chat_history: List of chat messages shown in the main area.
        processed_pdfs: ``True`` once documents were processed successfully.

    Clicking "Process Documents" extracts text from the uploaded PDFs, splits
    it into chunks, builds the vector store and conversation chain, and
    resets the chat history. If no text chunks could be produced from the
    uploads, an error is shown and the existing state is left untouched.
    """
    st.set_page_config(page_title="Medical AI Assistant", page_icon="🩺")

    st.title("🩺 Medical AI Assistant")
    st.markdown("Upload medical PDFs and ask questions about their content.")

    # Initialize session state variables if they don't exist
    if "conversation" not in st.session_state:
        st.session_state.conversation = None
    if "chat_history" not in st.session_state:
        st.session_state.chat_history = []
    if "processed_pdfs" not in st.session_state:
        st.session_state.processed_pdfs = False

    # Sidebar for PDF upload
    with st.sidebar:
        st.header("Your Documents")
        pdf_docs = st.file_uploader(
            "Upload your medical PDFs here and click 'Process'",
            accept_multiple_files=True,
            type=["pdf"]
        )
        if st.button("Process Documents"):
            if pdf_docs:
                with st.spinner("Processing... This may take a moment."):
                    # Get PDF text
                    raw_text = get_pdf_text(pdf_docs)

                    # Get text chunks
                    text_chunks = get_text_chunks(raw_text)

                    if not text_chunks:
                        # Nothing to index (e.g. PDFs without extractable
                        # text). Building a vector store from zero chunks
                        # would fail, so report it and keep current state.
                        st.error(
                            "No extractable text was found in the uploaded "
                            "PDFs. Please upload text-based PDF documents."
                        )
                    else:
                        # Create vector store
                        vector_store = get_vector_store(text_chunks)

                        # Create conversation chain
                        st.session_state.conversation = get_conversation_chain(vector_store)
                        st.session_state.processed_pdfs = True
                        st.success("Documents processed! You can now ask questions.")
                        st.session_state.chat_history = [] # Clear history on new processing
            else:
                st.warning("Please upload at least one PDF document.")

    # Main chat interface
    if st.session_state.processed_pdfs:
        st.subheader("Ask a question about your documents:")

        # Display existing chat history; even index = user, odd = assistant
        for i, message in enumerate(st.session_state.chat_history):
            role = "user" if i % 2 == 0 else "assistant"
            with st.chat_message(role):
                st.write(message.content)

        user_question = st.chat_input("Type your question here...")
        if user_question:
            handle_userinput(user_question)
    else:
        st.info("Upload PDF documents in the sidebar to begin.")

if __name__ == '__main__':
    main()



✅ Done! Ready to download.


In [None]:
# Colab-only cell: calls files.download on the chatbot archive, which
# presumably triggers a browser download from the Colab runtime.
# NOTE(review): "my-medical-chatbot.zip" must already exist in the runtime's
# working directory; the step that creates it is not visible here — confirm.
from google.colab import files
files.download("my-medical-chatbot.zip")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>