<a href="https://colab.research.google.com/github/usmanakhter/graysanatomy_agent/blob/main/graysanatomy_agent.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:

# -------------------------------
# Step 0: Install dependencies
# -------------------------------
!pip install --quiet langchain sentence-transformers transformers gradio faiss-cpu langchain_huggingface langchain-community

In [None]:
# =========================================
# Gray's Anatomy QA - Fast Vector Store (GPU + Batched)
# =========================================

# Step 0: Install dependencies
!pip install --quiet langchain sentence-transformers transformers gradio faiss-cpu

# Step 1: Download Gray's Anatomy
# The text is cached on disk so re-running the cell does not hit the network.
import requests
import os

filepath = "grays_anatomy.txt"
if not os.path.exists(filepath):
    url = "https://archive.org/stream/anatomyofhumanbo1918gray/anatomyofhumanbo1918gray_djvu.txt"
    # Bound the wait so a stalled connection cannot hang the notebook forever.
    response = requests.get(url, timeout=60)
    # Fail loudly on an HTTP error: without this, an error page would be
    # written to disk and then treated as the cached book on every later run.
    response.raise_for_status()
    with open(filepath, "w", encoding="utf-8") as f:
        f.write(response.text)
    print("✓ Gray's Anatomy downloaded!")
else:
    print("✓ Gray's Anatomy already exists!")

# Step 2: Load and split text
# The loaders and splitters were moved out of the core `langchain` package;
# prefer the split-out packages and fall back to the legacy import paths so
# the cell works with either layout.
try:
    from langchain_community.document_loaders import TextLoader
    from langchain_text_splitters import RecursiveCharacterTextSplitter
except ImportError:
    from langchain.document_loaders import TextLoader
    from langchain.text_splitter import RecursiveCharacterTextSplitter

# Read the downloaded book as a list of LangChain documents.
loader = TextLoader(filepath, encoding="utf-8")
documents = loader.load()

# Split into 1000-character chunks; the 200-character overlap keeps text that
# straddles a chunk boundary present in both neighbouring chunks.
splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
chunks = splitter.split_documents(documents)
print(f"✓ Text split into {len(chunks)} chunks")

# Step 3: Create embeddings (GPU + batched)
from sentence_transformers import SentenceTransformer
import torch
import numpy as np

# Use the GPU when one is available; otherwise run on the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2", device=device)

batch_size = 64  # Adjust if GPU memory is low

# `encode` already processes its input in `batch_size` slices, so one call
# replaces the manual slicing loop and shows a single progress bar for the
# whole corpus instead of one bar per slice.
chunk_texts = [c.page_content for c in chunks]
embeddings = model.encode(
    chunk_texts,
    convert_to_numpy=True,
    batch_size=batch_size,
    show_progress_bar=True,
)
print(f"✓ Created embeddings: shape = {embeddings.shape}")

# Step 4: Build FAISS vector store
# Prefer the split-out LangChain packages; fall back to the legacy import
# paths so the cell works with either layout.
try:
    from langchain_community.vectorstores import FAISS
    from langchain_huggingface import HuggingFaceEmbeddings
except ImportError:
    from langchain.vectorstores import FAISS
    from langchain.embeddings import HuggingFaceEmbeddings

# LangChain-compatible wrapper around the same model used in Step 3. The store
# keeps it to embed queries at search time.
hf_embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Reuse the vectors computed in Step 3 instead of letting `from_documents`
# embed every chunk a second time.
vectorstore = FAISS.from_embeddings(
    text_embeddings=list(zip((c.page_content for c in chunks), embeddings.tolist())),
    embedding=hf_embeddings,
    metadatas=[c.metadata for c in chunks],
)
vectorstore.save_local("grays_anatomy_vectorstore")
print("✓ FAISS vector store created and saved!")

# Step 5: Optional - reload vector store later
# vectorstore = FAISS.load_local(
#     "grays_anatomy_vectorstore",
#     hf_embeddings,
#     allow_dangerous_deserialization=True,  # only for stores you built yourself
# )


In [None]:
# =========================================
# Gray's Anatomy AI - Embeddings-Only Gradio UI
# =========================================

# 1️⃣ Install dependencies (if not already)
!pip install --quiet langchain sentence-transformers gradio faiss-cpu

# 2️⃣ Load FAISS vector store
# Prefer the split-out LangChain packages; fall back to the legacy import
# paths so the cell works with either layout.
try:
    from langchain_community.vectorstores import FAISS
    from langchain_huggingface import HuggingFaceEmbeddings
except ImportError:
    from langchain.vectorstores import FAISS
    from langchain.embeddings import HuggingFaceEmbeddings

# Must be the same model the index was built with, since it embeds the queries
# that are compared against the stored vectors.
hf_embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Loading unpickles the stored docstore, which can execute arbitrary code.
# Only enable the flag for an index directory you created yourself.
vectorstore = FAISS.load_local(
    "grays_anatomy_vectorstore",
    hf_embeddings,
    allow_dangerous_deserialization=True  # safe because we built it ourselves
)

# 3️⃣ Define embeddings-only QA function
def answer_question_no_llm(question, k=4, max_chars=2000):
    """Return the stored passages most similar to *question*.

    No language model is involved: the "answer" is the raw text of the
    top-``k`` chunks returned by the module-level ``vectorstore``.

    Args:
        question: Query text to search the vector store with.
        k: Number of chunks to retrieve.
        max_chars: Maximum length of the returned string. ``None`` disables
            truncation.

    Returns:
        The retrieved chunks joined by blank lines, cut to ``max_chars``
        characters for display.
    """
    docs = vectorstore.similarity_search(question, k=k)
    answer_text = "\n\n".join(d.page_content for d in docs)
    return answer_text[:max_chars]  # truncate for display

# 4️⃣ Define Gradio chat callback
def chat_fn(question, chat_history):
    """Gradio callback: answer *question* and extend the conversation.

    Args:
        question: Text from the question box.
        chat_history: List of ``(question, answer)`` pairs shown so far;
            ``None`` is treated as an empty conversation.

    Returns:
        A ``(textbox_value, history)`` pair: an empty string to clear the
        question box, and the updated history for the chatbot.
    """
    # Work on a copy so the caller's list is never mutated in place.
    history = list(chat_history or [])
    # Ignore blank submissions rather than searching on an empty query and
    # adding an empty turn to the conversation.
    if not question or not question.strip():
        return "", history
    answer = answer_question_no_llm(question)
    history.append((question, answer))
    return "", history

# 5️⃣ Build Gradio UI
import gradio as gr

# Two-column layout: the conversation and input on the left, one-click example
# questions on the right.
with gr.Blocks(theme=gr.themes.Soft(), title="Gray's Anatomy AI") as demo:
    gr.Markdown("# 🧠 Gray's Anatomy FAQ Agent\nAsk questions about human anatomy based on Gray's Anatomy (1918).")

    with gr.Row():
        with gr.Column(scale=2):
            chatbot = gr.Chatbot(height=500, label="Conversation")
            question_input = gr.Textbox(
                placeholder="Ask about anatomy...",
                label="Your Question",
                lines=2
            )
            submit_btn = gr.Button("Ask", variant="primary")
            clear_btn = gr.Button("Clear Conversation")

        with gr.Column(scale=1):
            gr.Markdown("### 💡 Example Questions")
            examples = [
                "What are the main bones of the skull?",
                "Describe the structure of the heart",
                "What muscles are involved in breathing?",
                "Explain the layers of the skin",
                "What is the function of the cerebellum?",
                "Describe the structure of a long bone",
                "What are the parts of the digestive system?",
                "Explain the vertebral column"
            ]
            for ex in examples:
                btn = gr.Button(ex, size="sm")
                # Bind `ex` as a default argument so each button keeps its own
                # text instead of all sharing the loop's final value.
                btn.click(lambda x=ex: x, outputs=question_input)

    # 6️⃣ Wire events
    # Both the button and Enter in the textbox submit the question.
    submit_btn.click(chat_fn, inputs=[question_input, chatbot], outputs=[question_input, chatbot])
    question_input.submit(chat_fn, inputs=[question_input, chatbot], outputs=[question_input, chatbot])
    # The lambda returns two values, so it needs two outputs. With only
    # `chatbot` listed, the whole tuple was sent to the chatbot as its value.
    clear_btn.click(lambda: ("", []), None, [question_input, chatbot])

# 7️⃣ Launch
demo.launch(share=True)
