In [None]:
from google.colab import userdata
import os

# --- 1. Define constants and secrets ---
GIT_TOKEN = userdata.get('github_token')
GITHUB_USER = 'yguo005'
GITHUB_REPO = 'medgemma_chatbot'
BRANCH_NAME = 'main'

# --- 2. ALWAYS start from a clean state in /content ---
# Go back to the root content directory to avoid nested paths
%cd /content

# Remove the repository directory if it already exists to ensure a fresh clone
!rm -rf {GITHUB_REPO}

# --- 3. clone the branch ---
!git clone https://github.com/yguo005/medgemma_chatbot.git

# --- 4. Change directory into the newly cloned project ---
%cd {GITHUB_REPO}

/content
Cloning into 'medgemma_chatbot'...
remote: Enumerating objects: 142, done.[K
remote: Counting objects: 100% (142/142), done.[K
remote: Compressing objects: 100% (92/92), done.[K
remote: Total 142 (delta 44), reused 138 (delta 40), pack-reused 0 (from 0)[K
Receiving objects: 100% (142/142), 22.41 MiB | 12.08 MiB/s, done.
Resolving deltas: 100% (44/44), done.
/content/medgemma_chatbot


In [None]:
#------------------DON'T RUN-------------------------------
#----------------------------------------------------------
#----------------------------------------------------------
# Disabled install cell with pinned package versions, kept for reference.
# NOTE(review): the install cell further down appears to be the one that is
# actually executed — confirm before deleting this one.

# --- 1. Install system and Python dependencies ---
# First, upgrade pip and install system-level build tools for FAISS
print("Installing system dependencies for FAISS...")
!pip install --upgrade -q pip
!apt-get update -qq
!apt-get install -y -qq libomp-dev cmake

# --- 2. Install faiss-cpu by itself ---
# Installed separately from the other packages to isolate any issues.
# (This is the CPU build; faiss-gpu is not installed here.)

print("Installing faiss-cpu...")
!pip install -q faiss-cpu==1.8.0

# --- 3. Install the rest of the Python packages ---
# All versions are pinned except transformers (lower bound only), pypdf and
# python-dotenv (unpinned).
print("\nInstalling remaining Python packages...")
!pip install -q \
    "torch==2.2.2" \
    "transformers>=4.42.4" \
    "accelerate==0.29.3" \
    "bitsandbytes==0.43.1" \
    "langchain==0.1.16" \
    "langchain-community==0.0.38" \
    "langchain-openai==0.1.3" \
    "fastapi==0.110.0" \
    "uvicorn==0.29.0" \
    "python-multipart==0.0.9" \
    "pypdf" \
    "python-dotenv" \
    "google-cloud-aiplatform==1.47.0" \
    "pyngrok==7.1.6" \
    "pydantic==1.10.13"

print("\n All dependencies installed successfully!")

In [None]:
# ---Install System and Python Dependencies ---
print("Installing all packages with latest compatible versions...")
!pip install -q \
    torch \
    "transformers>=4.42.4" \
    accelerate \
    bitsandbytes \
    langchain \
    langchain-community \
    langchain-openai \
    faiss-cpu \
    fastapi \
    uvicorn \
    python-multipart \
    pypdf \
    python-dotenv \
    google-cloud-aiplatform \
    pyngrok \
    pydantic \
    starlette
!pip install --upgrade "pydantic>=2.0.0"
!pip install --upgrade langchain langchain-community langchain-openai
!pip install --upgrade torch torchvision torchaudio

print(" \nInstallation completed!")

Installing all packages with latest compatible versions...
[0m 
Installation completed!


In [None]:
from google.colab import auth, userdata
import os
from huggingface_hub import login

# Sign this Colab session in so Google Cloud services can be used
auth.authenticate_user()

# Copy the API credentials stored in Colab Secrets into the process
# environment; each secret is exported under its own name.
for secret_name in ('OPENAI_API_KEY', 'NGROK_AUTHTOKEN'):
    os.environ[secret_name] = userdata.get(secret_name)

# Fixed settings for the demo run
os.environ.update({
    'DEPLOYMENT_MODE': 'development',
    'USE_MEDGEMMA_GARDEN': 'false',
})

# Authenticate with Hugging Face using the HF_TOKEN secret
HF_TOKEN = userdata.get('HF_TOKEN')
login(token=HF_TOKEN)

In [None]:
# Build the Knowledge Base
# Runs the project's create_memory_for_llm.py script as a separate process.
# Per the script's own log output, it loads the PDF file(s) under
# data/document, splits them into text chunks, embeds them with an OpenAI
# embedding model and saves a FAISS vector store to data/vectorstore/db_faiss,
# overwriting a store that already exists.
# NOTE(review): presumably relies on OPENAI_API_KEY having been exported by
# the authentication cell — confirm.
!python /content/medgemma_chatbot/src/services/ai/rag/create_memory_for_llm.py


 Configuration Status:
   Mode: development
   MedGemma: Local HF
   Valid: True

 Creating FAISS Vector Store for AI Health Consultant
 Current script: /content/medgemma_chatbot/src/services/ai/rag/create_memory_for_llm.py
 Project root: /content/medgemma_chatbot
 Data path: /content/medgemma_chatbot/data/document
 FAISS path: /content/medgemma_chatbot/data/vectorstore/db_faiss
 Project root exists: True
 Data directory exists: True

 Loaded 759 documents from 1 PDF file(s)
 Created 7080 text chunks.
 OpenAI Embedding Model Loaded (Vector Dimension: 1536)
 FAISS vector store already exists. Overwriting...
 FAISS vector store created and saved to: /content/medgemma_chatbot/data/vectorstore/db_faiss


In [None]:
# Run the FastAPI Server using ngrok
import os
import sys
import asyncio
from pyngrok import ngrok, conf

# Put the "src" directory (resolved against the current working directory)
# at the front of the import path.
sys.path.insert(0, os.path.abspath('src'))


def _stop_if_unset(value, error_line, hint_line):
    """Print both messages and exit with status 1 when ``value`` is falsy."""
    if value:
        return
    print(error_line)
    print(hint_line)
    sys.exit(1)


# Read both credentials from the environment, then check them in order
NGROK_TOKEN = os.environ.get("NGROK_AUTHTOKEN")
OPENAI_KEY = os.environ.get("OPENAI_API_KEY")

_stop_if_unset(
    NGROK_TOKEN,
    " ERROR: NGROK_AUTHTOKEN not set!",
    "Set it with: os.environ['NGROK_AUTHTOKEN'] = 'your-token-here'",
)
_stop_if_unset(
    OPENAI_KEY,
    " ERROR: OPENAI_API_KEY not set!",
    "Set it with: os.environ['OPENAI_API_KEY'] = 'your-key-here'",
)


# Hand the validated token to pyngrok's default configuration
conf.get_default().auth_token = NGROK_TOKEN

async def run_fastapi():
    """Serve the FastAPI app ``main:app`` on port 8000 behind an ngrok tunnel.

    Checks that ``main.py`` exists in the current working directory, opens an
    ngrok tunnel to port 8000, prints the public URLs and then runs a uvicorn
    server until it exits or the cell is interrupted. The tunnel is closed
    again on the way out, but only if it was actually opened.

    Returns:
        None. Exceptions raised while starting or serving are printed
        together with a traceback instead of being propagated.
    """
    # Stays None until the tunnel is open so the cleanup step knows whether
    # there is anything to disconnect.
    tunnel = None
    try:
        # Use nest_asyncio to allow uvicorn to run in a notebook
        import nest_asyncio
        nest_asyncio.apply()

        # Import uvicorn
        import uvicorn

        print(" Starting FastAPI server...")
        print(f" Working directory: {os.getcwd()}")

        # Check if main.py exists
        if not os.path.exists("main.py"):
            print(" ERROR: main.py not found in current directory!")
            print("Make sure you're in the correct directory with your FastAPI app.")
            return

        # Configure uvicorn server
        config = uvicorn.Config(
            "main:app",
            host="0.0.0.0",
            port=8000,
            log_level="info",
            reload=False  # Disable reload in Colab
        )
        server = uvicorn.Server(config)

        # Open a tunnel to the uvicorn server
        print(" Opening ngrok tunnel...")
        tunnel = ngrok.connect(8000)
        # ngrok.connect returns a tunnel object; formatting that object
        # directly yields its repr, not a usable link, so take the URL string.
        public_url = tunnel.public_url
        print(f" FastAPI server is live at: {public_url}")
        print(f" Mobile interface: {public_url}/mobile.html")
        print(f" Desktop interface: {public_url}/")
        print(f" API docs: {public_url}/docs")
        print("\n To stop the server, interrupt this cell (Runtime > Interrupt execution)")

        # Run the server
        await server.serve()

    except Exception as e:
        print(f" Error starting server: {e}")
        import traceback
        traceback.print_exc()
    finally:
        # Clean up the ngrok tunnel, identified by its public URL (not by the
        # local port). Cleanup is best-effort: a failure is reported but never
        # raised, so it cannot mask an earlier error.
        if tunnel is not None:
            try:
                ngrok.disconnect(tunnel.public_url)
                print(" Cleaned up ngrok tunnel")
            except Exception as cleanup_error:
                print(f" Could not clean up ngrok tunnel: {cleanup_error}")

# Run the server asynchronously.
# The top-level `await` is handled by the notebook's IPython kernel; this cell
# keeps running until the server shuts down or the cell is interrupted.
await run_fastapi()

 Starting FastAPI server...
 Working directory: /content/medgemma_chatbot
 Opening ngrok tunnel...
 FastAPI server is live at: NgrokTunnel: "https://b5c26f371aae.ngrok-free.app" -> "http://localhost:8000"
 Mobile interface: NgrokTunnel: "https://b5c26f371aae.ngrok-free.app" -> "http://localhost:8000"/mobile.html
 Desktop interface: NgrokTunnel: "https://b5c26f371aae.ngrok-free.app" -> "http://localhost:8000"/
 API docs: NgrokTunnel: "https://b5c26f371aae.ngrok-free.app" -> "http://localhost:8000"/docs

 To stop the server, interrupt this cell (Runtime > Interrupt execution)

 Configuration Status:
   Mode: development
   MedGemma: Local HF
   Valid: True





tokenizer_config.json:   0%|          | 0.00/1.16M [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/4.69M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/33.4M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/35.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/662 [00:00<?, ?B/s]

chat_template.jinja:   0%|          | 0.00/1.53k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/2.47k [00:00<?, ?B/s]

`torch_dtype` is deprecated! Use `dtype` instead!
ERROR:src.services.ai.medgemma.medgemma_service: Failed to load MedGemma model: Could not import module 'validate_bnb_backend_availability'. Are this object's requirements defined correctly?
INFO:     Started server process [57486]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit)


INFO:     73.92.81.141:0 - "GET / HTTP/1.1" 200 OK
INFO:     73.92.81.141:0 - "GET /static/css/style.css HTTP/1.1" 200 OK
INFO:     73.92.81.141:0 - "GET /static/js/main.js HTTP/1.1" 200 OK


INFO:     Shutting down
INFO:     Waiting for application shutdown.
INFO:     Application shutdown complete.
INFO:     Finished server process [57486]


 Cleaned up ngrok tunnel
