# GPU API on Google Colab with FastAPI & ngrok

This notebook sets up a FastAPI server on Colab's GPU runtime that:
- Accepts image/video uploads via POST `/generate`
- Processes files using GPU (placeholder for 3D generation)
- Saves results to Google Drive
- Returns the saved result's Google Drive path in the `download_url` response field
- Exposes the API publicly via ngrok

## 1️⃣ Install & Imports

In [None]:
# Install the packages used by this notebook: FastAPI + uvicorn (API server),
# pyngrok (public tunnel) and nest-asyncio (event-loop patch for notebooks).
# python-multipart is never imported directly; FastAPI needs it to parse the
# multipart form data of file uploads.
!pip -q install fastapi uvicorn pyngrok nest-asyncio python-multipart

In [None]:
import os
import shutil
import asyncio
import nest_asyncio
from pathlib import Path
from datetime import datetime
from contextlib import asynccontextmanager

from fastapi import FastAPI, UploadFile, File, HTTPException, Header
from fastapi.responses import JSONResponse
import uvicorn
from pyngrok import ngrok
from google.colab import drive

# Patch asyncio so an event loop can be run even though the notebook kernel
# already has one running (nested event loops in Jupyter/Colab).
nest_asyncio.apply()

## 2️⃣ API Setup

In [None]:
# Configuration
# Secrets are taken from environment variables when they are set, so they do
# not have to be saved inside the notebook; the literals below are only the
# fallback defaults.
NGROK_AUTHTOKEN = os.environ.get("NGROK_AUTHTOKEN", "REPLACE_ME")  # Get from the ngrok dashboard (https://dashboard.ngrok.com)
API_KEY = os.environ.get("GPU_API_KEY", "foo")  # Change this to a secure key in production
UPLOAD_DIR = Path("/tmp/uploads")  # scratch space for incoming uploads
OUTPUT_DIR = Path("/content/drive/MyDrive/gpu_api_outputs")  # results folder inside the mounted Drive

# The API is exposed publicly through ngrok, so make unsafe defaults visible.
if NGROK_AUTHTOKEN == "REPLACE_ME":
    print("WARNING: NGROK_AUTHTOKEN is still the placeholder; set it before starting the tunnel.")
if API_KEY == "foo":
    print("WARNING: API_KEY is the insecure default; set GPU_API_KEY before exposing the API.")

# Create upload directory (parents=True so a custom nested path also works)
UPLOAD_DIR.mkdir(parents=True, exist_ok=True)

# Set ngrok auth token
ngrok.set_auth_token(NGROK_AUTHTOKEN)

In [None]:
# Attach Google Drive at the Colab mount point, then make sure the results
# folder (and any missing parent folders) exists inside it.
drive.mount("/content/drive")

OUTPUT_DIR.mkdir(exist_ok=True, parents=True)
print(f"Output directory created at: {OUTPUT_DIR}")

In [None]:
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Log a message when the API server starts and another when it stops.

    Passed to ``FastAPI(lifespan=...)``: the part before ``yield`` is the
    startup phase, the part after it is the shutdown phase.
    """
    print("Starting API server...")
    yield  # the application runs while this generator is suspended
    print("Shutting down API server...")

# Application instance shared by every route decorator below; startup and
# shutdown logging is delegated to the lifespan context manager.
app = FastAPI(
    lifespan=lifespan,
    title="GPU Processing API",
    description="Process images/videos on Colab GPU",
    version="1.0.0",
)

## 3️⃣ Upload → Process (Dummy GPU Processing)

In [None]:
def verify_api_key(api_key: str = Header(None)):
    """Check a client-supplied API key against the configured ``API_KEY``.

    Args:
        api_key: Value of the ``api-key`` request header (FastAPI converts the
            underscore in the parameter name to a hyphen), or ``None`` when
            the header is absent.

    Returns:
        The supplied key, unchanged, when it matches.

    Raises:
        HTTPException: 401 when the key is missing or does not match.
    """
    import secrets

    # Compare as bytes in constant time so the response time does not reveal
    # how many leading characters of a guessed key were correct. Encoding
    # first also lets compare_digest accept non-ASCII keys.
    key_matches = isinstance(api_key, str) and secrets.compare_digest(
        api_key.encode("utf-8"), API_KEY.encode("utf-8")
    )
    if not key_matches:
        raise HTTPException(status_code=401, detail="Invalid API key")
    return api_key


def process_with_gpu(input_path: Path, output_path: Path):
    """Stand-in for the real GPU pipeline (e.g. 3D generation).

    Reports which device is available, copies ``input_path`` to
    ``output_path`` unchanged, and pauses for two seconds to imitate
    processing time. Swap the copy step for the actual model code.

    Args:
        input_path: File to process.
        output_path: Destination for the processed result.
    """
    import time

    import torch

    # Report the device the real workload would run on.
    if not torch.cuda.is_available():
        print("WARNING: No GPU available, running on CPU")
    else:
        print(f"GPU available: {torch.cuda.get_device_name(0)}")
        total_gb = torch.cuda.get_device_properties(0).total_memory / 1e9
        print(f"GPU memory: {total_gb:.2f} GB")

    # Placeholder work: the "result" is a plain copy of the input.
    shutil.copy(input_path, output_path)

    # Pretend the job took a while.
    time.sleep(2)

    print(f"Processed {input_path.name} -> {output_path.name}")

## 4️⃣ API Endpoints: Upload, Process & Save to Google Drive

In [None]:
import threading
import uuid

# Uploads are streamed to disk in pieces of this size instead of being read
# into memory in one go (videos can be large).
_UPLOAD_CHUNK_BYTES = 1024 * 1024

# GPU jobs run in worker threads so the event loop stays responsive; this lock
# keeps them strictly one-at-a-time, as they were when the handler blocked.
_GPU_LOCK = threading.Lock()


def _sanitize_upload_name(raw_name: str) -> str:
    """Return the bare file name of a client-supplied path, or "" if unusable."""
    candidate = Path(raw_name.replace("\\", "/")).name
    return "" if candidate in ("", ".", "..") else candidate


def _run_gpu_job(input_path: Path, output_path: Path) -> None:
    """Run process_with_gpu while holding the GPU lock (worker-thread target)."""
    with _GPU_LOCK:
        process_with_gpu(input_path, output_path)


@app.post("/generate")
async def generate(file: UploadFile = File(...), api_key: str = Header(None)):
    """Process an uploaded file and report where the result was saved.

    Args:
        file: The uploaded image/video (multipart form field ``file``).
        api_key: Value of the ``api-key`` request header.

    Returns:
        JSONResponse with ``status``, ``download_url`` (the result's path
        inside the mounted Drive, not an HTTP link), ``filename`` and
        ``processed_at``.

    Raises:
        HTTPException: 401 for a bad API key, 400 when no usable file name
            was supplied, 500 when saving or processing fails.
    """
    # Verify API key
    verify_api_key(api_key)

    # The file name comes from the client: keep only its final component so
    # directory separators or ".." segments cannot end up in our paths.
    safe_name = _sanitize_upload_name(file.filename or "")
    if not safe_name:
        raise HTTPException(status_code=400, detail="No file provided")

    # Timestamp for readability plus a random suffix so two uploads of the
    # same file within one second do not overwrite each other.
    job_id = f"{datetime.now().strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:8]}"
    output_filename = f"output_{job_id}_{safe_name}"
    input_path = UPLOAD_DIR / f"input_{job_id}_{safe_name}"
    output_path = OUTPUT_DIR / output_filename

    try:
        # Save uploaded file
        with open(input_path, "wb") as f:
            while chunk := await file.read(_UPLOAD_CHUNK_BYTES):
                f.write(chunk)

        # Process with GPU off the event loop so other requests (e.g. the
        # health check) are still served while the job runs.
        await asyncio.to_thread(_run_gpu_job, input_path, output_path)
    except Exception as e:
        # Do not leave a partial result behind in Drive.
        output_path.unlink(missing_ok=True)
        raise HTTPException(status_code=500, detail=str(e)) from e
    finally:
        # The uploaded copy is only scratch data, on success and on failure.
        input_path.unlink(missing_ok=True)

    # Simplified for the demo: this is a filesystem path in the mounted Drive.
    # In production, use the Google Drive API to create a shareable link.
    download_url = str(output_path)

    return JSONResponse(
        content={
            "status": "success",
            "download_url": download_url,
            "filename": output_filename,
            "processed_at": datetime.now().isoformat(),
        }
    )


@app.get("/")
async def root():
    """Health check: report that the service is up and list its routes."""
    available_routes = ["/", "/generate"]
    return {
        "status": "online",
        "message": "GPU Processing API is running",
        "endpoints": available_routes,
    }

## 5️⃣ Print Public URL & Keepalive

In [ ]:
# Start the server and create ngrok tunnel
import threading
import time


def run_server():
    """Run uvicorn on port 8000; blocks, so it is used as a thread target."""
    uvicorn.run(app, host="0.0.0.0", port=8000)


# Start uvicorn in a daemon thread so this cell can continue and the thread
# does not keep the process alive on its own.
server_thread = threading.Thread(target=run_server, daemon=True)
server_thread.start()

# Wait for server to start
time.sleep(3)

# Create ngrok tunnel
tunnel = ngrok.connect(8000)
public_url = tunnel.public_url
print("\n" + "="*50)
print(f"🚀 API is live at: {public_url}")
print("="*50)
print("\nTest with:")
print(f"export PUBLIC_URL='{public_url}'")
# The header must be spelled `api-key`: FastAPI derives the header name from
# the endpoint's `api_key` parameter by turning underscores into hyphens, so a
# header named `API_KEY` is never matched and the request is rejected with 401.
print(f"curl -X POST $PUBLIC_URL/generate -H 'api-key: {API_KEY}' -F 'file=@sample.mp4'")
print("\n" + "="*50)

In [None]:
# Keep the notebook running
# The API server runs in a daemon thread, so this cell just stays busy and
# prints a heartbeat line once a minute. Interrupting the cell
# (KeyboardInterrupt) closes the tunnel for public_url and then stops the
# ngrok process.
print("\n⏰ Server is running. Keep this cell executing to maintain the connection.")
print("Press 'Stop' to shutdown the server.\n")

try:
    while True:
        time.sleep(60)
        print(f"[{datetime.now().strftime('%H:%M:%S')}] Server alive at: {public_url}")
except KeyboardInterrupt:
    print("\nShutting down server...")
    # Close this notebook's tunnel first, then terminate ngrok itself.
    ngrok.disconnect(public_url)
    ngrok.kill()