In [1]:
# Install required packages
!pip install fastapi uvicorn python-multipart langchain chromadb sentence-transformers pydantic python-jose python-dotenv slowapi PyPDF2 pyngrok
!pip install -U langchain-community
!pip install nest_asyncio

Collecting fastapi
  Downloading fastapi-0.115.6-py3-none-any.whl.metadata (27 kB)
Collecting uvicorn
  Downloading uvicorn-0.33.0-py3-none-any.whl.metadata (6.6 kB)
Collecting python-multipart
  Downloading python_multipart-0.0.19-py3-none-any.whl.metadata (1.8 kB)
Collecting chromadb
  Downloading chromadb-0.5.23-py3-none-any.whl.metadata (6.8 kB)
Collecting python-jose
  Downloading python_jose-3.3.0-py2.py3-none-any.whl.metadata (5.4 kB)
Collecting python-dotenv
  Downloading python_dotenv-1.0.1-py3-none-any.whl.metadata (23 kB)
Collecting slowapi
  Downloading slowapi-0.1.9-py3-none-any.whl.metadata (3.0 kB)
Collecting PyPDF2
  Downloading pypdf2-3.0.1-py3-none-any.whl.metadata (6.8 kB)
Collecting pyngrok
  Downloading pyngrok-7.2.2-py3-none-any.whl.metadata (8.4 kB)
Collecting starlette<0.42.0,>=0.40.0 (from fastapi)
  Downloading starlette-0.41.3-py3-none-any.whl.metadata (6.0 kB)
Collecting build>=1.0.3 (from chromadb)
  Downloading build-1.2.2.post1-py3-none-any.whl.metadata (

In [2]:
%%shell
# Download and install Ollama using the official install script.
# -f makes curl fail on an HTTP error instead of piping an error page into sh;
# -sSL follows redirects and stays quiet except for real errors.
curl -fsSL https://ollama.ai/install.sh | sh

# Start Ollama service in background
nohup ollama serve > ollama.log 2>&1 &

# Wait for Ollama to start: poll the API (for at most 60 s) instead of
# sleeping a fixed time, so slow starts do not break the pull below.
timeout 60 sh -c 'until curl -sf http://localhost:11434/api/version > /dev/null; do sleep 1; done' \
  || echo "Ollama did not become ready within 60s; see ollama.log"

# Pull the model
ollama pull phi3

# Verify Ollama is running
curl http://localhost:11434/api/version

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 13269    0 13269    0     0  37076      0 --:--:-- --:--:-- --:--:-- 37168
>>> Installing ollama to /usr/local
>>> Downloading Linux amd64 bundle
############################################################################################# 100.0%
>>> Creating ollama user...
>>> Adding ollama user to video group...
>>> Adding current user to ollama group...
>>> Creating ollama systemd service...
>>> The Ollama API is now available at 127.0.0.1:11434.
>>> Install complete. Run "ollama" from the command line.
[?25lpulling manifest ⠋ [?25h[?25l[2K[1Gpulling manifest ⠙ [?25h[?25l[2K[1Gpulling manifest ⠹ [?25h[?25l[2K[1Gpulling manifest ⠼ [?25h[?25l[2K[1Gpulling manifest ⠼ [?25h[?25l[2K[1Gpulling manifest ⠦ [?25h[?25l[2K[1Gpulling manifest ⠧ [?25h[?25l[2K[1Gpulling manifest 
pulling f890c9d2e6d4...   0% 



In [3]:
# List the models available to the local Ollama install (sanity check after the pull)
!ollama list

NAME                  ID              SIZE      MODIFIED      
wizardcoder:latest    de9d848c1323    3.8 GB    3 seconds ago    


In [4]:
# Install ngrok
!pip install pyngrok
from pyngrok import ngrok



In [None]:
%%writefile app.py
import asyncio
import io
import os
import uuid
from datetime import datetime
from html.parser import HTMLParser
from typing import List, Optional, Dict

import chromadb
import markdown
import PyPDF2
from chromadb.config import Settings
from fastapi import FastAPI, File, UploadFile, HTTPException, Request, Depends
from fastapi.middleware.cors import CORSMiddleware
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.llms import Ollama
from pydantic import BaseModel
from slowapi import Limiter, _rate_limit_exceeded_handler
from slowapi.errors import RateLimitExceeded
from slowapi.util import get_remote_address

app = FastAPI(
    title="Chat Assistant API",
    description="API for document discussions and chat interactions",
    version="1.0.0",
)

# Open CORS policy: any origin, method and header may call the API.
# Credentials are disabled because a wildcard origin must not be combined with
# credentialed (cookie / Authorization) requests; none of the routes in this
# file read cookies or auth headers. To support credentials, replace "*" with
# an explicit list of allowed origins and set allow_credentials=True.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Initialize rate limiter
# Requests are keyed by get_remote_address; the actual limits are declared
# per route with @limiter.limit(...).
limiter = Limiter(key_func=get_remote_address)
# Expose the limiter on the application state (presumably where slowapi's
# decorators look it up - confirm against the slowapi docs).
app.state.limiter = limiter
# Route RateLimitExceeded errors to slowapi's stock handler.
app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler)

# Initialize Ollama and embeddings.
# The model names and server URL default to the original hard-coded values but
# can be overridden with environment variables, so switching models does not
# require editing this file.
# NOTE: collections are only searchable with the embedding model that built
# them; changing OLLAMA_EMBED_MODEL invalidates previously stored documents.
OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "phi3")
OLLAMA_EMBED_MODEL = os.getenv("OLLAMA_EMBED_MODEL", OLLAMA_MODEL)
OLLAMA_BASE_URL = os.getenv("OLLAMA_BASE_URL", "http://localhost:11434")

llm = Ollama(model=OLLAMA_MODEL, base_url=OLLAMA_BASE_URL)
embeddings = OllamaEmbeddings(model=OLLAMA_EMBED_MODEL, base_url=OLLAMA_BASE_URL)

# On-disk Chroma store; one collection is created per uploaded document.
PERSIST_DIRECTORY = "./chroma_db"
chroma_client = chromadb.PersistentClient(path=PERSIST_DIRECTORY)

# Split documents into ~1000-character chunks with 200 characters of overlap,
# preferring paragraph, line and sentence boundaries in that order.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000, chunk_overlap=200, separators=["\n\n", "\n", ". ", " ", ""]
)


class ChatMessage(BaseModel):
    # One turn of a conversation.
    # role: speaker label; the chat endpoint renders each turn as "<role>: <content>".
    role: str
    # content: the text of the turn.
    content: str


class ChatRequest(BaseModel):
    # Request body of POST /api/chat.
    # messages: conversation history in order; the last entry is used as the
    # retrieval query when a context_id is given.
    messages: List[ChatMessage]
    # context_id: name of the Chroma collection returned by the upload
    # endpoint; None (the default) means no document context is retrieved.
    context_id: Optional[str] = None


class ChatResponse(BaseModel):
    # Response body of POST /api/chat.
    # response: the text produced by the LLM for the submitted conversation.
    response: str
    # context_id: echoed back from the request so the client can keep using it.
    context_id: Optional[str] = None


class DocumentContext(BaseModel):
    # Response body of POST /api/document/upload.
    # content: preview of the extracted text (the upload endpoint truncates it
    # to 1000 characters followed by "...").
    content: str
    # metadata: string-to-string map; the upload endpoint fills the keys
    # "filename", "type" and "size".
    metadata: Dict[str, str]
    # context_id: name of the Chroma collection holding the document chunks.
    context_id: str


def create_collection(content: str, doc_type: str) -> str:
    """Index a document in a new Chroma collection and return the collection ID.

    The text is split with the module-level ``text_splitter``; every chunk is
    embedded and stored in a freshly created collection whose name is a random
    UUID4 string.

    Args:
        content: Plain text of the document. Text that yields no chunks
            produces an empty (but existing) collection.
        doc_type: Label copied into each chunk's metadata (e.g. "pdf").

    Returns:
        The collection name, which the API hands out as ``context_id``.
    """
    collection_id = str(uuid.uuid4())
    collection = chroma_client.create_collection(name=collection_id)

    chunks = text_splitter.split_text(content)

    # Write chunks in bounded batches: far fewer store round-trips than one
    # add() per chunk, while each call stays small for very large documents.
    batch_size = 64
    for start in range(0, len(chunks), batch_size):
        batch = chunks[start : start + batch_size]
        indices = range(start, start + len(batch))
        collection.add(
            # embed_query is kept on purpose so stored vectors are produced the
            # same way as the query vectors used by the chat endpoint.
            embeddings=[embeddings.embed_query(chunk) for chunk in batch],
            documents=batch,
            metadatas=[{"doc_type": doc_type, "chunk_index": i} for i in indices],
            ids=[f"chunk_{i}" for i in indices],
        )

    return collection_id


def process_pdf_file(file_content: bytes) -> str:
    """Extract the text of every page of a PDF.

    Args:
        file_content: Raw bytes of the PDF file.

    Returns:
        The text of all pages in order, each page followed by a newline.
        Pages that yield no text contribute an empty line.
    """
    pdf_reader = PyPDF2.PdfReader(io.BytesIO(file_content))
    # `or ""` guards against an extractor returning None for a page without a
    # text layer; join avoids repeated string concatenation.
    return "".join((page.extract_text() or "") + "\n" for page in pdf_reader.pages)


# Tags treated as word boundaries when flattening HTML to text; inline tags
# such as <em> or <code> are dropped without inserting whitespace.
_BLOCK_TAGS = frozenset(
    {
        "p", "br", "div", "hr", "pre", "blockquote",
        "h1", "h2", "h3", "h4", "h5", "h6",
        "ul", "ol", "li", "table", "tr", "td", "th",
    }
)


class _HTMLTextExtractor(HTMLParser):
    """Collect the text nodes of an HTML fragment, discarding every tag."""

    def __init__(self):
        # convert_charrefs=True decodes entities such as &amp; inside text.
        super().__init__(convert_charrefs=True)
        self._parts = []

    def handle_starttag(self, tag, attrs):
        if tag in _BLOCK_TAGS:
            self._parts.append("\n")

    def handle_endtag(self, tag):
        if tag in _BLOCK_TAGS:
            self._parts.append("\n")

    def handle_data(self, data):
        self._parts.append(data)

    def get_text(self):
        return "".join(self._parts)


def process_markdown_file(content: bytes) -> str:
    """Convert a UTF-8 markdown document to plain text.

    The markdown is rendered to HTML, all tags are removed (not only ``<p>``),
    HTML entities are decoded, and runs of whitespace are collapsed to single
    spaces.

    Args:
        content: Raw bytes of the markdown file, expected to be UTF-8.

    Returns:
        The document text on a single line, words separated by one space.

    Raises:
        UnicodeDecodeError: If ``content`` is not valid UTF-8.
    """
    md_content = content.decode("utf-8")
    html = markdown.markdown(md_content)

    extractor = _HTMLTextExtractor()
    extractor.feed(html)
    extractor.close()  # flush any text still buffered by the parser

    return " ".join(extractor.get_text().split())


def _retrieve_context(context_id: str, query: str) -> str:
    """Return the two stored chunks most similar to ``query``, joined by spaces.

    Blocking (embedding call plus Chroma query); meant to run in a worker thread.
    Raises HTTPException(404) when the collection cannot be opened.
    """
    try:
        collection = chroma_client.get_collection(name=context_id)
    except Exception as exc:
        # The exception type raised for a missing collection differs between
        # Chroma releases, so any failure to open it is reported as not found.
        raise HTTPException(
            status_code=404, detail=f"Unknown context_id: {context_id}"
        ) from exc

    results = collection.query(
        query_embeddings=[embeddings.embed_query(query)], n_results=2
    )
    return " ".join(results["documents"][0])


@app.post("/api/chat", response_model=ChatResponse)
@limiter.limit("30/minute")
async def chat(request: Request, chat_request: ChatRequest):
    """Answer a conversation, optionally grounded in an uploaded document."""
    # `request` is unused here but required by the slowapi limiter decorator.
    if not chat_request.messages:
        # Previously this surfaced as an IndexError turned into a 500.
        raise HTTPException(
            status_code=400, detail="messages must contain at least one entry."
        )

    try:
        loop = asyncio.get_running_loop()

        # Format conversation history
        conversation = "\n".join(
            f"{msg.role}: {msg.content}" for msg in chat_request.messages
        )

        # If context_id is provided, retrieve relevant context using the last
        # message as the search query. Runs in the default executor so the
        # blocking embedding/Chroma calls do not stall the event loop.
        context = ""
        if chat_request.context_id:
            last_message = chat_request.messages[-1].content
            context = await loop.run_in_executor(
                None, _retrieve_context, chat_request.context_id, last_message
            )

        prompt = f"""Context: {context}

Conversation:
{conversation}
Please proceed with the response, considering both the context and conversation history."""

        # The LLM call is blocking as well, so it is also run in the executor.
        response = await loop.run_in_executor(None, llm.invoke, prompt)

        return ChatResponse(response=response, context_id=chat_request.context_id)

    except HTTPException:
        # Keep deliberate HTTP errors (e.g. the 404 above) intact.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))


@app.post("/api/document/upload", response_model=DocumentContext)
@limiter.limit("10/minute")
async def upload_document(request: Request, file: UploadFile = File(...)):
    """Upload a PDF, Markdown or text file and index it for later chat requests."""
    # `request` is unused here but required by the slowapi limiter decorator.
    try:
        content = await file.read()
        # filename is optional in a multipart upload; treat a missing one as "".
        filename = file.filename or ""
        file_extension = filename.split(".")[-1].lower()

        try:
            if file_extension == "pdf":
                text_content = process_pdf_file(content)
                doc_type = "pdf"
            elif file_extension in ["md", "markdown"]:
                text_content = process_markdown_file(content)
                doc_type = "markdown"
            elif file_extension in ["txt", "text"]:
                text_content = content.decode("utf-8")
                doc_type = "text"
            else:
                raise HTTPException(
                    status_code=400,
                    detail="Unsupported file format. Please upload PDF, MD, or TXT files.",
                )
        except UnicodeDecodeError as exc:
            # A badly encoded upload is a client error, not a server fault.
            raise HTTPException(
                status_code=400, detail="File is not valid UTF-8 text."
            ) from exc

        # Chunking + embedding is blocking and can be slow; run it in the
        # default executor so other requests keep being served.
        context_id = await asyncio.get_running_loop().run_in_executor(
            None, create_collection, text_content, doc_type
        )

        return DocumentContext(
            content=(
                text_content[:1000] + "..."
                if len(text_content) > 1000
                else text_content
            ),
            metadata={
                "filename": filename,
                "type": doc_type,
                # metadata is declared Dict[str, str]; Pydantic v2 does not
                # coerce int to str, so the size is converted explicitly.
                "size": str(len(content)),
            },
            context_id=context_id,
        )

    except HTTPException:
        # Without this, the 400 raised above was re-wrapped as a 500 by the
        # generic handler below.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))


@app.get("/")
async def root():
    # Landing endpoint: names the service and points at the docs and health routes.
    service_index = dict(
        message="Chat Assistant API",
        version="1.0.0",
        documentation="/docs",
        health="/health",
    )
    return service_index


@app.get("/health")
async def health_check():
    # Health probe: fixed "healthy" status plus the server's current local
    # time in ISO-8601 form.
    checked_at = datetime.now().isoformat()
    return dict(
        status="healthy",
        timestamp=checked_at,
        service="Chat Assistant API",
    )


In [6]:
# Register the ngrok authtoken without storing it in the notebook.
# SECURITY: the token that used to be hard-coded in this cell has been exposed
# and should be revoked in the ngrok dashboard.
import os
from getpass import getpass

from pyngrok import ngrok

# Prefer the NGROK_AUTHTOKEN environment variable; otherwise prompt for it
# (getpass keeps the value out of the cell output).
ngrok.set_auth_token(os.environ.get("NGROK_AUTHTOKEN") or getpass("ngrok authtoken: "))

Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml


In [None]:
import uvicorn
from pyngrok import ngrok
import asyncio
import nest_asyncio
import os

# Apply nest_asyncio to allow running async code in Jupyter.
# Must run before the asyncio.run(main()) call at the end of this cell.
nest_asyncio.apply()

async def setup_ngrok():
    """Open an ngrok tunnel to local port 8000, print it and return it."""
    tunnel = ngrok.connect(8000)
    print("Public URL: {}".format(tunnel))
    return tunnel

def start_server():
    """Launch uvicorn for the "app:app" application on 0.0.0.0:8000."""
    bind_options = {"host": "0.0.0.0", "port": 8000}
    uvicorn.run("app:app", **bind_options)

async def main():
    """Open the ngrok tunnel, serve the API, and close the tunnel afterwards."""
    # Open the tunnel first so the public URL is printed before serving starts.
    tunnel = await setup_ngrok()

    try:
        # Start the server
        start_server()
    finally:
        # Close the tunnel when the server stops or fails, so re-running this
        # cell does not leave earlier tunnels open.
        ngrok.disconnect(tunnel.public_url)

# Entry point: executes when the cell is run (the captured output below this
# cell shows the tunnel URL and the uvicorn startup log).
if __name__ == "__main__":
    # Run the async main function
    asyncio.run(main())

Public URL: NgrokTunnel: "https://3c7f-34-32-215-65.ngrok-free.app" -> "http://localhost:8000"


  llm = Ollama(model="wizardcoder", base_url="http://localhost:11434")
  embeddings = OllamaEmbeddings(model="wizardcoder", base_url="http://localhost:11434")
INFO:     Started server process [3936]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit)


INFO:     105.109.186.243:0 - "GET /health HTTP/1.1" 200 OK
INFO:     105.109.186.243:0 - "GET /health HTTP/1.1" 200 OK
INFO:     105.109.186.243:0 - "GET /health HTTP/1.1" 200 OK
INFO:     105.109.186.243:0 - "GET /health HTTP/1.1" 200 OK
INFO:     105.109.186.243:0 - "GET /health HTTP/1.1" 200 OK
INFO:     105.109.186.243:0 - "GET /health HTTP/1.1" 200 OK
INFO:     105.109.186.243:0 - "GET /health HTTP/1.1" 200 OK
INFO:     105.109.186.243:0 - "GET /health HTTP/1.1" 200 OK
INFO:     105.109.186.243:0 - "POST /api/code/generate HTTP/1.1" 200 OK
Received debug request: code="```python\nimport heapq\nfrom collections import defaultdict, deque\n\nclass Graph:\n    def __init__(self):\n        self.graph = defaultdict(list)\n        \n    def add_edge(self, u, v, w):\n        self.graph[u].append((v, w))\n        self.graph[v].append((u, w)\n        \n    def dijkstra(self, start, end):\n        visited = [False] * len(self.graph)\n        dist = [float('inf')] * len(self.graph)\n        