In [2]:
# Install required packages
!pip install fastapi uvicorn python-multipart langchain chromadb sentence-transformers pydantic python-jose python-dotenv slowapi PyPDF2 pyngrok
!pip install -U langchain-community
!pip install nest_asyncio



In [3]:
%%shell
# Download and install Ollama using the official install script
curl https://ollama.ai/install.sh | sh

# Start Ollama service in background
nohup ollama serve > ollama.log 2>&1 &

# Wait for Ollama to start
sleep 10

# Pull the model
ollama pull wizardcoder

# Verify Ollama is running
curl http://localhost:11434/api/version

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0100 14703    0 14703    0     0  66887      0 --:--:-- --:--:-- --:--:-- 66831100 14703    0 14703    0     0  66858      0 --:--:-- --:--:-- --:--:-- 66831
>>> Installing ollama to /usr/local
>>> Downloading Linux amd64 bundle
############################################################################################# 100.0%
>>> Creating ollama user...
>>> Adding ollama user to video group...
>>> Adding current user to ollama group...
>>> Creating ollama systemd service...
>>> The Ollama API is now available at 127.0.0.1:11434.
>>> Install complete. Run "ollama" from the command line.
[?25lpulling manifest ⠙ [?25h[?25l[2K[1Gpulling manifest ⠹ [?25h[?25l[2K[1Gpulling manifest ⠸ [?25h[?25l[2K[1Gpulling manifest ⠼ [?25h[?25l[2K[1Gpulli



In [4]:
# Install ngrok
!pip install pyngrok
from pyngrok import ngrok



In [5]:
%%writefile app.py
import os
from typing import List, Optional, Dict
from fastapi import FastAPI, File, UploadFile, HTTPException, Depends
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from langchain_community.llms import Ollama
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import OllamaEmbeddings
import chromadb
from chromadb.config import Settings
import numpy as np
from slowapi import Limiter, _rate_limit_exceeded_handler
from slowapi.util import get_remote_address
from slowapi.errors import RateLimitExceeded
import asyncio
from datetime import datetime
import uuid
from fastapi import Request, Depends

# Initialize FastAPI app
app = FastAPI(
    title="Code Assistant API",
    description="API for code generation, debugging, and documentation using WizardCoder",
    version="1.0.0"
)

# Configure CORS
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Initialize rate limiter
limiter = Limiter(key_func=get_remote_address)
app.state.limiter = limiter
app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler)


# Initialize Ollama with WizardCoder and embeddings
llm = Ollama(model="wizardcoder", base_url="http://localhost:11434")
embeddings = OllamaEmbeddings(model="wizardcoder", base_url="http://localhost:11434")

COLLECTION_NAME = "documents"
PERSIST_DIRECTORY = "./chroma_db"
# Initialize ChromaDB
chroma_client = chromadb.PersistentClient(path=PERSIST_DIRECTORY)

# Text splitter for code
code_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
    separators=["\nclass ", "\ndef ", "\n\n", "\n", " ", ""]
)

class CodeRequest(BaseModel):
    code: str
    task: str  # 'generate', 'debug', or 'document'
    language: Optional[str] = None
    description: Optional[str] = None

class CodeResponse(BaseModel):
    result: str
    task_type: str
    collection_id: Optional[str] = None

class DebugRequest(BaseModel):
    code: str
    language: str
    include_performance_analysis: bool = False
    include_security_analysis: bool = False

class Issue(BaseModel):
    severity: str
    message: str
    line_number: Optional[int]
    suggested_fix: str

class DebugResponse(BaseModel):
    issues: List[Issue]
    fixed_code: str
    performance_analysis: Optional[str] = None
    security_analysis: Optional[str] = None

def create_collection(code: str, language: str) -> str:
    """Create a collection for the code and return collection ID"""
    collection_id = str(uuid.uuid4())
    collection = chroma_client.create_collection(name=collection_id)

    # Split code into chunks
    chunks = code_splitter.split_text(code)

    # Generate embeddings and add to collection
    for i, chunk in enumerate(chunks):
        embedding = embeddings.embed_query(chunk)
        collection.add(
            embeddings=[embedding],
            documents=[chunk],
            metadatas=[{"language": language, "chunk_index": i}],
            ids=[f"chunk_{i}"]
        )

    return collection_id

def process_code_file(file: bytes) -> str:
    """Process code file content"""
    return file.decode("utf-8")

def analyze_code(code: str, language: str) -> List[Issue]:
    """Analyze code for common issues"""
    try:
        prompt = f"""Analyze the following {language} code for issues:
        {code}
        Provide a list of issues found, including severity (error/warning/info),
        line numbers when applicable, and suggested fixes.
        Format: SEVERITY|LINE|MESSAGE|FIX"""

        analysis = llm.invoke(prompt)
        issues = []

        for line in analysis.split('\n'):
            try:
                if '|' in line:
                    parts = line.split('|')
                    if len(parts) == 4:
                        severity, line_num, message, fix = parts
                        issues.append(Issue(
                            severity=severity.lower().strip(),
                            line_number=int(line_num) if line_num.strip().isdigit() else None,
                            message=message.strip(),
                            suggested_fix=fix.strip()
                        ))
            except Exception as line_error:
                print(f"Error processing analysis line: {str(line_error)}")
                continue

        return issues

    except Exception as e:
        print(f"Code analysis error: {str(e)}")
        return []

def analyze_performance(code: str, language: str) -> str:
    """Analyze code for performance issues"""
    prompt = f"""Analyze the following {language} code for performance optimization opportunities:
    {code}
    Provide detailed performance analysis and optimization suggestions."""

    return llm.invoke(prompt)

def analyze_security(code: str, language: str) -> str:
    """Analyze code for security vulnerabilities"""
    prompt = f"""Analyze the following {language} code for security vulnerabilities:
    {code}
    Provide detailed security analysis and remediation suggestions."""

    return llm.invoke(prompt)

@app.post("/api/debug", response_model=DebugResponse)
@limiter.limit("10/minute")
async def debug_code(
    request: Request,
    debug_request: DebugRequest
):
    try:
        # Log incoming request for debugging
        print(f"Received debug request: {debug_request}")

        # Validate input code
        if not debug_request.code or not debug_request.code.strip():
            raise HTTPException(status_code=400, detail="Code cannot be empty")

        # Test LLM connection
        try:
            llm.invoke("Test connection")
        except Exception as llm_error:
            print(f"LLM connection error: {str(llm_error)}")
            raise HTTPException(status_code=500, detail="LLM service unavailable")

        # Enhanced code analysis with error handling
        try:
            issues = []
            analysis = await asyncio.get_event_loop().run_in_executor(
                None,
                lambda: analyze_code(debug_request.code, debug_request.language)
            )
            issues.extend(analysis)
        except Exception as analysis_error:
            print(f"Analysis error: {str(analysis_error)}")
            issues = []  # Continue with empty issues list

        # Generate fixed code with error handling
        try:
            fix_prompt = f"""Fix the following {debug_request.language} code addressing all identified issues:
            {debug_request.code}
            Provide the complete fixed code."""

            fixed_code = await asyncio.get_event_loop().run_in_executor(
                None,
                lambda: llm.invoke(fix_prompt)
            )
        except Exception as fix_error:
            print(f"Fix generation error: {str(fix_error)}")
            fixed_code = debug_request.code  # Return original code if fix fails

        response = DebugResponse(
            issues=issues,
            fixed_code=fixed_code
        )

        # Optional performance analysis with error handling
        if debug_request.include_performance_analysis:
            try:
                performance_analysis = await asyncio.get_event_loop().run_in_executor(
                    None,
                    lambda: analyze_performance(debug_request.code, debug_request.language)
                )
                response.performance_analysis = performance_analysis
            except Exception as perf_error:
                print(f"Performance analysis error: {str(perf_error)}")
                response.performance_analysis = "Performance analysis failed"

        # Optional security analysis with error handling
        if debug_request.include_security_analysis:
            try:
                security_analysis = await asyncio.get_event_loop().run_in_executor(
                    None,
                    lambda: analyze_security(debug_request.code, debug_request.language)
                )
                response.security_analysis = security_analysis
            except Exception as sec_error:
                print(f"Security analysis error: {str(sec_error)}")
                response.security_analysis = "Security analysis failed"

        return response

    except HTTPException as http_error:
        raise http_error
    except Exception as e:
        print(f"Debug endpoint error: {str(e)}")
        raise HTTPException(
            status_code=500,
            detail=f"Internal server error: {str(e)}"
        )

@app.post("/api/debug/file")
@limiter.limit("10/minute")
async def debug_code_file(
    request: Request,
    file: UploadFile = File(...),
    include_performance_analysis: bool = False,
    include_security_analysis: bool = False
):
    try:
        content = await file.read()
        code = process_code_file(content)
        language = file.filename.split('.')[-1]

        debug_request = DebugRequest(
            code=code,
            language=language,
            include_performance_analysis=include_performance_analysis,
            include_security_analysis=include_security_analysis
        )

        return await debug_code(request, debug_request)

    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))

@app.post("/api/code/generate", response_model=CodeResponse)
@limiter.limit("10/minute")
async def generate_code(
    request: Request,
    code_request: CodeRequest
):
    try:
        prompt = ""
        if code_request.task == "generate":
            prompt = f"""Generate code in {code_request.language} for the following requirement:
            {code_request.description}
            Provide well-structured, efficient, and documented code."""

        elif code_request.task == "debug":
            prompt = f"""Debug the following code and explain the issues found:
            {code_request.code}
            Provide the corrected code and explanation of fixes."""

        elif code_request.task == "document":
            prompt = f"""Add comprehensive documentation to the following code:
            {code_request.code}
            Include function/class documentation, parameter descriptions, and usage examples."""
        else:
            raise HTTPException(status_code=400, detail="Invalid task type")

        result = await asyncio.get_event_loop().run_in_executor(
            None,
            lambda: llm.invoke(prompt)
        )

        # Create collection for the generated/processed code
        collection_id = create_collection(
            code=result,
            language=code_request.language or "unknown"
        )

        return CodeResponse(
            result=result,
            task_type=code_request.task,
            collection_id=collection_id
        )

    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))

@app.post("/api/code/file", response_model=CodeResponse)
@limiter.limit("10/minute")
async def process_code_file_endpoint(
    request: Request,
    file: UploadFile = File(...),
    task: str = "document"
):
    try:
        content = await file.read()
        code = process_code_file(content)

        code_request = CodeRequest(
            code=code,
            task=task,
            language=file.filename.split('.')[-1]
        )

        return await generate_code(request, code_request)

    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))

@app.get("/")
async def root():
    return {
        "message": "Code Assistant API",
        "version": "1.0.0",
        "documentation": "/docs",
        "health": "/health"
    }

@app.get("/health")
async def health_check():
    return {
        "status": "healthy",
        "timestamp": datetime.now().isoformat(),
        "service": "Code Assistant API"
    }

Writing app.py


In [6]:
!ngrok config add-authtoken 2pqAryFpOn6pt3y4F8by2rV7eVl_HnmvLCipjgjzuxMiRCwb

Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml


In [None]:
import uvicorn
from pyngrok import ngrok
import asyncio
import nest_asyncio
import os

# Apply nest_asyncio to allow running async code in Jupyter
nest_asyncio.apply()

async def setup_ngrok():
    # Set up ngrok tunnel
    public_url = ngrok.connect(8000)
    print(f"Public URL: {public_url}")
    return public_url

def start_server():
    # Start FastAPI
    uvicorn.run("app:app", host="0.0.0.0", port=8000)

async def main():
    # Setup ngrok in the background
    public_url = await setup_ngrok()

    # Start the server
    start_server()

if __name__ == "__main__":
    # Run the async main function
    asyncio.run(main())

Public URL: NgrokTunnel: "https://9b7f-34-169-39-138.ngrok-free.app" -> "http://localhost:8000"


  llm = Ollama(model="wizardcoder", base_url="http://localhost:11434")
  embeddings = OllamaEmbeddings(model="wizardcoder", base_url="http://localhost:11434")
INFO:     Started server process [555]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit)


INFO:     105.235.134.214:0 - "GET / HTTP/1.1" 200 OK
INFO:     105.235.134.214:0 - "GET /health HTTP/1.1" 200 OK
Received debug request: code='=== Generate Results ===\n\nHere\'s a C++ implementation of Dijkstra\'s algorithm with documentation:\n\n```c++\n// Graph class representing a graph using adjacency list representation\nclass Graph {\npublic:\n    // Define a structure for a node in the graph\n    struct Node {\n        int vertex;   // Vertex number of this node\n        int weight;  // Weight of the edge leading to this node from source vertex\n        Node* next; // Pointer to the next node in the adjacency list for this vertex\n    };\n\n    // Define a structure for the adjacency list representation of the graph\n    struct AdjList {\n        int vertex;   // Vertex number of this node\n        int weight;  // Weight of the edge leading to this node from source vertex\n        Node* next; // Pointer to the next node in the adjacency list for this vertex\n    };\n\n    // M