## Install required packages

In [None]:
# %pip (rather than !pip) installs into the environment of the running kernel.
# Pinned versions are the ones reported by the recorded output of this cell;
# pydantic and torch stay unpinned because no version is visible in that output.
# nest_asyncio and requests are imported by later cells, so they are listed too.
%pip install -q fastapi==0.115.6 uvicorn==0.32.1 pydantic httpx==0.27.2 slowapi==0.1.9 torch ollama==0.4.4 pyngrok==7.2.1 python-multipart==0.0.19 nest_asyncio requests

Collecting fastapi
  Downloading fastapi-0.115.6-py3-none-any.whl.metadata (27 kB)
Collecting uvicorn
  Downloading uvicorn-0.32.1-py3-none-any.whl.metadata (6.6 kB)
Collecting slowapi
  Downloading slowapi-0.1.9-py3-none-any.whl.metadata (3.0 kB)
Collecting ollama
  Downloading ollama-0.4.4-py3-none-any.whl.metadata (4.7 kB)
Collecting pyngrok
  Downloading pyngrok-7.2.1-py3-none-any.whl.metadata (8.3 kB)
Collecting python-multipart
  Downloading python_multipart-0.0.19-py3-none-any.whl.metadata (1.8 kB)
Collecting starlette<0.42.0,>=0.40.0 (from fastapi)
  Downloading starlette-0.41.3-py3-none-any.whl.metadata (6.0 kB)
Collecting limits>=2.3 (from slowapi)
  Downloading limits-3.14.1-py3-none-any.whl.metadata (7.2 kB)
Collecting httpx
  Downloading httpx-0.27.2-py3-none-any.whl.metadata (7.1 kB)
Downloading fastapi-0.115.6-py3-none-any.whl (94 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m94.8/94.8 kB[0m [31m5.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading uv

In [None]:
# Install Ollama.
# -f makes curl fail on HTTP errors instead of piping an error page into sh,
# -sS hides the progress meter but keeps error messages, -L follows redirects.
!curl -fsSL https://ollama.com/install.sh | sh

# Install required packages first.
# %pip targets the environment of the running kernel.
%pip install -q ollama pyngrok

import subprocess
import multiprocessing
import time
import os

def run_ollama_server():
    """Run ``ollama serve`` and wait for the command to finish.

    Raises:
        subprocess.CalledProcessError: if the command exits with a non-zero
            status (because of ``check=True``).
    """
    serve_command = ['ollama', 'serve']
    subprocess.run(serve_command, check=True)

# Start the Ollama server in a separate process
server_process = multiprocessing.Process(target=run_ollama_server)
server_process.start()

# Verify that the server is running by polling the API version endpoint.
import requests

OLLAMA_VERSION_URL = 'http://127.0.0.1:11434/api/version'
OLLAMA_STARTUP_TIMEOUT_S = 30  # give up after this many seconds
OLLAMA_POLL_INTERVAL_S = 1     # pause between two probes

# Poll instead of sleeping a fixed time: the cell continues as soon as the
# server answers and still reports a failure when it never does.
print("Waiting for Ollama server to start...")
startup_deadline = time.monotonic() + OLLAMA_STARTUP_TIMEOUT_S
server_ready = False
while time.monotonic() < startup_deadline:
    try:
        # A per-request timeout keeps a hung connection from blocking the cell.
        if requests.get(OLLAMA_VERSION_URL, timeout=2).status_code == 200:
            server_ready = True
            break
    except requests.exceptions.RequestException:
        # Not accepting connections yet (or the probe timed out); retry below.
        pass
    time.sleep(OLLAMA_POLL_INTERVAL_S)

if server_ready:
    print("Ollama server is running successfully!")
else:
    # Covers connection failures as well as persistent non-200 answers.
    print("Failed to connect to Ollama server")
    server_process.terminate()

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0100 14703    0 14703    0     0  66086      0 --:--:-- --:--:-- --:--:-- 66229
>>> Installing ollama to /usr/local
>>> Downloading Linux amd64 bundle
############################################################################################# 100.0%
>>> Adding ollama user to video group...
>>> Adding current user to ollama group...
>>> Creating ollama systemd service...
>>> The Ollama API is now available at 127.0.0.1:11434.
>>> Install complete. Run "ollama" from the command line.
Waiting for Ollama server to start...
Ollama server is running successfully!


In [None]:
# Pull the WizardCoder model with the Ollama CLI.
# The recorded progress output reports a download of about 3.8 GB.
!ollama pull wizardcoder

[?25lpulling manifest ⠋ [?25h[?25l[2K[1Gpulling manifest ⠙ [?25h[?25l[2K[1Gpulling manifest ⠹ [?25h[?25l[2K[1Gpulling manifest ⠸ [?25h[?25l[2K[1Gpulling manifest ⠼ [?25h[?25l[2K[1Gpulling manifest ⠦ [?25h[?25l[2K[1Gpulling manifest ⠦ [?25h[?25l[2K[1Gpulling manifest ⠧ [?25h[?25l[2K[1Gpulling manifest ⠇ [?25h[?25l[2K[1Gpulling manifest ⠏ [?25h[?25l[2K[1Gpulling manifest ⠋ [?25h[?25l[2K[1Gpulling manifest ⠙ [?25h[?25l[2K[1Gpulling manifest ⠸ [?25h[?25l[2K[1Gpulling manifest ⠸ [?25h[?25l[2K[1Gpulling manifest ⠼ [?25h[?25l[2K[1Gpulling manifest ⠴ [?25h[?25l[2K[1Gpulling manifest ⠦ [?25h[?25l[2K[1Gpulling manifest ⠧ [?25h[?25l[2K[1Gpulling manifest ⠇ [?25h[?25l[2K[1Gpulling manifest 
pulling f890c9d2e6d4...   0% ▕▏    0 B/3.8 GB                  [?25h[?25l[2K[1G[A[2K[1Gpulling manifest 
pulling f890c9d2e6d4...   0% ▕▏    0 B/3.8 GB                  [?25h[?25l[2K[1G[A[2K[1Gpulling manifest 
pulling f89

In [None]:
# List the models known to the local Ollama installation to confirm the pull completed.
!ollama list

NAME                  ID              SIZE      MODIFIED       
wizardcoder:latest    de9d848c1323    3.8 GB    33 seconds ago    


## Install and authenticate ngrok

In [None]:
%pip install -q pyngrok

import os
from getpass import getpass

from pyngrok import ngrok

# SECURITY: never hardcode the ngrok authtoken. Any token that has been committed
# with this notebook must be treated as leaked and revoked in the ngrok dashboard.
# The token is read from the NGROK_AUTH_TOKEN environment variable when it is set;
# otherwise it is requested interactively without being echoed.
ngrok_auth_token = os.environ.get("NGROK_AUTH_TOKEN") or getpass("ngrok authtoken: ")

# Saves the token to the ngrok configuration file, as `ngrok authtoken <token>` does.
ngrok.set_auth_token(ngrok_auth_token)

Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml


## Create and run the FastAPI service

In [None]:
# Import necessary libraries
from fastapi import FastAPI, HTTPException, Depends, Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from pydantic import BaseModel, Field
from typing import Optional, Dict, Any
import ollama
from ollama import AsyncClient
import asyncio
import time
from slowapi import Limiter, _rate_limit_exceeded_handler
from slowapi.util import get_remote_address
from slowapi.errors import RateLimitExceeded
import logging
import threading
import uvicorn
import torch
import nest_asyncio
from fastapi.responses import JSONResponse
from slowapi.util import get_remote_address
# Apply nest_asyncio to make async work in Jupyter
nest_asyncio.apply()

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# Initialize rate limiter (requests are keyed by the client's remote address)
limiter = Limiter(key_func=get_remote_address)

app = FastAPI(
    title="Code Generation and Documentation API",
    description="API service for code generation, documentation, and explanation using WizardCoder",
    version="1.0.0"
)

# Add CORS middleware.
# The CORS specification does not allow a wildcard origin together with
# credentialed requests. The endpoints in this cell only read a JSON body
# (no cookies or Authorization headers), so credentials are disabled and the
# wildcard origin is kept.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Add rate limiter to FastAPI app.
# NOTE: every @limiter.limit decorator in this cell is commented out, so the
# limiter and its exception handler are registered but no limit is enforced.
app.state.limiter = limiter
app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler)

# Request/Response Models
class CodeGenerationRequest(BaseModel):
    # JSON body accepted by POST /generate-code.

    # Required: what the caller wants generated, in plain language.
    prompt: str = Field(
        ...,
        description="Natural language description of the code to generate",
    )
    # Optional: language of the generated code.
    language: str = Field(
        description="Target programming language",
        default="python",
    )
    # Optional: generation temperature, validated to lie in the closed range [0, 1].
    temperature: float = Field(
        description="Generation temperature",
        default=0.7,
        le=1,
        ge=0,
    )

class DocumentationRequest(BaseModel):
    # JSON body accepted by POST /generate-documentation.

    # Required: source code that should be documented.
    code: str = Field(
        ...,
        description="Code to generate documentation for",
    )
    # Optional: language the submitted code is written in.
    language: str = Field(
        description="Programming language of the code",
        default="python",
    )
    # Optional: free-form style name that is inserted into the model prompt.
    style: str = Field(
        description="Documentation style (google, numpy, etc.)",
        default="google",
    )

class ExplanationRequest(BaseModel):
    # JSON body accepted by POST /explain-code.

    # Required: source code that should be explained.
    code: str = Field(
        ...,
        description="Code to explain",
    )
    # Optional: free-form level name; it is not restricted to the listed values.
    detail_level: str = Field(
        description="Explanation detail level (basic, medium, detailed)",
        default="medium",
    )

class ApiResponse(BaseModel):
    # Response envelope used by the POST endpoints that declare
    # response_model=ApiResponse.
    success: bool  # set to True by the endpoints when the model call succeeded
    data: Dict[str, Any]  # endpoint-specific payload, e.g. generated code plus language
    error: Optional[str] = None  # optional error message; None when not provided

# API Endpoints
@app.post("/generate-code", response_model=ApiResponse)
# @limiter.limit("10/minute")  # Applies rate limiting
# NOTE(review): slowapi appears to require an endpoint parameter literally named
# `request` that holds the HTTP Request; here `request` is the body model, which
# may be why the decorator is disabled - confirm before re-enabling it.
async def generate_code(
    request: CodeGenerationRequest,
    req: Request  # Raw HTTP request; FastAPI injects it without Depends()
):
    """Generate code with the WizardCoder model from a natural-language prompt.

    Args:
        request: Validated body holding the prompt, the target language and
            the generation temperature.
        req: Raw HTTP request (currently unused by the handler).

    Returns:
        ``ApiResponse`` with ``generated_code`` and ``language`` on success,
        or a ``JSONResponse`` with status 500 and the error text on failure.
    """
    try:
        prompt = f"""Generate {request.language} code for the following requirement:
        {request.prompt}

        Please provide clean, well-structured, and efficient code."""

        # Call the WizardCoder API
        response = await AsyncClient().chat(
            model='wizardcoder',
            messages=[{'role': 'user', 'content': prompt}],
            # Forward the validated temperature to the model; the field used to
            # be accepted by the request schema but was never applied.
            options={'temperature': request.temperature},
        )

        return ApiResponse(success=True, data={
            "generated_code": response.message.content,
            "language": request.language
        })

    except Exception as e:
        # logger.exception keeps the same message and adds the traceback.
        logger.exception("Error in code generation: %s", e)
        return JSONResponse(
            status_code=500,
            content={"success": False, "error": str(e)}
        )

@app.post("/generate-documentation", response_model=ApiResponse)
# @limiter.limit("10/minute")
async def generate_documentation(
    request: DocumentationRequest,
    req: Request  # Use Request directly
):
    """Ask the WizardCoder model to document the submitted code.

    Args:
        request: Validated body with the code, its language and the
            documentation style.
        req: Raw HTTP request (not used by the handler).

    Returns:
        ``ApiResponse`` carrying ``documentation``, ``style`` and ``language``
        on success, or a ``JSONResponse`` with status 500 and the error text.
    """
    try:
        prompt = f"""Generate detailed documentation for the following {request.language} code using {request.style} style:

        {request.code}

        Please include:
        1. Function/class purpose
        2. Parameters description
        3. Return values
        4. Usage examples"""

        ollama_client = AsyncClient()
        user_message = {'role': 'user', 'content': prompt}
        chat_result = await ollama_client.chat(
            model='wizardcoder',
            messages=[user_message]
        )

        payload = {
            "documentation": chat_result.message.content,
            "style": request.style,
            "language": request.language
        }
        return ApiResponse(success=True, data=payload)

    except Exception as exc:
        failure_reason = str(exc)
        logger.error(f"Error in documentation generation: {failure_reason}")
        error_body = {"success": False, "error": failure_reason}
        return JSONResponse(status_code=500, content=error_body)



@app.post("/explain-code", response_model=ApiResponse)
# @limiter.limit("10/minute")
async def explain_code(
    request: ExplanationRequest,
    req: Request  # Use Request directly
):
    """Ask the WizardCoder model to explain the submitted code.

    Args:
        request: Validated body with the code and the requested detail level.
        req: Raw HTTP request (not used by the handler).

    Returns:
        ``ApiResponse`` carrying ``explanation`` and ``detail_level`` on
        success, or a ``JSONResponse`` with status 500 and the error text.
    """
    try:
        prompt = f"""Explain the following code with {request.detail_level} level of detail:

        {request.code}

        Please provide:
        1. Overall purpose
        2. Line-by-line explanation
        3. Key concepts used
        4. Potential improvements"""

        ollama_client = AsyncClient()
        user_message = {'role': 'user', 'content': prompt}
        chat_result = await ollama_client.chat(
            model='wizardcoder',
            messages=[user_message]
        )

        payload = {
            "explanation": chat_result.message.content,
            "detail_level": request.detail_level
        }
        return ApiResponse(success=True, data=payload)

    except Exception as exc:
        failure_reason = str(exc)
        logger.error(f"Error in code explanation: {failure_reason}")
        error_body = {"success": False, "error": failure_reason}
        return JSONResponse(status_code=500, content=error_body)


@app.get("/health")
async def health_check():
    """Report that the service is up and whether torch can see a CUDA device."""
    cuda_ready = torch.cuda.is_available()
    return {"status": "healthy", "gpu_available": cuda_ready}

# Function to run the FastAPI server
def run_fastapi(port):
    """Serve the module-level ``app`` with uvicorn on host 0.0.0.0.

    Args:
        port: TCP port uvicorn should listen on.
    """
    listen_options = {"host": "0.0.0.0", "port": port}
    uvicorn.run(app, **listen_options)

# Start the server in a separate thread
import os
import socket

port = 8000
server_thread = threading.Thread(target=run_fastapi, args=(port,))
server_thread.daemon = True

# A server started by an earlier run of this notebook keeps the port bound, and
# a second uvicorn instance then dies with "address already in use" (see the
# recorded traceback). Probe the port first and report the situation clearly.
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as port_probe:
    port_in_use = port_probe.connect_ex(("127.0.0.1", port)) == 0

if port_in_use:
    print(f"Port {port} is already in use; not starting another server. "
          "Restart the kernel to serve the current version of the app.")
else:
    server_thread.start()

# Setup ngrok tunnel.
# SECURITY: the authtoken must not be hardcoded. When NGROK_AUTH_TOKEN is set it
# is applied here; otherwise ngrok uses the token already stored in its
# configuration file by the authentication cell.
ngrok_auth_token = os.environ.get("NGROK_AUTH_TOKEN")
if ngrok_auth_token:
    ngrok.set_auth_token(ngrok_auth_token)
public_url = ngrok.connect(port)
print(f"Public URL: {public_url}")


# Keep the notebook running until the cell is interrupted
try:
    while True:
        time.sleep(1)
except KeyboardInterrupt:
    print("Shutting down server...")
    # Stops the ngrok process; the daemon server thread is not stopped here.
    ngrok.kill()

ERROR:asyncio:Task exception was never retrieved
future: <Task finished name='Task-39' coro=<Server.serve() done, defined at /usr/local/lib/python3.10/dist-packages/uvicorn/server.py:67> exception=SystemExit(1)>
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/uvicorn/server.py", line 162, in startup
    server = await loop.create_server(
  File "/usr/lib/python3.10/asyncio/base_events.py", line 1519, in create_server
    raise OSError(err.errno, 'error while attempting '
OSError: [Errno 98] error while attempting to bind on address ('0.0.0.0', 8000): address already in use

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/usr/lib/python3.10/threading.py", line 1016, in _bootstrap_inner
    self.run()
  File "/usr/lib/python3.10/threading.py", line 953, in run
    self._target(*self._args, **self._kwargs)
  File "<ipython-input-20-5a1926a14b65>", line 185, in run_fastapi
    uvicorn.run(ap

Public URL: NgrokTunnel: "https://123e-34-82-112-188.ngrok-free.app" -> "http://localhost:8000"
INFO:     105.235.136.48:0 - "GET / HTTP/1.1" 404 Not Found
INFO:     105.235.136.48:0 - "GET /health HTTP/1.1" 200 OK
INFO:     105.235.136.48:0 - "POST /generate-code HTTP/1.1" 500 Internal Server Error


ERROR:    Exception in ASGI application
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/uvicorn/protocols/http/h11_impl.py", line 403, in run_asgi
    result = await app(  # type: ignore[func-returns-value]
  File "/usr/local/lib/python3.10/dist-packages/uvicorn/middleware/proxy_headers.py", line 60, in __call__
    return await self.app(scope, receive, send)
  File "/usr/local/lib/python3.10/dist-packages/fastapi/applications.py", line 1054, in __call__
    await super().__call__(scope, receive, send)
  File "/usr/local/lib/python3.10/dist-packages/starlette/applications.py", line 113, in __call__
    await self.middleware_stack(scope, receive, send)
  File "/usr/local/lib/python3.10/dist-packages/starlette/middleware/errors.py", line 187, in __call__
    raise exc
  File "/usr/local/lib/python3.10/dist-packages/starlette/middleware/errors.py", line 165, in __call__
    await self.app(scope, receive, _send)
  File "/usr/local/lib/python3.10/dist-pack

Shutting down server...


## Other tests: minimal variant of the service (`/generate-code` and `/health` only)

In [None]:
from fastapi import FastAPI, HTTPException, Depends, Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from pydantic import BaseModel, Field
from typing import Optional, Dict, Any
import ollama
from ollama import AsyncClient
import asyncio
import time
from slowapi import Limiter
from slowapi.util import get_remote_address
import logging
import threading
import uvicorn
import torch
from pyngrok import ngrok
import nest_asyncio

# Allow nested event loops so the async server can run inside Jupyter
nest_asyncio.apply()

# Modified rate limiter implementation
# NOTE: in this cell the limiter is only constructed; it is not attached to the
# app or to any endpoint, so no rate limit is enforced.
limiter = Limiter(key_func=get_remote_address)

app = FastAPI(title="Code Generation API")

# The CORS specification does not allow a wildcard origin together with
# credentialed requests. The endpoints in this cell only read a JSON body, so
# credentials are disabled and the wildcard origin is kept.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)

class CodeGenerationRequest(BaseModel):
    # JSON body accepted by POST /generate-code in this reduced variant.

    # Required: what the caller wants generated, in plain language.
    prompt: str = Field(
        ...,
        description="Natural language description of the code to generate",
    )
    # Optional: language of the generated code.
    language: str = Field(
        description="Target programming language",
        default="python",
    )

class ApiResponse(BaseModel):
    # Response envelope returned by generate_code for both success and failure.
    success: bool  # True on success; False when the handler caught an exception
    data: Dict[str, Any]  # payload on success; the handler passes {} on failure
    error: Optional[str] = None  # str() of the caught exception on failure, else None

@app.post("/generate-code")
async def generate_code(request: CodeGenerationRequest):
    """Generate code with the WizardCoder model from a natural-language prompt.

    Args:
        request: Validated body holding the prompt and the target language.

    Returns:
        ``ApiResponse`` with ``generated_code`` and ``language`` on success.
        On failure an ``ApiResponse`` with ``success=False``, empty ``data``
        and the exception text in ``error`` is returned (no error status code
        is set by this handler).
    """
    try:
        prompt = f"""Generate {request.language} code for the following requirement:
        {request.prompt}

        Please provide clean, well-structured, and efficient code."""

        response = await AsyncClient().chat(
            model='wizardcoder',
            messages=[{'role': 'user', 'content': prompt}]
        )

        return ApiResponse(
            success=True,
            data={
                "generated_code": response.message.content,
                "language": request.language
            }
        )

    except Exception as e:
        # Record the traceback server-side; the failure used to be reported to
        # the client only, leaving nothing to debug from in the notebook output.
        logging.getLogger(__name__).exception("Error in code generation: %s", e)
        return ApiResponse(success=False, data={}, error=str(e))

@app.get("/health")
async def health_check():
    """Report that the service is up and whether torch can see a CUDA device."""
    cuda_ready = torch.cuda.is_available()
    return {"status": "healthy", "gpu_available": cuda_ready}

# Function to run the FastAPI server
def run_fastapi(port):
    """Serve the module-level ``app`` with uvicorn on host 0.0.0.0.

    Args:
        port: TCP port uvicorn should listen on.
    """
    listen_options = {"host": "0.0.0.0", "port": port}
    uvicorn.run(app, **listen_options)

# Start the server in a separate thread
import os
import socket

port = 8000
server_thread = threading.Thread(target=run_fastapi, args=(port,))
server_thread.daemon = True

# A server left over from an earlier cell or run keeps the port bound, and a
# second uvicorn instance then dies with "address already in use". Probe the
# port first and report the situation clearly.
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as port_probe:
    port_in_use = port_probe.connect_ex(("127.0.0.1", port)) == 0

if port_in_use:
    print(f"Port {port} is already in use; not starting another server. "
          "Restart the kernel to serve the app defined in this cell.")
else:
    server_thread.start()

# Setup ngrok tunnel.
# SECURITY: the authtoken must not be hardcoded. When NGROK_AUTH_TOKEN is set it
# is applied here; otherwise ngrok uses the token already stored in its
# configuration file by the authentication cell.
ngrok_auth_token = os.environ.get("NGROK_AUTH_TOKEN")
if ngrok_auth_token:
    ngrok.set_auth_token(ngrok_auth_token)
public_url = ngrok.connect(port)
print(f"Public URL: {public_url}")

## Run the FastAPI server with ngrok

## Test the API

In [None]:
import requests

ngrok_url = "YOUR_NGROK_URL"  # Replace with the URL printed by the code

# Fail with a clear message when the placeholder above was not replaced.
if not ngrok_url.startswith(("http://", "https://")):
    raise ValueError(
        "Set ngrok_url to the public URL printed by the server cell "
        "(for example https://<id>.ngrok-free.app)"
    )

test_prompt = {
    "prompt": "Create a function that sorts a list using bubble sort",
    "language": "python"
}

# Model inference can be slow, so use a generous but finite timeout rather than
# waiting forever on a dead tunnel.
response = requests.post(f"{ngrok_url}/generate-code", json=test_prompt, timeout=300)
print(response.status_code)
try:
    print(response.json())
except ValueError:
    # The body is not JSON; show the raw text instead of crashing.
    print(response.text)

## Cleanup (run this when you're done)

In [None]:
# Only the ngrok process is stopped here; nothing in this cell stops the
# uvicorn server thread started earlier.
ngrok.kill()  # Stop ngrok process