In [2]:
import json
import os

import requests
from dotenv import load_dotenv

# Load environment variables from a .env file (if any) so the settings below
# can be overridden without editing the notebook.
load_dotenv()

# LiteLLM proxy configuration: base URL of the proxy server.
# Override with the LITELLM_PROXY_API_BASE environment variable; when it is
# unset or empty, a proxy on localhost:8000 is assumed. Trailing slashes are
# stripped because request paths such as "/v1/chat/completions" are appended.
PROXY_URL = (
    os.environ.get("LITELLM_PROXY_API_BASE") or "http://localhost:8000"
).rstrip("/")

# Example function to call the LiteLLM proxy
def call_litellm_proxy(prompt, model="gpt-4", max_tokens=100, timeout=60):
    """
    Send a chat-completion request to the LiteLLM proxy server.

    The bearer token is read from the ``LITELLM_PROXY_API_KEY`` environment
    variable at call time; when it is unset or empty, the placeholder key
    ``sk-1234`` is used.

    Args:
        prompt (str): The prompt to send to the model
        model (str): The model to use
        max_tokens (int): Maximum number of tokens to generate
        timeout (float or tuple): Seconds to wait for the proxy before giving
            up; passed straight to ``requests.post``

    Returns:
        dict: The decoded JSON response from the proxy, or
        ``{"error": "<message>"}`` if the request failed, returned a non-2xx
        status, timed out, or produced a body that is not valid JSON
    """
    # Keep credentials out of the notebook: prefer the environment and fall
    # back to the placeholder key only when nothing is configured.
    api_key = os.environ.get("LITELLM_PROXY_API_KEY") or "sk-1234"

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}",
    }

    payload = {
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": max_tokens,
    }

    try:
        response = requests.post(
            f"{PROXY_URL}/v1/chat/completions",
            headers=headers,
            json=payload,  # requests serializes the dict to a JSON body
            timeout=timeout,  # without this, requests waits indefinitely
        )
        response.raise_for_status()
        return response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers JSON decoding failures on requests versions whose
        # decode error does not derive from RequestException.
        print(f"Error calling LiteLLM proxy: {e}")
        return {"error": str(e)}


In [6]:
# Ask the proxy a simple factual question and pretty-print the raw reply.
prompt = "Who was the first president of the United States?"
response = call_litellm_proxy(prompt)
print(
    "Response from LiteLLM proxy:",
    json.dumps(response, indent=2),
    sep="\n",
)


Response from LiteLLM proxy:
{
  "id": "chatcmpl-BRQTLit5OarmHBRwnMDL0NWVVPKdF",
  "created": 1745876315,
  "model": "gpt-3.5-turbo-0125",
  "object": "chat.completion",
  "system_fingerprint": null,
  "choices": [
    {
      "finish_reason": "stop",
      "index": 0,
      "message": {
        "content": "The first president of the United States was George Washington.",
        "role": "assistant",
        "tool_calls": null,
        "function_call": null,
        "annotations": []
      }
    }
  ],
  "usage": {
    "completion_tokens": 12,
    "prompt_tokens": 17,
    "total_tokens": 29,
    "completion_tokens_details": {
      "accepted_prediction_tokens": 0,
      "audio_tokens": 0,
      "reasoning_tokens": 0,
      "rejected_prediction_tokens": 0
    },
    "prompt_tokens_details": {
      "audio_tokens": 0,
      "cached_tokens": 0
    }
  },
  "service_tier": "default"
}
