# Diagnose Ollama 404 Error

This notebook helps diagnose why you're getting a 404 error when calling the Ollama API.

## What the Error Means

A 404 error means "Not Found" - the server couldn't find what you asked for. This could mean:
1. The model doesn't exist in your Ollama installation
2. Ollama server is not running or not accessible (this usually shows up as a connection error rather than a 404, but Test 1 below rules it out)
3. The API endpoint path is wrong (unlikely if you're using the latest Ollama)
4. The model name format is incorrect

Let's run some diagnostic tests to figure out what's wrong.


In [None]:
# Import necessary libraries
import sys
from pathlib import Path
import requests
import json

# Make the project's `src` package importable from this notebook.
# Path() is the current working directory; its resolved parent is put at the
# front of sys.path, so this assumes the notebook is run from a directory one
# level below the project root (e.g. a notebooks/ folder) — TODO confirm.
project_root = Path().resolve().parent
sys.path.insert(0, str(project_root))

# Must come after the sys.path change above, otherwise `src` is not found.
# NOTE(review): Config is not used by the cells visible here; presumably the
# import doubles as a check that the project package loads.
from src.config import Config

print("‚úÖ Imports successful!")


## Test 1: Check if Ollama Server is Running

First, let's verify that Ollama is running and accessible.


In [None]:
# Test 1: check that an Ollama server answers at base_url.
# The /api/tags endpoint is queried and the "models" list in its JSON reply is
# printed, so one request shows both reachability and the installed models.
# base_url is also read by the later cells in this notebook.
base_url = "http://localhost:11434"
test_url = f"{base_url}/api/tags"

print(f"Testing Ollama connection at: {base_url}")
print(f"Endpoint: {test_url}")
print()

try:
    # Read-only query, hence GET; the timeout keeps the cell from hanging
    response = requests.get(test_url, timeout=5)

    # 200 means success, anything else is reported with the raw body
    if response.status_code == 200:
        print("‚úÖ Ollama server is running and accessible!")
        print()

        data = response.json()
        # "models" may be absent or null; treat both as an empty list so the
        # count below prints 0 instead of raising on len(None)
        models = data.get('models') or []

        print(f"Found {len(models)} available models:")
        for i, model in enumerate(models, 1):
            model_name = model.get('name', 'Unknown')
            print(f"  {i}. {model_name}")
    else:
        print(f"‚ùå Ollama returned status code: {response.status_code}")
        print(f"Response: {response.text}")

# Timeout is tested before ConnectionError on purpose: requests' ConnectTimeout
# derives from both classes, so with the opposite order a connect-phase timeout
# would be reported as "could not connect" and this handler would never see it.
except requests.exceptions.Timeout:
    print("‚ùå Connection to Ollama timed out!")
    print("   The server might be overloaded or not responding")
except requests.exceptions.ConnectionError:
    print("‚ùå Could not connect to Ollama server!")
    print(f"   Make sure Ollama is running at {base_url}")
    print("   Start it with: ollama serve")
except Exception as e:
    # Last-resort boundary for a diagnostic cell: show the error, don't crash
    print(f"‚ùå Error: {type(e).__name__}: {str(e)}")


## Test 2: Check if the Chat API Endpoint Exists

Now let's test if the `/api/chat` endpoint is available. This is the endpoint your code uses for text generation.


In [None]:
# Test 2: send one non-streaming chat request to /api/chat.
# This is the endpoint used by the generate() function in ollama_client.py.
# Relies on base_url having been defined by the server-check cell.
chat_url = f"{base_url}/api/chat"

print(f"Testing chat endpoint: {chat_url}")
print()

try:
    # Fetch the installed models first so the request names one that exists
    tags_response = requests.get(f"{base_url}/api/tags", timeout=5)
    if tags_response.status_code == 200:
        models_data = tags_response.json()
        # "models" may be absent or null; treat both as "no models" so the
        # pull hint below is shown instead of a TypeError
        available_models = [m.get('name') for m in (models_data.get('models') or [])]

        if available_models:
            # Use the first available model for testing
            test_model = available_models[0]
            print(f"Using model '{test_model}' for testing")
            print()

            # This matches the format used in ollama_client.py
            test_payload = {
                "model": test_model,
                "messages": [
                    {
                        "role": "user",
                        "content": "Say hello",
                    }
                ],
                "stream": False,  # Don't stream, get full response at once
            }

            print(f"Sending test request to {chat_url}")
            print(f"Payload: {json.dumps(test_payload, indent=2)}")
            print()

            # Longer timeout than the tags call: this request runs the model
            chat_response = requests.post(chat_url, json=test_payload, timeout=30)

            if chat_response.status_code == 200:
                print("‚úÖ Chat endpoint is working!")
                result = chat_response.json()
                # Only the first 100 characters of the reply are shown
                print(f"Response: {result.get('message', {}).get('content', 'No content')[:100]}...")
            elif chat_response.status_code == 404:
                print("‚ùå 404 Not Found - The chat endpoint doesn't exist!")
                print("   This might mean:")
                # The Ollama CLI has no "update" subcommand, so point at a reinstall instead
                print("   1. Your Ollama version is too old (upgrade by reinstalling from https://ollama.com/download)")
                print("   2. The endpoint path is wrong")
                print(f"   Response: {chat_response.text[:200]}")
            else:
                print(f"‚ùå Got status code: {chat_response.status_code}")
                print(f"Response: {chat_response.text[:200]}")
        else:
            print("‚ö†Ô∏è  No models found in Ollama. You need to pull a model first.")
            print("   Example: ollama pull gemma3:1b")
    else:
        print(f"‚ùå Could not get model list (status {tags_response.status_code})")

except Exception as e:
    # Last-resort boundary for a diagnostic cell: show the error, don't crash
    print(f"‚ùå Error testing chat endpoint: {type(e).__name__}: {str(e)}")


In [None]:
# Test 3: check if the specific model from your experiment exists
# The experiment name shows "gengemma3_1b" but the actual model name should be "gemma3:1b"
experiment_model_name = "gemma3:1b"  # This is what should be in the config

# Rebuilt here so this cell does not fail with NameError when the
# chat-endpoint cell was skipped; base_url is still required.
chat_url = f"{base_url}/api/chat"

print(f"Checking if model '{experiment_model_name}' exists...")
print()

try:
    # Get list of available models
    tags_response = requests.get(f"{base_url}/api/tags", timeout=5)
    if tags_response.status_code == 200:
        models_data = tags_response.json()
        # "models" may be absent or null and an entry may lack a name; both
        # are skipped so the string operations below only see real names
        available_models = [
            m['name'] for m in (models_data.get('models') or []) if m.get('name')
        ]

        # Exact string match against the names the server reports
        if experiment_model_name in available_models:
            print(f"‚úÖ Model '{experiment_model_name}' is available!")

            # Try to use it with a minimal non-streaming chat request
            print("\nTesting the model with a simple request...")
            test_payload = {
                "model": experiment_model_name,
                "messages": [{"role": "user", "content": "test"}],
                "stream": False,
            }

            test_response = requests.post(chat_url, json=test_payload, timeout=30)

            if test_response.status_code == 200:
                print("‚úÖ Model works correctly!")
            else:
                print(f"‚ùå Model exists but request failed with status {test_response.status_code}")
                print(f"Response: {test_response.text[:200]}")
        else:
            print(f"‚ùå Model '{experiment_model_name}' is NOT available!")
            print("\nAvailable models are:")
            for model in available_models:
                print(f"  - {model}")
            print(f"\nüí° To install the model, run: ollama pull {experiment_model_name}")

            # Point out near misses (same family or same size tag), which
            # usually indicate a naming mismatch rather than a missing model
            similar = [m for m in available_models if 'gemma' in m.lower() or '1b' in m.lower()]
            if similar:
                print("\nSimilar models found:")
                for model in similar:
                    print(f"  - {model}")
    else:
        # Include the status code, as the chat-endpoint cell does
        print(f"‚ùå Could not get model list (status {tags_response.status_code})")

except Exception as e:
    # Last-resort boundary for a diagnostic cell: show the error, don't crash
    print(f"‚ùå Error: {type(e).__name__}: {str(e)}")


## Test 4: Test with the Actual Code

Let's test using the actual `generate()` function from your codebase to see what error message we get now (with improved error handling).


In [None]:
# Import the generate function from ollama_client
from src.ollama_client import generate

# Test 4: call the project's own generate() with the experiment's model and
# report which kind of error, if any, comes back.
# Relies on base_url having been defined by the server-check cell.
test_model = "gemma3:1b"  # The model from your experiment
test_prompt = "Say hello in one word"

print("Testing generate() function with:")
print(f"  Model: {test_model}")
print(f"  Prompt: {test_prompt}")
print(f"  Base URL: {base_url}")
print()

try:
    # This is the same function that failed in your experiment; it is
    # unpacked here as a (text, metadata) pair
    response_text, metadata = generate(
        prompt=test_prompt,
        model=test_model,
        base_url=base_url,
        stream=False,
    )

    print("‚úÖ generate() function worked!")
    print(f"Response: {response_text}")
    print(f"Metadata: {json.dumps(metadata, indent=2)}")

except requests.exceptions.HTTPError as e:
    print("‚ùå HTTP Error occurred:")
    print(f"   {str(e)}")
    print()
    print("This is the same type of error you saw in your experiment.")
    print("The improved error handling should now show more details about what went wrong.")

# requests' ConnectionError does not derive from the builtin ConnectionError,
# so both are listed; which of the two generate() raises is not visible here.
except (requests.exceptions.ConnectionError, ConnectionError) as e:
    print("‚ùå Connection Error:")
    print(f"   {str(e)}")

except Exception as e:
    # Last-resort boundary for a diagnostic cell: show the error, don't crash
    print(f"‚ùå Unexpected error: {type(e).__name__}: {str(e)}")
