<a href="https://colab.research.google.com/github/usernamehelloworld/18/blob/main/ollama_setup.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Cell 1: Install Dependencies (Run once per session)
print("Installing necessary packages...")
!pip install colab-xterm gradio requests --quiet
%load_ext colabxterm
print("Packages installed and Xterm ready.")

In [None]:
# Cell 2: Download Config/Setup Files from GitHub (Run once per session)
# Purpose: Downloads the Modelfile and the setup script from your GitHub repo.

# !!! IMPORTANT: EDIT THIS LINE WITH YOUR GITHUB DETAILS !!!
GITHUB_USERNAME="usernamehelloworld" # Replace with your GitHub username
GITHUB_REPONAME="UC"                 # Replace with your GitHub repository name
GITHUB_BRANCH="main"               # Replace with your branch name if not 'main'
# Construct the URL for raw file access
GITHUB_REPO_URL = f"https://raw.githubusercontent.com/{GITHUB_USERNAME}/{GITHUB_REPONAME}/{GITHUB_BRANCH}/"

MODELFILE_NAME = "IncreaseContext.Modelfile"
SETUP_SCRIPT_NAME = "setup_ollama.sh"

print(f"Using GitHub URL: {GITHUB_REPO_URL}")

print(f"Downloading {MODELFILE_NAME}...")
!curl -# -L -O {GITHUB_REPO_URL}{MODELFILE_NAME}

print(f"Downloading {SETUP_SCRIPT_NAME}...")
!curl -# -L -O {GITHUB_REPO_URL}{SETUP_SCRIPT_NAME}

print(f"Making {SETUP_SCRIPT_NAME} executable...")
!chmod +x {SETUP_SCRIPT_NAME}

print("\n--- Checking downloaded files ---")
!ls -l {MODELFILE_NAME} {SETUP_SCRIPT_NAME}
print("---------------------------------")

print(f"\nFiles downloaded. Ready for Ollama Server Setup in %xterm using ./{SETUP_SCRIPT_NAME}")
print("NOTE: Pull any additional models you want to use via 'ollama pull model_name' in the first xterm after setup.")

In [None]:
# Cell 3: Open First %xterm for Ollama Server Setup (Run once per session)
# Purpose: Opens the terminal where you will start the Ollama server.

print("----------------------------------------------------------------------")
print(">>> Action Needed: Start Ollama Server <<<")
print("1. A terminal window (xterm) will open below.")
print(f"2. Click inside the xterm window and type the command: ./setup_ollama.sh")
print("3. Press Enter to run the script.")
print("4. Wait for the script to complete (installs Ollama, starts server, creates model).")
print("5. OPTIONAL: Pull additional models using 'ollama pull model_name' (e.g., ollama pull llama3).")
print("6. IMPORTANT: Leave this xterm window OPEN. It keeps the Ollama server running.")
print("----------------------------------------------------------------------")
%xterm

In [None]:
# Cell 4: Define Gradio App with Model Selection (Run once per session, AFTER server is running)
# Purpose: Defines the Gradio UI and the Python functions to interact with Ollama.

import gradio as gr
import requests
import json
import time

# Base URL of the Ollama HTTP API; endpoint paths such as /api/tags and
# /api/chat are appended to it by the functions below.
OLLAMA_API_BASE_URL = "http://127.0.0.1:11434" # Base URL for Ollama API
# Model preselected in the dropdown when the server reports it as available.
# NOTE(review): setup_ollama.sh is not visible here — confirm it creates this tag.
DEFAULT_CUSTOM_MODEL = "artifish/llama3.2-uncensored-8k:latest" # The tag created by setup_ollama.sh

# --- Helper Function to Get Available Ollama Models ---
def get_ollama_models():
    """Fetch the names of the models known to the local Ollama server.

    Sends a GET request to ``{OLLAMA_API_BASE_URL}/api/tags`` and reads the
    ``name`` of every entry in the ``models`` list of the JSON reply.

    Returns:
        A ``(models, default_model)`` tuple. ``models`` is the alphabetically
        sorted list of model names; ``default_model`` is
        ``DEFAULT_CUSTOM_MODEL`` when it is in that list, otherwise the first
        name. When the server cannot be reached, answers with an error, or
        reports no models, both elements carry the placeholder text
        ``"Server Down / No Models Found"`` (``models`` as a one-item list).

    Errors are never raised to the caller: they are printed and reported
    through ``gr.Warning``.
    """
    placeholder_message = "Server Down / No Models Found"
    try:
        print("Attempting to fetch models from Ollama server...")
        # Short timeout: this is only an availability check.
        response = requests.get(f"{OLLAMA_API_BASE_URL}/api/tags", timeout=5)
        response.raise_for_status()
        data = response.json()
        if "models" in data and data["models"]:
            models = sorted(m["name"] for m in data["models"])
            print(f"Found Ollama models: {models}")
            # The list is non-empty here, so models[0] is always a valid fallback.
            default_model = DEFAULT_CUSTOM_MODEL if DEFAULT_CUSTOM_MODEL in models else models[0]
            return models, default_model
        print("Ollama server running, but no models found/pulled.")
    except requests.exceptions.ConnectionError:
        print(f"ERROR: Ollama server not reachable at {OLLAMA_API_BASE_URL}. Cannot get model list.")
        gr.Warning("Ollama server not running! Cannot fetch models. Please ensure Ollama is running (check the first xterm).")
    except Exception as e:
        # Deliberately broad: any failure (HTTP error, timeout, malformed JSON)
        # must degrade to the placeholder entry instead of breaking the UI.
        print(f"Error fetching Ollama models: {e}")
        gr.Warning(f"Error fetching Ollama models: {e}")
    return [placeholder_message], placeholder_message

# --- Modified Chat Function ---
def ollama_chat_dynamic(model_name, message, history):
    """Stream a chat reply from the selected Ollama model.

    Generator. Turns ``history`` (two-item user/assistant entries) plus the
    new ``message`` into the request for ``{OLLAMA_API_BASE_URL}/api/chat``
    and yields the reply text accumulated so far after each received content
    chunk. Problems are not raised; they are yielded as bracketed
    ``"[Error ...]"`` strings.

    Args:
        model_name: Name of the model to query; a falsy value or the
            placeholder "Server Down / No Models Found" is rejected.
        message: The new user message.
        history: Iterable of earlier turns; entries that are not a list or
            tuple of length two are skipped with a console note.
    """
    no_usable_model = not model_name or model_name == "Server Down / No Models Found"
    if no_usable_model:
        yield "[Error: Please select a valid model from the dropdown and ensure the server is running.]"
        return

    print(f"Sending request to model: {model_name}")

    # Flatten the (user, assistant) turns into the role/content message list.
    conversation = []
    for pair in history:
        well_formed = isinstance(pair, (list, tuple)) and len(pair) == 2
        if not well_formed:
            print(f"Skipping invalid history entry: {pair}")
            continue
        conversation.append({"role": "user", "content": "" if pair[0] is None else str(pair[0])})
        conversation.append({"role": "assistant", "content": "" if pair[1] is None else str(pair[1])})
    conversation.append({"role": "user", "content": message})

    request_body = dict(model=model_name, messages=conversation, stream=True)
    reply_so_far = ""

    try:
        # The session and the streamed response are both closed on exit.
        with requests.Session() as session, session.post(
            f"{OLLAMA_API_BASE_URL}/api/chat",
            json=request_body,
            stream=True,
            timeout=300,  # 5 min timeout
        ) as reply:
            reply.raise_for_status()
            print("Connection established, receiving stream...")
            for line in reply.iter_lines():
                if not line:
                    continue  # ignore blank lines in the stream
                try:
                    data = json.loads(line.decode('utf-8'))
                    part = data.get("message")
                    if part and part.get("content"):
                        reply_so_far = reply_so_far + part["content"]
                        yield reply_so_far  # intermediate result for the UI
                    if data.get("done"):
                        print(f"Stream finished. Reason: {data.get('done_reason', 'N/A')}")
                        break
                except json.JSONDecodeError:
                    # An undecodable line is reported and skipped; the stream continues.
                    print(f"Warning: Could not decode JSON line: {line}")
                except Exception as e:
                    print(f"Error processing stream line: {e}")
                    yield f"[Error processing stream chunk: {e}]"
                    return  # any other failure ends the stream
            if not reply_so_far:
                yield "[Received empty response from model]"
    except requests.exceptions.Timeout:
        print("ERROR: Request timed out.")
        yield "[Error: Request to Ollama timed out. The model might be taking too long.]"
    except requests.exceptions.ConnectionError as e:
        print(f"ERROR connecting to Ollama: {e}")
        yield f"[Error: Could not connect to Ollama server at {OLLAMA_API_BASE_URL}. Is it running?]"
    except requests.exceptions.RequestException as e:
        print(f"ERROR during Ollama request: {e}")
        yield f"[Error during Ollama request: {e}]"
    except Exception as e:
        print(f"An unexpected error occurred in ollama_chat_dynamic: {e}")
        import traceback
        traceback.print_exc()  # full traceback for debugging
        yield f"[An unexpected error occurred: {e}]"


# --- Build Gradio Interface using Blocks ---
# Defines (but does not launch) the chat UI: a model dropdown with a refresh
# button, the chat history, a message box, and send/clear buttons.
print("Defining Gradio Interface...")

with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# Ollama Chat Interface")
    gr.Markdown("Select a model you have pulled locally via Ollama. Ensure the Ollama server is running (check the first xterm).")

    with gr.Row():
        # Fetch models once while the UI is built; the refresh button re-queries later.
        available_models_list, initial_default = get_ollama_models()
        model_selector = gr.Dropdown(
            label="Select Ollama Model",
            choices=available_models_list,
            value=initial_default,
            interactive=True # Allow user interaction
        )
        refresh_button = gr.Button("🔄 Refresh Models")

    chatbot = gr.Chatbot(label="Chat History", height=550, show_copy_button=True)
    msg = gr.Textbox(label="Your Message", placeholder="Type your message and press Enter...", scale=7) # Take more space
    submit_btn = gr.Button("Send", scale=1) # Explicit submit button
    clear = gr.ClearButton([msg, chatbot], value="🗑️ Clear Chat") # Use ClearButton

    def update_model_list():
        """Re-query the Ollama server and return a Dropdown with the refreshed choices and default."""
        print("Refreshing model list via button...")
        models, default = get_ollama_models()
        # Update the dropdown component directly
        return gr.Dropdown(choices=models, value=default)

    # Link refresh button to update function
    refresh_button.click(update_model_list, inputs=None, outputs=model_selector)

    def respond(model_name, message, chat_history):
        """Handle a submitted message and stream the model's reply into the chat.

        Generator: every yield is a ``(textbox_value, chat_history)`` pair
        matching the ``[msg, chatbot]`` outputs, so the input box is cleared
        and the history redrawn on each update.

        Args:
            model_name: Current value of the model dropdown.
            message: Text of the message box.
            chat_history: List of ``[user, assistant]`` entries from the chatbot.
        """
        # Tolerate a missing history so that .append below cannot fail.
        chat_history = chat_history or []
        if not message:
            # Nothing to send. This function is a generator, so a
            # `return value` would be discarded; yield the unchanged state.
            yield "", chat_history
            return
        if not model_name or model_name == "Server Down / No Models Found":
            gr.Warning("Please select a valid model or refresh the list.")
            # Need to yield something to update UI state correctly
            yield "", chat_history
            return # Stop processing

        # Show the user message immediately, with None as the pending reply.
        chat_history.append([message, None])
        yield "", chat_history

        # Pass the history WITHOUT the entry just added: the chat function
        # appends the current message itself.
        stream = ollama_chat_dynamic(model_name, message, chat_history[:-1])
        for chunk in stream:
            # Each chunk is the full reply so far, so it replaces the placeholder.
            chat_history[-1][1] = chunk
            yield "", chat_history

    # Link message submission (Enter key in Textbox OR Submit button click)
    msg.submit(respond, [model_selector, msg, chatbot], [msg, chatbot])
    submit_btn.click(respond, [model_selector, msg, chatbot], [msg, chatbot])

print("Gradio Blocks defined.")

In [None]:
# Cell 5: Launch Gradio Interface (Run once per session, AFTER server is running)
# Purpose: Starts the Gradio web UI defined in the previous cell.

print("Launching Gradio Chat Interface...")
print("This might take a minute. Please wait for the public URL.")
# These reminders are printed BEFORE launch(): with debug=True the call keeps
# the cell busy until the server is stopped, so anything placed after it would
# only show up once the app is no longer running.
print("When the public URL appears below, click it to open the chat interface in a new tab.")
print("Remember to keep the first xterm window (running the Ollama server) open!")

# Launch the Gradio Blocks interface
demo.launch(share=True, debug=True) # share=True provides a public link, debug=True shows logs