In [12]:
# Model selection
# MODEL_NAME is the Ollama model tag that later cells pull and query.
MODEL_NAME = "gemma2:9b"
# The %env magics set variables in the kernel's environment, so processes
# launched from this notebook afterwards (e.g. `ollama serve`) inherit them.
# Keep comments on their own lines: text after the value on a %env line
# would become part of the value.
# Per Ollama's documentation: default context window size, in tokens.
%env OLLAMA_CONTEXT_LENGTH=16384
# Per Ollama's documentation: address the server binds to (0.0.0.0 = all interfaces).
%env OLLAMA_HOST=0.0.0.0
# Per Ollama's documentation: a negative keep-alive keeps loaded models in memory indefinitely.
%env OLLAMA_KEEP_ALIVE=-1


env: OLLAMA_CONTEXT_LENGTH=16384
env: OLLAMA_HOST=0.0.0.0
env: OLLAMA_KEEP_ALIVE=-1


In [13]:
!apt-get install -y lshw pciutils
!nvcc --version
!nvidia-smi

from psutil import virtual_memory
ram_gb = virtual_memory().total / 1e9
print(f"\nüß† Available RAM: {ram_gb:.1f} GB")
print("‚úÖ High-RAM runtime!" if ram_gb >= 20 else "‚ùå Not a high-RAM runtime.")


Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
lshw is already the newest version (02.19.git.2021.06.19.996aaad9c7-2build1).
pciutils is already the newest version (1:3.7.0-6).
0 upgraded, 0 newly installed, 0 to remove and 45 not upgraded.
nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2025 NVIDIA Corporation
Built on Fri_Feb_21_20:23:50_PST_2025
Cuda compilation tools, release 12.8, V12.8.93
Build cuda_12.8.r12.8/compiler.35583870_0
Fri Feb 13 11:02:26 2026       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 580.82.07              Driver Version: 580.82.07      CUDA Version: 13.0     |
+-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|        

In [None]:
# Install zstd first: the Ollama installer downloads a .tar.zst archive.
!apt-get update -qq && apt-get install -y zstd
# pciutils provides lspci — presumably used for GPU detection by the installer (TODO confirm).
!apt-get install -y pciutils
# Download the official Ollama install script and run it with sh.
!curl -fsSL https://ollama.com/install.sh | sh

W: Skipping acquire of configured file 'main/source/Sources' as repository 'https://r2u.stat.illinois.edu/ubuntu jammy InRelease' does not seem to provide it (sources.list entry misspelt?)
Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
zstd is already the newest version (1.4.8+dfsg-3build1).
0 upgraded, 0 newly installed, 0 to remove and 45 not upgraded.
Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
pciutils is already the newest version (1:3.7.0-6).
0 upgraded, 0 newly installed, 0 to remove and 45 not upgraded.
>>> Cleaning up old version at /usr/local/lib/ollama
>>> Installing ollama to /usr/local
>>> Downloading ollama-linux-amd64.tar.zst
########################                                                  34.3%

In [None]:
import subprocess
import time
import requests
import threading

def start_ollama():
    """Run `ollama serve` and block until that process exits."""
    server_cmd = ["ollama", "serve"]
    subprocess.call(server_cmd)


# Run the blocking server call on a daemon thread so this cell returns
# immediately and the thread never keeps the interpreter alive on shutdown.
ollama_thread = threading.Thread(target=start_ollama, daemon=True)
ollama_thread.start()

# Pull model (this also verifies Ollama CLI is ready)
!ollama pull {MODEL_NAME}

# Wait for Ollama HTTP API to be ready
def wait_for_ollama(timeout=60):
    for i in range(timeout):
        try:
            r = requests.get("http://127.0.0.1:11434")
            if r.status_code in [200, 404]:
                print(f"‚úÖ Ollama is up (after {i+1}s).")
                return
        except requests.exceptions.ConnectionError:
            pass
        print(f"‚è≥ Waiting for Ollama to start... {i+1}s")
        time.sleep(1)
    raise RuntimeError("‚ùå Ollama did not start in time.")

wait_for_ollama()


In [None]:
# Download the latest cloudflared Linux amd64 release binary to ./cloudflared
# and mark it executable so a later cell can launch it.
!wget https://github.com/cloudflare/cloudflared/releases/latest/download/cloudflared-linux-amd64 -O cloudflared
!chmod +x cloudflared


In [None]:
import re

# Run cloudflared tunnel in background and get the public URL.
# NOTE(review): this publishes the local Ollama API on a public URL; nothing
# in this notebook adds authentication in front of it.
cloudflared_proc = subprocess.Popen(
    ['./cloudflared', 'tunnel', '--url', 'http://localhost:11434', '--no-autoupdate'],
    stdout=subprocess.PIPE,
    stderr=subprocess.STDOUT,
    text=True
)

# Match a single hostname label only. The previous greedy `.*` could swallow
# unrelated text, and the negative lookahead skips the service endpoint
# api.trycloudflare.com, which can show up in log/error lines and is not a
# tunnel address.
tunnel_url_pattern = re.compile(r'https://(?!api\.)[a-zA-Z0-9-]+\.trycloudflare\.com')

public_url = None
for line in cloudflared_proc.stdout:
    print(line.strip())
    match = tunnel_url_pattern.search(line)
    if match:
        public_url = match.group(0)
        break

if public_url:
    def _drain_cloudflared_output(stream):
        """Discard remaining cloudflared log lines so its pipe never fills."""
        for _ in stream:
            pass

    # cloudflared keeps logging for the life of the tunnel. If nobody reads
    # the pipe, it eventually blocks on write once the OS buffer is full.
    threading.Thread(
        target=_drain_cloudflared_output,
        args=(cloudflared_proc.stdout,),
        daemon=True,
    ).start()
    print(f"\n✅ Public URL for Ollama:\n{public_url}")
else:
    # stdout reached EOF (process exited) without announcing a tunnel URL.
    raise RuntimeError("❌ Could not find public Cloudflare URL.")


In [None]:
import json

# Smoke test: send one non-streaming generation request through the public
# tunnel URL to confirm the whole path (tunnel -> Ollama -> model) works.
data = {
    "model": MODEL_NAME,
    "prompt": "Question: What is the capital of Japan?\nAnswer:",
    "stream": False
}

# (connect, read) timeouts: without them a stalled tunnel hangs the cell
# forever. The read timeout is generous because the first request may have
# to load the model.
response = requests.post(f"{public_url}/api/generate", json=data, timeout=(10, 300))
# Surface HTTP errors (e.g. a tunnel error page) as a clear HTTPError rather
# than an opaque JSON decode failure on the next line.
response.raise_for_status()
print(response.json())
