# Colab1: Finetune & Export to Ollama (Unsloth)

Follow the Unsloth tutorial to fine-tune a small model, export it to Ollama, and then run a sample inference. Run this notebook in Colab with a GPU runtime.

In [None]:
# Install deps
!pip install -q unsloth datasets transformers accelerate peft

In [None]:
# Config: every value a reader might want to tune lives in this cell.

# Checkpoint identifier passed to the model loader, and the directory that
# receives training outputs and the saved model.
MODEL_NAME = "unsloth/smollm2-135m"
OUTPUT_DIR = "/content/unsloth-ollama"

# Training hyperparameters: number of epochs, per-device batch size, learning rate.
EPOCHS, BATCH_SIZE = 1, 4
LR = 2e-4


In [None]:
# Simple toy dataset (replace with your task data)
from datasets import Dataset

# Three short prompts stored under a single "text" column.
texts = [
    "Explain what Ollama export does",
    "Describe how to deploy a model to Ollama",
    "Share a quick tip for using Ollama",
]

# Build the dataset first, then split it; the fixed seed keeps the
# train/test assignment reproducible across runs.
toy_dataset = Dataset.from_dict({"text": texts})
df = toy_dataset.train_test_split(test_size=0.33, seed=42)


In [None]:
from unsloth import FastLanguageModel

# Load the base checkpoint together with its tokenizer through Unsloth.
# NOTE(review): no precision/quantization arguments are passed, so Unsloth's
# defaults apply. Confirm those defaults are compatible with the plain
# `Trainer` used later in this notebook (a quantized base model normally
# needs trainable adapters attached before fine-tuning) - TODO verify.
loaded = FastLanguageModel.from_pretrained(model_name=MODEL_NAME)
model, tokenizer = loaded

# Reuse the end-of-sequence token as the padding token so that the
# fixed-length padding requested during tokenization has a token to use.
tokenizer.pad_token = tokenizer.eos_token


In [None]:
def preprocess(batch):
    """Tokenize the ``"text"`` field of a batch.

    Every sequence is truncated and padded to exactly 256 tokens, so all
    rows produced by this function have the same length.

    Args:
        batch: mapping with a ``"text"`` entry holding the raw strings.

    Returns:
        Whatever the module-level ``tokenizer`` returns for those strings.
    """
    tokenize_options = dict(padding="max_length", truncation=True, max_length=256)
    return tokenizer(batch["text"], **tokenize_options)

# Tokenize both splits in batched mode with the same preprocessing function;
# the "train" split is processed first, then the "test" split.
train_tok, val_tok = (
    df[split_name].map(preprocess, batched=True) for split_name in ("train", "test")
)


In [None]:
import torch
from transformers import Trainer, TrainingArguments

def collate_fn(batch):
    """Stack tokenized examples into tensors for causal-LM training.

    Labels are a copy of ``input_ids`` with padding positions set to ``-100``
    so they are ignored by the loss. Padding is identified through
    ``attention_mask`` rather than by comparing against the pad token id:
    this notebook sets the pad token equal to the EOS token, so an id-based
    mask would also discard genuine EOS tokens from the loss.

    Args:
        batch: list of dicts, each holding equal-length integer lists under
            ``"input_ids"`` and ``"attention_mask"``.

    Returns:
        dict with ``input_ids``, ``attention_mask`` and ``labels`` tensors,
        each of shape ``(len(batch), sequence_length)`` and dtype int64.
    """
    input_ids = torch.tensor([example["input_ids"] for example in batch], dtype=torch.long)
    attention_mask = torch.tensor(
        [example["attention_mask"] for example in batch], dtype=torch.long
    )
    labels = input_ids.clone()
    # Mask only real padding (attention_mask == 0); attended EOS tokens stay in the loss.
    labels[attention_mask == 0] = -100
    return {"input_ids": input_ids, "attention_mask": attention_mask, "labels": labels}

# Training configuration: evaluate and checkpoint once per epoch, log every 10 steps.
training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    per_device_train_batch_size=BATCH_SIZE,
    per_device_eval_batch_size=BATCH_SIZE,
    num_train_epochs=EPOCHS,
    learning_rate=LR,
    # `evaluation_strategy` was renamed to `eval_strategy`; current transformers
    # releases reject the old keyword with a TypeError.
    eval_strategy="epoch",
    save_strategy="epoch",
    logging_steps=10,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_tok,
    eval_dataset=val_tok,
    # `processing_class` supersedes the deprecated `tokenizer=` argument.
    processing_class=tokenizer,
    data_collator=collate_fn,
)
trainer.train()


In [None]:
# Save and export to Ollama format
trainer.save_model(OUTPUT_DIR)

# Unsloth does not expose a top-level `export_ollama` helper. The documented
# route to Ollama is a GGUF export through the `save_pretrained_gguf` method
# that Unsloth attaches to models it loads.
OLLAMA_EXPORT_DIR = OUTPUT_DIR + "/ollama_export"
OLLAMA_MODEL_NAME = "smollm2-135m-ollama"
model.save_pretrained_gguf(OLLAMA_EXPORT_DIR, tokenizer, quantization_method="q8_0")

# Importing into a local Ollama install is done with `ollama create`, pointed
# at a Modelfile that references the exported GGUF file.
# NOTE(review): Unsloth may write a Modelfile next to the GGUF; verify the
# contents of the export folder before running the command below.
print(f"GGUF export written to {OLLAMA_EXPORT_DIR}")
print(f"Import into Ollama with: ollama create {OLLAMA_MODEL_NAME} -f <path/to/Modelfile>")


In [None]:
# Test a generation from the saved model
from transformers import pipeline

# Build a text-generation pipeline from the directory the trainer saved to,
# reusing the in-memory tokenizer and placing the model on device index 0.
pipe = pipeline("text-generation", model=OUTPUT_DIR, tokenizer=tokenizer, device=0)

# Generate with max_length=60 and print the raw pipeline result.
prompt = "How do I load a model into Ollama?"
generations = pipe(prompt, max_length=60)
print(generations)


## Instructions
- Replace the toy dataset with your real task data.
- Run on a GPU runtime in Colab; keep the number of epochs small for the demo.
- Verify the contents of the Ollama export folder, and add a note explaining how to import the exported model into a local Ollama installation.
- Record a video walkthrough (training, export, sample inference) and add the video link to the top cell.
