# Colab1: Mental Health Chatbot Finetune (Unsloth)

Adapted from phi3/Unsloth tutorials for a mental health chatbot. Run in Colab with GPU; use a safe, curated dataset.

In [None]:
# Install deps
!pip install -q unsloth datasets transformers accelerate peft

In [None]:
# Config: every tunable value for the run lives in this cell.
MODEL_NAME = "unsloth/smollm2-135m"  # identifier handed to FastLanguageModel.from_pretrained
OUTPUT_DIR = "/content/unsloth-mental-health"  # Trainer output dir; the saved model is reloaded from here

# Training hyperparameters.
EPOCHS, BATCH_SIZE = 1, 4  # training epochs; per-device batch size (train and eval)
LR = 2e-4  # learning rate


## Load/prepare mental health QA data
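Replace this toy data with your curated mental health support dataset. Each record needs an `input` field (the user's message) and a `response` field (the supportive reply); the two are combined into a single instruction-style training text.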

In [None]:
from datasets import Dataset
# Toy (user message, supportive reply) turns; placeholder data only.
_TOY_TURNS = (
    ("I feel anxious lately.", "I'm sorry you're feeling anxious. Can you tell me more about what's causing it?"),
    ("I can't sleep.", "Sleep issues can be tough. Have you tried a wind-down routine?"),
    ("I'm stressed about work.", "Work stress is common. Let's break down what's most pressing."),
    ("I feel overwhelmed.", "It can help to take small steps and practice self-care. What helps you relax?"),
)

# One dict per turn, with the "input"/"response" keys read by format_example.
pairs = [{"input": user_msg, "response": reply} for user_msg, reply in _TOY_TURNS]

def format_example(ex):
    """Render one QA record as a single instruction-style training string.

    Args:
        ex: Mapping with an "input" entry (user message) and a "response"
            entry (reply).

    Returns:
        A string with an "### Instruction:" section holding the input, a blank
        line, then a "### Response:" section holding the response.
    """
    # Line breaks are written as "\n" escapes: a single-quoted f-string literal
    # cannot span raw newlines (that is a SyntaxError).
    return f"### Instruction:\n{ex['input']}\n\n### Response:\n{ex['response']}"

# Build the dataset from the toy pairs, add the formatted "text" column,
# then hold out 25% of the rows for evaluation (fixed seed for repeatability).
df = Dataset.from_list(pairs).map(lambda record: {"text": format_example(record)})
split = df.train_test_split(seed=42, test_size=0.25)
split

In [None]:
from unsloth import FastLanguageModel

# Load the base model and its tokenizer.
model, tokenizer = FastLanguageModel.from_pretrained(MODEL_NAME)

# Attach LoRA adapters so there are trainable parameters.
# NOTE(review): Unsloth loads models 4-bit quantized by default, and the HF
# Trainer refuses to fine-tune a purely quantized model without adapters;
# confirm against the installed versions.
model = FastLanguageModel.get_peft_model(
    model,
    r=16,
    lora_alpha=16,
    lora_dropout=0,
    bias="none",
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],
    random_state=42,
)

# Fall back to EOS for padding only when the tokenizer has no pad token of its
# own; overwriting an existing pad token would make padding indistinguishable
# from real end-of-sequence tokens.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token


In [None]:
MAX_LEN = 256  # every example is padded/truncated to exactly this many tokens


def preprocess(batch):
    """Tokenize the "text" column of a batch to a fixed length.

    Args:
        batch: Batch mapping whose "text" entry holds the strings to tokenize.

    Returns:
        The tokenizer's output for the batch, padded to and truncated at
        MAX_LEN tokens.
    """
    fixed_length_opts = dict(padding="max_length", truncation=True, max_length=MAX_LEN)
    encoded = tokenizer(batch["text"], **fixed_length_opts)
    return encoded

# Tokenize the train and held-out splits in batches with the same function.
train_tok, val_tok = (
    split[part].map(preprocess, batched=True) for part in ("train", "test")
)


In [None]:
import torch
from transformers import Trainer, TrainingArguments

def collate_fn(batch):
    """Stack tokenized examples into tensors and build causal-LM labels.

    Args:
        batch: List of examples, each with equal-length "input_ids" and
            "attention_mask" lists.

    Returns:
        Dict with "input_ids", "attention_mask" and "labels" tensors. Labels
        are a copy of the input ids with padded positions set to -100 so they
        are ignored by the loss.
    """
    input_ids = torch.tensor([example["input_ids"] for example in batch])
    attention_mask = torch.tensor([example["attention_mask"] for example in batch])
    labels = input_ids.clone()
    # Mask padding via the attention mask rather than by token id: the pad
    # token may be the same id as EOS, and masking by id would also hide every
    # genuine EOS target from the loss.
    labels[attention_mask == 0] = -100
    return {"input_ids": input_ids, "attention_mask": attention_mask, "labels": labels}

# Training configuration; values come from the config constants.
training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    per_device_train_batch_size=BATCH_SIZE,
    per_device_eval_batch_size=BATCH_SIZE,
    num_train_epochs=EPOCHS,
    learning_rate=LR,
    # `evaluation_strategy` was renamed to `eval_strategy` in transformers; the
    # unpinned install picks up a recent release, so use the current name.
    eval_strategy="epoch",
    save_strategy="epoch",
    logging_steps=10,
    # Keep "Run All" non-interactive: do not hand logs to external trackers,
    # some of which prompt for credentials.
    report_to="none",
)

# Wire the model, tokenized splits and collator into a Trainer, then fine-tune.
# NOTE(review): recent transformers releases deprecate `tokenizer=` in favour
# of `processing_class=`; confirm against the installed version.
trainer_kwargs = dict(
    model=model,
    args=training_args,
    train_dataset=train_tok,
    eval_dataset=val_tok,
    tokenizer=tokenizer,
    data_collator=collate_fn,
)
trainer = Trainer(**trainer_kwargs)
trainer.train()


In [None]:
# Save and test
trainer.save_model(OUTPUT_DIR)
from transformers import pipeline

# Reload what was just saved and generate on the first GPU.
pipe = pipeline("text-generation", model=OUTPUT_DIR, tokenizer=tokenizer, device=0)

# Prompt with the same "### Instruction / ### Response" template the training
# texts use; a bare sentence does not match what the model was fine-tuned on.
prompt = "### Instruction:\nI feel anxious about my exams.\n\n### Response:\n"

# max_new_tokens bounds only the generated continuation, whereas max_length
# also counts the prompt tokens.
print(pipe(prompt, max_new_tokens=80))


## Instructions
- Replace the toy dataset with a vetted mental health support dataset.
- Run on GPU in Colab; keep epochs/time limits low for demo.
- Record a video walkthrough (data prep, training, sample responses).
- Save executed notebook with outputs; add video link in the top cell.
