# Colab1: Continued Finetuning from Custom Checkpoint

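Use Unsloth to continue finetuning from a previously saved checkpoint. Run in Colab with a GPU runtime, and make the checkpoint available before running the model-loading cell (e.g., mount Google Drive so that the `BASE_CHECKPOINT` path exists); this notebook does not mount Drive for you.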

In [None]:
# Install deps
!pip install -q unsloth datasets transformers accelerate peft

In [None]:
# Config
# Tunable settings read by the cells below; edit here rather than inline.
# Path passed to FastLanguageModel.from_pretrained when loading the model.
BASE_CHECKPOINT = "/content/drive/MyDrive/checkpoints/unsloth-base"  # change to your path
# Trainer output directory; the final model is also saved here and reloaded for the generation test.
OUTPUT_DIR = "/content/unsloth-continued"
# Passed as num_train_epochs.
EPOCHS = 1
# Used for both per_device_train_batch_size and per_device_eval_batch_size.
BATCH_SIZE = 4
# Passed as learning_rate.
LR = 2e-4


In [None]:
# Load dataset (replace with your task data)
from datasets import Dataset

# Toy corpus: four short strings, just enough to exercise the pipeline.
train_texts = [
    "Continue training example one",
    "Another example for resumed training",
    "Fine-tuning from a checkpoint",
    "Custom data row",
]

# Note: despite its name, `train_dataset` holds both splits; later cells index
# it with "train" and "test". The fixed seed keeps the split reproducible.
toy_dataset = Dataset.from_dict({"text": train_texts})
train_dataset = toy_dataset.train_test_split(test_size=0.25, seed=42)
train_dataset

In [None]:
# Load model/tokenizer from checkpoint
from unsloth import FastLanguageModel

model, tokenizer = FastLanguageModel.from_pretrained(BASE_CHECKPOINT)

# The tokenize cell pads every row to a fixed length, so a pad token must exist.
# Fall back to EOS only when the checkpoint's tokenizer does not define one;
# overwriting unconditionally would discard a dedicated pad token saved with
# the checkpoint.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token


In [None]:
# Tokenize
def preprocess(batch):
    """Tokenize the ``text`` column of a batch of rows.

    Every row is truncated and padded to exactly 256 tokens so that rows can
    later be stacked into rectangular tensors.

    Args:
        batch: mapping with a ``"text"`` entry, as supplied by
            ``Dataset.map(..., batched=True)``.

    Returns:
        Whatever the module-level ``tokenizer`` returns for the batch.
    """
    encoded = tokenizer(
        batch["text"],
        truncation=True,
        max_length=256,
        padding="max_length",
    )
    return encoded

# Apply the same batched tokenization to both splits ("test" serves as validation).
train_tokenized, val_tokenized = (
    train_dataset[split_name].map(preprocess, batched=True)
    for split_name in ("train", "test")
)


In [None]:
# Collator and trainer
import torch
from transformers import Trainer, TrainingArguments

def collate_fn(batch):
    """Stack pre-tokenized rows into tensors for causal-LM training.

    Args:
        batch: list of rows, each providing ``input_ids`` and
            ``attention_mask`` as equal-length lists of ints (all rows must
            share the same length so they can be stacked).

    Returns:
        dict with ``input_ids``, ``attention_mask`` and ``labels`` tensors.
        ``labels`` is a copy of ``input_ids`` in which every position whose
        attention mask is 0 is replaced by -100.
    """
    input_ids = torch.tensor([row["input_ids"] for row in batch], dtype=torch.long)
    attention_mask = torch.tensor([row["attention_mask"] for row in batch], dtype=torch.long)
    labels = input_ids.clone()
    # Mask by attention, not by token id: when the pad token shares its id with
    # EOS, comparing against pad_token_id would also hide genuine EOS tokens
    # from the loss. -100 is the default ignore_index of PyTorch's
    # cross-entropy loss.
    labels[attention_mask == 0] = -100
    return {"input_ids": input_ids, "attention_mask": attention_mask, "labels": labels}

import inspect

# Newer transformers releases renamed `evaluation_strategy` to `eval_strategy`
# and the install cell does not pin a version, so pick whichever keyword the
# installed TrainingArguments actually accepts.
_eval_strategy_key = (
    "eval_strategy"
    if "eval_strategy" in inspect.signature(TrainingArguments).parameters
    else "evaluation_strategy"
)

# Evaluate and checkpoint once per epoch; log every 10 optimizer steps.
training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    per_device_train_batch_size=BATCH_SIZE,
    per_device_eval_batch_size=BATCH_SIZE,
    num_train_epochs=EPOCHS,
    learning_rate=LR,
    save_strategy="epoch",
    logging_steps=10,
    **{_eval_strategy_key: "epoch"},
)

import inspect

# Newer transformers releases deprecate Trainer(tokenizer=...) in favour of
# `processing_class`. Use the new keyword when the installed Trainer exposes
# it, and fall back to `tokenizer` on older releases.
_tokenizer_key = (
    "processing_class"
    if "processing_class" in inspect.signature(Trainer.__init__).parameters
    else "tokenizer"
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_tokenized,
    eval_dataset=val_tokenized,
    data_collator=collate_fn,
    **{_tokenizer_key: tokenizer},
)
trainer.train()


In [None]:
# Save continued model and test generation
from transformers import pipeline

# Write the trained model to OUTPUT_DIR so the pipeline below can load it from disk.
trainer.save_model(OUTPUT_DIR)

# Smoke test: reload the saved model on GPU 0 and generate from a single prompt.
pipe = pipeline(
    "text-generation",
    model=OUTPUT_DIR,
    tokenizer=tokenizer,
    device=0,
)
prompt = "Summarize the benefits of continued finetuning"
generated = pipe(prompt, max_length=60)
print(generated)


## Instructions
- Mount/load your actual checkpoint (Drive, GCS, etc.).
- Replace the toy dataset with your task data.
- Ensure a GPU runtime is selected in Colab (Runtime → Change runtime type).
- Record a video walkthrough (data, checkpoint load, training, sample inference).
- Save the executed notebook with outputs and add the video link in the top cell.
