<a href="https://colab.research.google.com/github/withpi/cookbook-withpi/blob/main/colabs/Low_Rank_Adaptation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

<a href="https://withpi.ai"><img src="https://withpi.ai/logoFullBlack.svg" width="240"></a>

<a href="https://code.withpi.ai"><font size="4">Documentation</font></a>

<a href="https://play.withpi.ai"><font size="4">Technique Catalog</font></a>

# Supervised Fine-tuning (SFT) with Standard Gradient Descent

This notebook is the companion to the SFT playground.

Description: Train models to more deeply learn patterns from your data.

## Install and initialize SDK

This notebook needs a T4 GPU.  Make sure to select this explicitly above before proceeding.

You'll need a WITHPI_API_KEY from https://play.withpi.ai.  Add it to your notebook secrets (the key symbol) on the left.

Run the cell below to install packages and load the SDK

In [None]:
%%capture
%pip install withpi httpx datasets tqdm

import os
import json
from pathlib import Path
from collections import defaultdict

import datasets
import httpx
from google.colab import files, userdata
from withpi import PiClient
from withpi.types import Contract

from rich.console import Console
from rich.table import Table
from rich.live import Live

# Shared rich console; stream_response renders its training-status tables through it.
console = Console()


# Load the notebook secret into the environment so the Pi Client can access it.
os.environ["WITHPI_API_KEY"] = userdata.get("WITHPI_API_KEY")
# Built with no arguments; presumably it reads WITHPI_API_KEY from the environment — confirm.
client = PiClient()


def print_contract(contract: Contract):
    """Print every dimension label, each followed by its tab-indented sub-dimension descriptions."""
    for dim in contract.dimensions:
        indented = [f"\t{sub.description}" for sub in dim.sub_dimensions]
        # One print call per dimension: the label first, then one line per sub-dimension.
        print(dim.label, *indented, sep="\n")


def print_scores(pi_scores):
    """Print a Pi Score response as an indented listing.

    Each dimension is written as ``name: total_score``, followed by one
    tab-indented ``name: score`` line per sub-dimension and two blank lines.
    A separator and the overall total score close the listing.
    """
    for dim_name, dim_result in pi_scores.dimension_scores.items():
        header = f"{dim_name}: {dim_result.total_score}"
        sub_lines = [
            f"\t{sub_name}: {sub_score}"
            for sub_name, sub_score in dim_result.subdimension_scores.items()
        ]
        # The trailing "\n" item plus print's own newline yields the two blank
        # lines that separate dimensions.
        print(header, *sub_lines, "\n", sep="\n")

    print("---------------------")
    print(f"Total score: {pi_scores.total_score}")


def save_file(filename: str, model: str):
    """Write ``model`` to ``filename``, then pass that file to the Colab download helper."""
    destination = Path(filename)
    destination.write_text(model)
    files.download(filename)


def load_contract(url: str) -> Contract:
    """Fetch a Contract JSON document from ``url`` and validate it.

    Args:
        url: Location of the contract JSON blob.

    Returns:
        The object produced by ``Contract.model_validate_json`` from the
        response body.

    Raises:
        httpx.HTTPStatusError: If the response does not carry a success status.
    """
    resp = httpx.get(url)
    # Fail on HTTP errors here; otherwise an error page would be handed to the
    # validator and surface as a misleading validation failure.
    resp.raise_for_status()
    return Contract.model_validate_json(resp.content)


def generate_table(training_data: dict, is_done: bool):
    """Build the "Training Status" table from per-step training metrics.

    ``training_data`` maps a step to a dict of metrics. Each step becomes one
    row; a metric that is absent is shown as "X". While ``is_done`` is false
    a trailing "..." row is appended.
    """
    # (header, extra column options) in display order; all are right-justified.
    column_specs = (
        ("Step", {"style": "cyan", "no_wrap": True}),
        ("Epoch", {"style": "cyan", "no_wrap": True}),
        ("Learning Rate", {"style": "cyan", "no_wrap": True}),
        ("Train Loss", {"style": "magenta"}),
        ("Eval Loss", {"style": "green"}),
        ("Pi Score", {"style": "yellow"}),
    )
    # Metric keys feeding the columns that follow "Step", in the same order.
    metric_keys = ("epoch", "learning_rate", "loss", "eval_loss", "contract_score")

    def fmt(value: float | None, digits: int = 4) -> str:
        return "X" if value is None else f"{value:.{digits}f}"

    table = Table(title="Training Status")
    for header, options in column_specs:
        table.add_column(header, justify="right", **options)

    for step, metrics in training_data.items():
        cells = [fmt(metrics.get(key)) for key in metric_keys]
        table.add_row(str(step), *cells)

    if not is_done:
        table.add_row("...", "", "", "", "", "")

    return table


def _merge_status_line(training_data, line):
    """Merge one JSON status line into ``training_data`` under its "step" key.

    Lines that are not valid JSON, or that decode to something without a
    usable "step" entry (for example the literal "DONE" marker), are skipped.
    """
    try:
        record = json.loads(line)
        training_data[record["step"]].update(record)
    except (ValueError, KeyError, TypeError):
        # Best-effort: the log mixes structured metric lines with other
        # messages, so anything that is not a metric record is ignored.
        return


def stream_response(job_id: str, method):
    """Follow a job until it leaves the QUEUED/RUNNING states and return its final status.

    method should be a Pi client object with `retrieve` and `stream_messages`
    endpoints.  This is primarily for convenience.

    While the job is queued or running, its messages are streamed and shown
    as a live-updating training table. If the job is already DONE and nothing
    was rendered live, the table is rebuilt from ``detailed_status`` and
    printed once.

    Args:
        job_id: Identifier passed to ``retrieve`` and ``stream_messages``.
        method: Client endpoint object as described above.

    Returns:
        The last object returned by ``method.retrieve``, whose state is
        neither "QUEUED" nor "RUNNING".
    """
    training_data = defaultdict(dict)
    rendered_live = False

    while True:
        status = method.retrieve(job_id=job_id)
        if status.state not in ("QUEUED", "RUNNING"):
            # Only replay the stored log when no live table was shown, so the
            # table is not printed twice.
            if status.state == "DONE" and not rendered_live:
                for line in status.detailed_status:
                    _merge_status_line(training_data, line)
                console.print(generate_table(training_data, is_done=True))
            return status

        with method.with_streaming_response.stream_messages(
            job_id=job_id, timeout=None
        ) as stream:
            with Live(auto_refresh=True, console=console, refresh_per_second=4) as live:
                is_done = False
                for line in stream.iter_lines():
                    if line == "DONE":
                        is_done = True
                    _merge_status_line(training_data, line)
                    live.update(generate_table(training_data, is_done))
                    rendered_live = True

# Load a contract and dataset

We have a pre-existing contract you can play with.


In [None]:
import datasets

# Contract describing the TL;DR task, fetched from the cookbook repository.
tldr_contract = load_contract(
    "https://raw.githubusercontent.com/withpi/cookbook-withpi/refs/heads/main/contracts/tldr.json"
)

# Keep only the first `num_examples` rows of the train split.
num_examples = 200
tldr_train_split = datasets.load_dataset("withpi/tldr")["train"]
tldr_data = tldr_train_split.select(range(num_examples))

print(tldr_data)

## Kick off the job

The SFT job internally performs a 90/10 train-test split, which is why the loader is not splitting the input data.

This process takes a while. Please be patient while a cloud GPU is acquired, fine-tuning is performed, and a result is returned.

In [None]:
# Submit the fine-tuning job; the returned status is used below to follow it.
status = client.model.sft.start_job(
    contract=tldr_contract,
    examples=[
        {"llm_input": row["prompt"], "llm_output": row["completion"]}
        for row in tldr_data
    ],
    base_sft_model="LLAMA_3.2_3B",
    num_train_epochs=5,
)

# Wait for the job to leave QUEUED/RUNNING, rendering progress along the way.
# NOTE(review): assumes the start_job result exposes `job_id` — confirm against the withpi SDK.
response = stream_response(status.job_id, client.model.sft)

# stream_response returns on any terminal state, so make a failed job obvious
# instead of failing later on a missing model.
if response.state != "DONE" or not response.trained_models:
    raise RuntimeError(
        f"SFT job {status.job_id} finished in state {response.state!r} "
        "without a trained model"
    )

sft_model = response.trained_models[0]
print(f"SFT model = {sft_model.model_dump_json(indent=2)}")
repo_name = sft_model.hf_model_name

## Load Model

You'll need to acknowledge access to the Llama gated model on Hugging Face and generate a token. You can put this into your notebook secrets under "HF_TOKEN" and run the cell below.

This will load the model into the local GPU.  It takes a few minutes.

In [None]:
from huggingface_hub import snapshot_download
from vllm import LLM, SamplingParams
from vllm.lora.request import LoRARequest
from google.colab import userdata
import os

# NOTE(review): vllm is not in the `%pip install` list at the top of this notebook — confirm the runtime provides it.
# Expose the Hugging Face token from the notebook secrets so the gated Llama model can be downloaded.
os.environ["HF_TOKEN"] = userdata.get("HF_TOKEN")

# Download the fine-tuned adapter; `repo_name` is set in the "Kick off the job" cell.
adapter_path = snapshot_download(repo_id=repo_name)

# enable_lora is required because inference passes the adapter as a LoRARequest.
llm = LLM(
    model="meta-llama/Llama-3.2-3B-Instruct",
    dtype="half",
    max_model_len=2048,
    enable_lora=True,
)

## Try it out!

Now let's run some local inference with this model.

In [None]:
# Single user message: a Reddit post ending in "TL;DR:" for the model to complete.
tldr_prompt = "SUBREDDIT: r/relationships TITLE: My boyfriend [25M] of a year and I [22F] hardly ever argue, except when we are with his family. I can't figure out why this is happening or how to stop it. Help! POST: My boyfriend and I have a great relationship. We communicate well and our conflict resolution styles seem to compliment one another. We rarely disagree on things, and when we do it's resolved with a simple conversation. Here's the problem: When we visit his parents, we argue constantly. It's really starting to bother me because I can't figure out why it's happening. I know that his parents see this and think there's no way we are happy together (because his mom has told me). His parents tend to bicker fairly often, but that shouldn't impact our communication, should it? It doesn't matter if it's just his parents, or if it's the whole extended family. We just seem to argue so much more in their presence. The only exception being when it's just us with his brother and SIL. I've also noticed that they (brother and SIL) seem to be holding back from arguing with one another while we're with the whole family, and they are not like this when it's just the 4 of us. I don't know if it's the stress of seeing his family - I know that they don't *love* us together (probably because we're always arguing!). Or if the family's communication style impacts us. Or if it's just that I'm stressed and little things bother me, and we can't have our simple resolving conversations while we're there. Any ideas what could be causing it? Or how we can stop it from happening? TL;DR:"

sampling_params = SamplingParams(temperature=0.6, max_tokens=512)

# Apply the downloaded fine-tuned adapter on top of the base model for this request.
sft_adapter = LoRARequest("adapter", 1, adapter_path)

response = llm.chat(
    messages=[{"content": tldr_prompt, "role": "user"}],
    sampling_params=sampling_params,
    lora_request=sft_adapter,
    use_tqdm=False,
)

first_completion = response[0].outputs[0]
print(first_completion.text)

## Next steps

You can load this model onto hardware more powerful than the T4 included in Colab, or serve it through your own inference provider.