### Check Setup

In [None]:
# List the NVIDIA GPUs visible to this machine (driver version, memory, utilisation)
!nvidia-smi

In [None]:
import torch

# Report what the CUDA runtime exposes before loading the GPU-dependent packages below
print("GPU Available:", torch.cuda.is_available())
print("#GPU Devices:", torch.cuda.device_count())

# Import-only smoke test: an ImportError here means the package is missing or broken.
# The modules are imported under their own names rather than `as _`, because rebinding
# `_` in the notebook namespace shadows IPython's `_` last-output shortcut.
import bitsandbytes  # noqa: F401
import flash_attn  # noqa: F401

### Login to Truefoundry (to save metrics, checkpoints and models!)
You only need to do it once

In [None]:
import os

import mlfoundry

# Truefoundry platform endpoint to log in to. The TFY_HOST environment variable takes
# precedence; otherwise replace the <your-org> placeholder below with your own endpoint
TRUEFOUNDRY_HOST = os.environ.get("TFY_HOST", "https://<your-org>.truefoundry.cloud")

mlfoundry.login(TRUEFOUNDRY_HOST)

### LLM Finetuning

#### Prepare data

Data needs to be in `jsonl` format: each line is a JSON-encoded object with two keys, `prompt` and `completion`.

```jsonl
{"prompt": "What is 2 + 2?", "completion": "The answer to 2 + 2 is 4"}
{"prompt": "Flip a coin", "completion": "I flipped a coin and the result is heads!"}
{"prompt": "<prompt text>", "completion": "<ideal generated text>"}
...
```

If you don't have data prepared, run the next cell to download the Stanford Alpaca dataset and use it as an example.

In [None]:
%%bash
# Download the example dataset into the working directory under its remote file name
DATA_FILE="standford_alpaca_52k.jsonl"
wget --progress=bar:force:noscroll -O "${DATA_FILE}" "https://assets.production.truefoundry.com/${DATA_FILE}"

In [None]:
%%bash
# Preview the first two records of the downloaded dataset
head -n 2 standford_alpaca_52k.jsonl

In [None]:
import os
import torch

In [None]:
# Huggingface hub model id to finetune e.g. "stas/tiny-random-llama-2"
# If you created this notebook instance from Truefoundry's Model Catalogue, the model id will be set in the env
model_id = os.getenv("TFY_FINETUNE_MODEL_ID")

# URI to training data. Can be a file on disk or an mlfoundry artifact fqn
train_data = "./standford_alpaca_52k.jsonl"

# URI to evaluation data. Can be a file on disk or an mlfoundry artifact fqn. 
eval_data = "NA"

# When eval_data is set to "NA", use this portion of the train_data to use as eval
eval_size = 0.1

# How many samples to use for training. 0 means all data. Useful to test quickly
max_num_samples = 100

if not model_id:
    print(
        """Warning! Variable `model_id` is not set. Please set it to some valid Huggingface hub model.
        E.g model_id = "stas/tiny-random-llama-2"
        """
    )

In [None]:
# Echo the selected model and data locations so they can be checked before training
for _label, _value in (
    ("Model to finetune:", model_id),
    ("Train Data Location:", train_data),
    ("Eval Data Location:", eval_data),
):
    print(_label, _value)

In [None]:
## LoRA configuration

# QLoRA: LoRA combined with quantization
use_qlora = True

# Plain LoRA without quantization. To use it, set this to True and `use_qlora` to False
use_lora = False

# Modules that receive adapters. "auto" targets all linear layers (except embeddings, lm_head).
# To customize, give a json encoded list
lora_target_modules = "auto"

# Adapter rank (r). Raising it increases GPU memory requirement and training time but can give better results
lora_r = 32

# Adapter alpha: twice the rank, but never below 16
lora_alpha = max(2 * lora_r, 16)

# Adapter dropout
lora_dropout = 0.05

# Adapter bias
lora_bias = "none"


# The two adapter modes are mutually exclusive
if use_lora and use_qlora:
    raise ValueError("Both `use_qlora` and `use_lora` cannot be True at the same time!")

In [None]:
## Training Arguments

# Where to dump checkpoints and model
output_dir = "./model"

# Whether to delete `output_dir` before starting
cleanup_output_dir_on_start = False

# Whether to also train on the prompt. Setting this to True will increase GPU memory requirement and training time
train_on_prompt = False

# Max batch size per GPU. Increasing this will increase GPU memory requirement and training time
per_device_train_batch_size = 1

# Learning rate
learning_rate = 0.0003

# How many epochs to run training for
num_train_epochs = 5

# How many eval steps to wait for the eval loss to improve before stopping
early_stopping_patience = 10

# How much the eval loss should improve at least to not count towards early stopping.
# Stored as a float, like every other numeric setting in this cell, so it can be compared numerically
early_stopping_threshold = 0.0

# Whether to resume from checkpoints when available
resume_from_checkpoint = True

# Gradient accumulation steps
gradient_accumulation_steps = 4

# Mixed Precision Training. The precision is selected automatically from the GPU capability.
# The bf16 capability is only queried when a CUDA device is present, so this cell does not raise
# on a machine without one; in that case it falls back to "fp16"
mixed_precision = (
    "bf16"
    if (torch.cuda.is_available() and torch.cuda.is_bf16_supported())
    else "fp16"
)
bf16 = mixed_precision == "bf16"
fp16 = mixed_precision == "fp16"

In [None]:
# Interval between metric logs. A value below 1 is read as a fraction of the total run
logging_steps = 5

# Interval between evaluations. A value below 1 is read as a fraction of the total run (0.05 = every 5%)
eval_steps = 0.05

# Interval between checkpoint saves. A value below 1 is read as a fraction of the total run (0.05 = every 5%)
save_steps = 0.05

In [None]:
from mlfoundry_utils import generate_run_name, get_or_create_run

# Report metrics to mlfoundry when True
mlfoundry_enable_reporting = True

# ML Repo that receives the metrics and checkpoints.
# New ML Repos can be created from the https://<your-org>.truefoundry.cloud/mlfoundry page
# Docs: https://docs.truefoundry.com/docs/key-concepts#creating-a-ml-repo
mlfoundry_ml_repo = "llm-finetuning"

# Upload checkpoints to the ML Repo as they are saved
mlfoundry_log_checkpoints = True

# Name of the run that metrics and checkpoints are logged to, generated from the model id
mlfoundry_run_name = generate_run_name(model_id)

# Artifact name under which the checkpoints are logged
mlfoundry_checkpoint_artifact_name = f"ckpt-{mlfoundry_run_name}"


if mlfoundry_enable_reporting:
    print(f"Checkpoints will be logged with name {mlfoundry_checkpoint_artifact_name}")
    # NOTE(review): presumably creates the ML Repo when missing and leaves the run open
    # for the training process to log into - confirm in mlfoundry_utils
    get_or_create_run(
        ml_repo=mlfoundry_ml_repo, run_name=mlfoundry_run_name, auto_end=False, create_ml_repo=True
    )
    print("You can click on the above link to track metrics and checkpoints")

In [None]:
# With mlfoundry reporting disabled, start an in-notebook TensorBoard instead
if not mlfoundry_enable_reporting:
    import os
    from urllib.parse import urljoin

    from tensorboard import notebook

    # Log directory handed to TensorBoard; created up front so it exists before training writes to it
    tb_logs = os.path.join(output_dir, "runs")
    os.makedirs(tb_logs, exist_ok=True)

    # Build the proxy URL from the notebook server prefix (NB_PREFIX, defaulting to "/")
    notebook_prefix = os.getenv("NB_PREFIX", "/")
    os.environ["TENSORBOARD_PROXY_URL"] = urljoin(notebook_prefix, "proxy/%PORT%/")

    tensorboard_args = f"--logdir {tb_logs} --reload_interval 30.0 --reload_multifile True"
    notebook.start(tensorboard_args)

In [None]:
import shlex

# Fail fast: with `model_id` unset the command would be launched with the literal `--model_id None`
if not model_id:
    raise ValueError(
        "`model_id` is not set. Please set it to some valid Huggingface hub model before launching training."
    )


def _q(value):
    """Render `value` as one shell-safe token: str() it, then shlex.quote it.

    Plain values (numbers, booleans, simple paths and names) come out unchanged. Values
    containing spaces, quotes or other shell metacharacters, such as a json encoded list
    for `lora_target_modules`, are quoted so the shell passes them through as one argument.
    """
    return shlex.quote(str(value))


# The trailing backslashes are Python line continuations inside the string literal, so
# COMMAND ends up as a single long line with the flags separated by spaces.
# NOTE(review): `early_stopping_patience`, `early_stopping_threshold` and `resume_from_checkpoint`
# are configured earlier in the notebook but not forwarded here - confirm whether train.py
# accepts matching flags before adding them.
COMMAND = f"""
accelerate launch \
--mixed_precision {_q(mixed_precision)} \
train.py \
--use_ddp true \
--output_dir {_q(output_dir)} \
--model_id {_q(model_id)} \
--train_data {_q(train_data)} \
--eval_data {_q(eval_data)} \
--eval_size {_q(eval_size)} \
--max_num_samples {_q(max_num_samples)} \
--bf16 {_q(bf16)} \
--fp16 {_q(fp16)} \
--num_train_epochs {_q(num_train_epochs)} \
--per_device_train_batch_size {_q(per_device_train_batch_size)} \
--per_device_eval_batch_size {_q(per_device_train_batch_size)} \
--gradient_accumulation_steps {_q(gradient_accumulation_steps)} \
--learning_rate {_q(learning_rate)} \
--train_on_prompt {_q(train_on_prompt)} \
--logging_strategy steps \
--logging_steps {_q(logging_steps)} \
--evaluation_strategy steps \
--eval_steps {_q(eval_steps)} \
--save_strategy steps \
--save_steps {_q(save_steps)} \
--use_qlora {_q(use_qlora)} \
--use_lora {_q(use_lora)} \
--qlora_bit_length 4 \
--lora_target_modules {_q(lora_target_modules)} \
--lora_r {_q(lora_r)} \
--lora_alpha {_q(lora_alpha)} \
--lora_dropout {_q(lora_dropout)} \
--lora_bias {_q(lora_bias)} \
--mlfoundry_enable_reporting {_q(mlfoundry_enable_reporting)} \
--mlfoundry_ml_repo {_q(mlfoundry_ml_repo)} \
--mlfoundry_run_name {_q(mlfoundry_run_name)} \
--mlfoundry_checkpoint_artifact_name {_q(mlfoundry_checkpoint_artifact_name)} \
--mlfoundry_log_checkpoints {_q(mlfoundry_log_checkpoints)} \
--cleanup_output_dir_on_start {_q(cleanup_output_dir_on_start)}
"""

print(f"Command to run: {COMMAND}")

In [None]:
# Run the training command built in the previous cell in a shell; IPython substitutes {COMMAND} before execution
!{COMMAND}