In [None]:
# Run `nvidia-smi` in a shell to check which GPU(s) this notebook can see before training
!nvidia-smi

### Login to Truefoundry (to save metrics, checkpoints and models!)
You only need to do it once

In [None]:
import os

import mlfoundry

# Truefoundry platform endpoint. The TFY_HOST environment variable wins when it is
# set; otherwise replace the placeholder below with your organisation's URL.
TRUEFOUNDRY_HOST = os.environ.get("TFY_HOST", "https://<your-org>.truefoundry.cloud")

mlfoundry.login(TRUEFOUNDRY_HOST)

## LLM Finetuning

### Prepare data

Data needs to be in `jsonl` format: each line is a JSON-encoded object with two keys, `prompt` and `completion`

```jsonl
{"prompt": "What is 2 + 2?", "completion": "The answer to 2 + 2 is 4"}
{"prompt": "Flip a coin", "completion": "I flipped a coin and the result is heads!"}
{"prompt": "<prompt text>", "completion": "<ideal generated text>"}
...
```

Once you have your data in `.jsonl` files, you can upload them to the file tree on the left and change the `train_data` and `eval_data` variables in the `Data Parameters` section

![Upload Data](./assets/upload-data.png)

---
In case you don't have data prepared, run the next cell to fetch the [Stanford Alpaca Dataset](https://crfm.stanford.edu/2023/03/13/alpaca.html) (https://huggingface.co/datasets/tatsu-lab/alpaca) to use as an example

In [None]:
%%bash
# Download the example train and test splits into the current directory
for data_file in standford_alpaca_train_49k.jsonl standford_alpaca_test_2k.jsonl; do
    wget --progress=bar:force:noscroll "https://assets.production.truefoundry.com/${data_file}" -O "${data_file}"
done

In [None]:
%%bash
# Peek at the first two records of the training file
head -n 2 standford_alpaca_train_49k.jsonl

### Data Parameters

In [3]:
# Training set location: a file on disk or an mlfoundry artifact fqn
train_data = "./standford_alpaca_train_49k.jsonl"

# Evaluation set location: a file on disk or an mlfoundry artifact fqn.
# Use "NA" to split the evaluation set off the training data instead
eval_data = "./standford_alpaca_test_2k.jsonl"

# Portion of train_data held out for evaluation; only used when eval_data is "NA"
eval_size = 0.1

# Cap on the number of training samples; 0 means no cap. Handy for a quick smoke test
max_num_samples = 0

if max_num_samples != 0:
    print(
        f"Note: Only first {max_num_samples} data points will be used. "
        "This is okay for quick testing. "
        "To use all data points please set `max_num_samples` to 0"
    )

Note: Only first 100 data points will be used. This is okay for quick testing. To use all data points please set `max_num_samples` to 0


### Preconfigured Parameters
This section loads the default parameters configured when deploying the notebook such as the model id, batch size, etc

In [None]:
from utils import load_launch_parameters

# Defaults configured when the notebook was deployed (model id, batch size, ...)
launch_parameters = load_launch_parameters(
    "/mnt/llm-finetune/finetune-config.json"
)

### Select a Model

In [None]:
import os
import torch

# Huggingface hub id of the model to finetune, e.g. "stas/tiny-random-llama-2".
# Notebooks created from Truefoundry's Model Catalogue get this from `launch_parameters`
model_id = launch_parameters.model_id

if not model_id:
    print(
        'Warning! Variable `model_id` is not set. Please set it to some valid Huggingface hub model. '
        'E.g model_id = "stas/tiny-random-llama-2"'
    )

### QLoRa Configuration

In [None]:
# LoRA rank. Higher values need more GPU memory and training time but can give better results
lora_r = 32

# LoRA alpha: twice the rank, but never below 16
lora_alpha = max(2 * lora_r, 16)

# Finetune with LoRA on top of a quantized model (QLoRA)
use_qlora = True

# Plain LoRA without quantization. To use it, set this to True and `use_qlora` to False
use_lora = False

# The two modes are mutually exclusive
if use_lora and use_qlora:
    raise ValueError("Both `use_qlora` and `use_lora` cannot be True at the same time!")

### Hyperparameters

In [None]:
# Directory that receives checkpoints and the final model
output_dir = "./model"

# Whether `output_dir` should be deleted before training starts
cleanup_output_dir_on_start = False

# Maximum sequence length.
# Larger values allow longer sequences at a significant cost in GPU memory and training time.
# Must not exceed the model's own maximum sequence length
max_length = launch_parameters.max_length

# Maximum batch size per GPU.
# Larger values increase the GPU memory requirement and training time
per_device_train_batch_size = launch_parameters.batch_size

# Learning rate
learning_rate = 3e-5

# Number of training epochs
num_train_epochs = 10

# Evaluation frequency. A value below 1 means "every X% of the total run"
eval_steps = 0.05

# Checkpoint frequency. A value below 1 means "every X% of the total run"
save_steps = 0.05

### Experiment Tracking

In [None]:
from mlfoundry_utils import generate_run_name, get_or_create_run

# Report metrics to mlfoundry
mlfoundry_enable_reporting = True

# ML Repo that receives metrics and checkpoints.
# New ML Repos can be created from the https://<your-org>.truefoundry.cloud/mlfoundry page
# Docs: https://docs.truefoundry.com/docs/key-concepts#creating-a-ml-repo
mlfoundry_ml_repo = "llm-finetuning"

# Whether checkpoints are uploaded to the ML Repo as they are saved
mlfoundry_log_checkpoints = True

# Name of the run that metrics and checkpoints are logged to
mlfoundry_run_name = generate_run_name(model_id)

# Artifact name under which checkpoints are logged
mlfoundry_checkpoint_artifact_name = f"ckpt-{mlfoundry_run_name}"


if mlfoundry_enable_reporting:
    print(f"Checkpoints will be logged with name {mlfoundry_checkpoint_artifact_name}")
    get_or_create_run(
        ml_repo=mlfoundry_ml_repo,
        run_name=mlfoundry_run_name,
        create_ml_repo=True,
        auto_end=False,
    )
    print("You can click on the above link to track metrics and checkpoints")

In [None]:
def _launch_tensorboard():
    """Start an inline TensorBoard reading from `./tensorboard_logs`.

    Creates the log directory if it does not exist, sets `TENSORBOARD_PROXY_URL`
    to `<NB_PREFIX>/proxy/%PORT%/` (`NB_PREFIX` defaults to "/"), and then starts
    TensorBoard through `tensorboard.notebook`.
    """
    import os
    from urllib.parse import urljoin
    from tensorboard import notebook

    tb_logs = os.path.join(".", "tensorboard_logs")
    os.makedirs(tb_logs, exist_ok=True)

    # urljoin() drops the last path segment of a base without a trailing slash
    # ("/a/b" + "proxy/" -> "/a/proxy/"), so make sure the prefix ends with "/".
    nb_prefix = os.getenv("NB_PREFIX", "/")
    if not nb_prefix.endswith("/"):
        nb_prefix += "/"
    os.environ["TENSORBOARD_PROXY_URL"] = urljoin(nb_prefix, "proxy/%PORT%/")
    notebook.start(f"--logdir {tb_logs} --reload_interval 30.0 --reload_multifile True")

# TensorBoard is only started when metrics are not being reported to mlfoundry
if not mlfoundry_enable_reporting:
    _launch_tensorboard()

### Start Finetuning!

In [None]:
# Mixed Precision Training. We automatically select the precision based on GPU capability
mixed_precision = "bf16" if torch.cuda.is_bf16_supported() else "fp16"
bf16 = (mixed_precision == "bf16")
fp16 = (mixed_precision == "fp16")

# Shell command that launches `train.py` through `accelerate` with DeepSpeed.
# Every value configured in the cells above is interpolated here; this includes
# `cleanup_output_dir_on_start`, which used to be hard-coded to False so the
# setting in the Hyperparameters section had no effect.
# The trailing backslashes are line continuations inside the (non-raw) string,
# so the command ends up on a single line.
COMMAND = f"""
accelerate launch \
--mixed_precision {mixed_precision} \
--use_deepspeed \
train.py \
--deepspeed ./deepspeed_configs/3_ds_z2_config.json \
--bf16 {bf16} \
--fp16 {fp16} \
--model_id {model_id} \
--output_dir {output_dir} \
--train_data {train_data} \
--eval_data {eval_data} \
--eval_size {eval_size} \
--max_num_samples {max_num_samples} \
--train_on_prompt False \
--max_length {max_length} \
--use_qlora {use_qlora} \
--use_lora {use_lora} \
--qlora_bit_length 4 \
--lora_target_modules auto \
--lora_r {lora_r} \
--lora_alpha {lora_alpha} \
--lora_dropout 0.05 \
--lora_bias none \
--num_train_epochs {num_train_epochs} \
--early_stopping_patience 10 \
--early_stopping_threshold 0.0 \
--auto_find_batch_size false \
--per_device_train_batch_size {per_device_train_batch_size} \
--per_device_eval_batch_size {per_device_train_batch_size} \
--gradient_accumulation_steps 4 \
--learning_rate {learning_rate} \
--logging_strategy steps \
--logging_steps 5 \
--evaluation_strategy steps \
--eval_steps {eval_steps} \
--save_strategy steps \
--save_steps {save_steps} \
--mlfoundry_enable_reporting {mlfoundry_enable_reporting} \
--mlfoundry_ml_repo {mlfoundry_ml_repo} \
--mlfoundry_run_name {mlfoundry_run_name} \
--mlfoundry_checkpoint_artifact_name {mlfoundry_checkpoint_artifact_name} \
--mlfoundry_log_checkpoints {mlfoundry_log_checkpoints} \
--cleanup_output_dir_on_start {cleanup_output_dir_on_start} \
--resume_from_checkpoint True \
| tee train.log
"""

print(f"Command to run: {COMMAND}")

In [None]:
# Run the finetuning command held in COMMAND in a shell; its output is also written to train.log via `tee`
!{COMMAND} 