In [None]:
# Show the NVIDIA GPUs visible to this notebook before configuring the finetuning run
!nvidia-smi

### Login to Truefoundry (to save metrics, checkpoints and models!)
You only need to do this once.

In [None]:
import os

import truefoundry

# This should point to your Truefoundry platform endpoint.
# The value is read from the `TFY_HOST` environment variable when it is set;
# otherwise replace the `<your-org>` placeholder below with your organisation's host.
TRUEFOUNDRY_HOST = os.getenv("TFY_HOST", "https://<your-org>.truefoundry.cloud")

# Fail fast with an actionable message instead of attempting a login against the
# unedited placeholder URL.
if "<your-org>" in TRUEFOUNDRY_HOST:
    raise ValueError(
        "TRUEFOUNDRY_HOST still contains the `<your-org>` placeholder. "
        "Set the TFY_HOST environment variable or edit TRUEFOUNDRY_HOST to point to your Truefoundry platform endpoint."
    )

truefoundry.login(TRUEFOUNDRY_HOST)

## LLM Finetuning

### Prepare data

We support two different data formats:

### `Chat`

Data needs to be in `jsonl` format with each line containing a whole conversation in OpenAI chat format i.e. each line contains a key called `messages`. Each `messages` key contains a list of messages, where each message is a dictionary with `role` and `content` keys. The `role` key can be either `user`, `assistant` or `system` and the `content` key contains the message content.

```jsonl
{"messages": [{"role": "system", "content": "Marv is a factual chatbot that is also sarcastic."}, {"role": "user", "content": "What's the capital of France?"}, {"role": "assistant", "content": "Paris"}, {"role": "user", "content": "Can you be more sarcastic?"}, {"role": "assistant", "content": "Paris, as if everyone doesn't know that already."}]}
{"messages": [{"role": "system", "content": "Marv is a factual chatbot that is also sarcastic."}, {"role": "user", "content": "Who wrote 'Romeo and Juliet'?"}, {"role": "assistant", "content": "William Shakespeare"}, {"role": "user", "content": "Can you be more sarcastic?"}, {"role": "assistant", "content": "Oh, just some guy named William Shakespeare. Ever heard of him?"}]}
{"messages": [{"role": "system", "content": "Marv is a factual chatbot that is also sarcastic."}, {"role": "user", "content": "How far is the Moon from Earth?"}, {"role": "assistant", "content": "384,400 kilometers"}, {"role": "user", "content": "Can you be more sarcastic?"}, {"role": "assistant", "content": "Around 384,400 kilometers. Give or take a few, like that really matters."}]}
...
```


Reference: https://platform.openai.com/docs/guides/fine-tuning/preparing-your-dataset


### `Completion`
Data needs to be in `jsonl` format with each line containing a JSON object with two keys: `prompt` and `completion`.

```jsonl
{"prompt": "What is 2 + 2?", "completion": "The answer to 2 + 2 is 4"}
{"prompt": "Flip a coin", "completion": "I flipped a coin and the result is heads!"}
{"prompt": "<prompt text>", "completion": "<ideal generated text>"}
...
```

Reference: https://platform.openai.com/docs/guides/fine-tuning/preparing-your-dataset

### Uploading data to notebook

Once you have your data in `.jsonl` files, you can upload them to the file tree on the left and change the `train_data_uri` and `eval_data_uri` variables in the `Data Parameters` section.

![Upload Data](./assets/upload-data.png)

---

In case you don't have data prepared, run the next cell to fetch the [Chat Alpaca English Dataset](https://github.com/cascip/ChatAlpaca)

In [None]:
%%bash
# Chat Type Data
# Download the sample chat-format dataset and save it in the current working directory
# as chatalpaca-openai-10k.jsonl (the default `train_data_uri` in `Data Parameters`).
# `--progress=bar:force:noscroll` keeps wget's progress bar enabled even though the
# cell output is not a terminal.
wget --progress=bar:force:noscroll https://assets.production.truefoundry.com/chatalpaca-openai-10k.jsonl -O chatalpaca-openai-10k.jsonl

In [None]:
%%bash
# Print the first two lines of the downloaded file (one JSON conversation per line)
head -2 chatalpaca-openai-10k.jsonl

In [None]:
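# Completion Type Data: https://huggingface.co/datasets/tatsu-lab/alpaca
# To use it instead of the chat dataset, uncomment the lines below (the leading `!` runs
# them as shell commands) and update `dataset_type`, `train_data_uri` and `eval_data_uri`
# in the `Data Parameters` section accordingly.
# !wget --progress=bar:force:noscroll https://assets.production.truefoundry.com/standford_alpaca_train_49k.jsonl -O standford_alpaca_train_49k.jsonl
# !wget --progress=bar:force:noscroll https://assets.production.truefoundry.com/standford_alpaca_test_2k.jsonl -O standford_alpaca_test_2k.jsonl
# !head -2 standford_alpaca_train_49k.jsonl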

### Data Parameters

In [None]:
from typing import List, Dict, Optional, Any
from data_utils import DatasetType

# Type of dataset - Either `completion` or `chat` (see the `Prepare data` section for both formats)
dataset_type = DatasetType.chat.value

# URI to training data. Can be a file on disk or an mlfoundry artifact fqn.
# The default points at the sample dataset downloaded by the earlier `wget` cell.
train_data_uri: str = "./chatalpaca-openai-10k.jsonl"

# URI to evaluation data. Can be a file on disk or an mlfoundry artifact fqn.
# Set to `None` (the Python value, not a string) if you want to split from train data
eval_data_uri: Optional[str] = None

# When `eval_data_uri` is set to `None`, this portion of the train data is used as eval
eval_size = 0.1

# If your dataset is small (< 50 examples), set this to False
sample_packing = True

# How many steps to use for training. None means all data. Useful to test quickly
max_steps: Optional[int] = None

# Remind the user that a step cap is active so a quick test is not mistaken for a full run
if max_steps is not None:
    print(f"Note: max_steps is set, this might not use the entire training data. This is okay for quick testing. To use all data points please set `max_steps` to `None`")

### Preconfigured Parameters
This section loads the default parameters configured when deploying the notebook, such as the model id, batch size, etc.

In [None]:
from utils import load_launch_parameters

# Load the preconfigured defaults from the finetune config file.
# Later cells read `model_id`, `max_length` and `batch_size` from the returned object.
# NOTE(review): the `/mnt/...` path is presumably mounted when the notebook is deployed;
# behaviour when the file is missing depends on `load_launch_parameters` - confirm.
launch_parameters = load_launch_parameters("/mnt/llm-finetune/finetune-config.json")

### Select a Model

In [None]:
# Huggingface hub model id to finetune e.g. "stas/tiny-random-llama-2"
# If you created this notebook instance from Truefoundry's Model Catalogue, the model id will be set in `launch_parameters`
# Override it here by assigning a different model id string after this line.
model_id = launch_parameters.model_id

# This only prints a warning and does not stop execution; an empty `model_id` would still be
# interpolated into the training command later, so fix it before launching.
if not model_id:
    print('Warning! Variable `model_id` is not set. Please set it to some valid Huggingface hub model. E.g model_id = "stas/tiny-random-llama-2"')

### QLoRA Configuration

In [None]:
# Adapter type to finetune with
adapter = "qlora"

# lora r. Increasing this will increase GPU memory requirement and training time but can give better results
lora_r = 32

# lora alpha. Defaults to twice `lora_r`, but never less than 16
lora_alpha = max(16, 2 * lora_r)

# Whether to apply Lora to all linear layers
lora_target_linear = True

# The names of the modules to apply Lora to. These will be added to modules found by `lora_target_linear` if that is enabled
# `None` means no extra modules are named explicitly.
lora_target_modules: Optional[List[str]] = None

### Hyperparameters

In [None]:
# Where to dump checkpoints and model
output_dir = "./outputs"

# Whether to delete `output_dir` before starting
cleanup_output_dir_on_start = False

# Max Sequence Length.
# Increasing this will allow longer sequences but will significantly increase GPU memory requirement and training time.
# This cannot be greater than model's max sequence length
max_sequence_length = launch_parameters.max_length

# What to do with sequences that are longer than max_sequence_length
# error -> will raise an error if any sequence is longer than max_sequence_length
# truncate -> will truncate sequences that are longer than max_sequence_length
# drop -> will drop sequences that are longer than max_sequence_length
long_sequences_strategy = "error"

# Batch size per GPU.
# Increasing this will increase GPU memory requirement and training time
micro_batch_size = launch_parameters.batch_size

# Learning rate
learning_rate = 0.0002

# How many epochs to run training for
num_epochs = 10

# How often to evaluate. Value less than 1 denotes every X% of total run (e.g. 0.1 -> every 10%)
eval_steps = 0.1

# How often to save checkpoints. Value less than 1 denotes every X% of total run (e.g. 0.1 -> every 10%)
save_steps = 0.1

### Experiment Tracking

In [None]:
import os
from mlfoundry_utils import generate_run_name, get_or_create_run

# Enable reporting metrics to mlfoundry
truefoundry_ml_enable_reporting = True

# Which ML Repo to log metrics and checkpoints to.
# You can create new ML Repos from the https://<your-org>.truefoundry.cloud/mlfoundry page
# Docs: https://docs.truefoundry.com/docs/key-concepts#creating-a-ml-repo
truefoundry_ml_repo = "llm-finetuning"

# Whether to upload checkpoints to ML Repo when they are saved
truefoundry_ml_log_checkpoints = True

# Run to which metrics and checkpoints will be logged.
# The name is generated from the model id, seeded with the current process id.
# NOTE(review): how `generate_run_name` uses the seed is not visible here - confirm.
truefoundry_ml_run_name = generate_run_name(model_id, seed=os.getpid())

# Artifact name under which checkpoints are logged in the ML Repo (derived from the run name)
truefoundry_ml_checkpoint_artifact_name = f"ckpt-{truefoundry_ml_run_name}"


# Only touch the ML Repo when reporting is enabled.
# NOTE(review): `auto_end=False` presumably keeps the run open for the training process to
# log into later - confirm against `get_or_create_run`.
if truefoundry_ml_enable_reporting:
    print(f"Checkpoints will be logged with name {truefoundry_ml_checkpoint_artifact_name}")
    get_or_create_run(
        ml_repo=truefoundry_ml_repo,
        run_name=truefoundry_ml_run_name,
        auto_end=False,
    )
    print("You can click on the above link to track metrics and checkpoints")

In [None]:
def _launch_tensorboard():
    """Start an inline TensorBoard pointed at the training run logs.

    The log directory is `<output_dir>/model/runs` (resolved to an absolute path) and is
    created up front so TensorBoard can start before training has written anything.
    `TENSORBOARD_PROXY_URL` is set to `<NB_PREFIX>/proxy/%PORT%/` so the TensorBoard frame
    is loaded through the notebook's proxy path rather than directly from the port.
    """
    import os
    import shlex
    from urllib.parse import urljoin
    from tensorboard import notebook

    tb_logs = os.path.join(os.path.abspath(output_dir), "model", "runs")
    os.makedirs(tb_logs, exist_ok=True)

    # `urljoin` replaces the last path segment of a base that lacks a trailing slash
    # (e.g. "/notebook/ns/name" + "proxy/" -> "/notebook/ns/proxy/"), so normalise the
    # prefix first to keep the full notebook path in the proxy URL.
    base_url = os.getenv("NB_PREFIX", "/")
    if base_url and not base_url.endswith("/"):
        base_url += "/"
    os.environ["TENSORBOARD_PROXY_URL"] = urljoin(base_url, "proxy/%PORT%/")

    # The argument string is split shell-style, so quote the path in case it contains
    # spaces or other special characters.
    notebook.start(f"--logdir {shlex.quote(tb_logs)} --reload_interval 30.0 --reload_multifile True")


# TensorBoard is only used as the local fallback when reporting to Truefoundry is disabled
if not truefoundry_ml_enable_reporting:
    _launch_tensorboard()

### Start Finetuning!

In [None]:
import os
import shlex
import torch


def _shell_quote(value) -> str:
    """Render `value` as a single shell word, quoting it only when it contains unsafe characters."""
    return shlex.quote(str(value))


# Mixed Precision Training. We automatically select the precision based on GPU capability:
# bf16 on compute capability >= 8.0 GPUs that report bf16 support, fp16 otherwise.
is_ampere_or_newer = torch.cuda.get_device_capability(device=0) >= (8, 0)
mixed_precision = "bf16" if is_ampere_or_newer and torch.cuda.is_bf16_supported() else "fp16"

# Assemble the training command from the values configured in the cells above.
# - Each trailing backslash is consumed by Python as a line continuation inside the string,
#   so COMMAND ends up as one single shell line.
# - Free-text values (model id, paths, repo / run / artifact names) are shell-quoted so that
#   spaces or shell metacharacters in them cannot break or alter the command.
# - `--adapter` follows the `adapter` variable from the QLoRA configuration cell.
# - Output is mirrored to `train.log` via `tee`.
# NOTE(review): `lora_target_modules` is interpolated with plain str(); how train.py parses a
# non-None list is not visible here - confirm before setting it to a list.
COMMAND = f"""
accelerate launch \
--mixed_precision {mixed_precision} \
--use_deepspeed \
train.py \
config-base.yaml \
--deepspeed ./deepspeed_configs/3_ds_z2_config.json \
--gradient_checkpointing unsloth \
--base_model {_shell_quote(model_id)} \
--output_dir {_shell_quote(output_dir)} \
--dataset_type {_shell_quote(dataset_type)} \
--train_data_uri {_shell_quote(train_data_uri)} \
--val_data_uri {_shell_quote(eval_data_uri)} \
--val_set_size {eval_size} \
--max_steps {max_steps} \
--sequence_len {max_sequence_length} \
--long_sequences_strategy {_shell_quote(long_sequences_strategy)} \
--train_on_inputs False \
--sample_packing {sample_packing} \
--pad_to_sequence_len True \
--num_epochs {num_epochs} \
--micro_batch_size {micro_batch_size} \
--learning_rate {learning_rate} \
--warmup_ratio 0.1 \
--gradient_accumulation_steps 4 \
--early_stopping_patience 10 \
--adapter {_shell_quote(adapter)} \
--lora_target_linear {lora_target_linear} \
--lora_target_modules {lora_target_modules} \
--lora_r {lora_r} \
--lora_alpha {lora_alpha} \
--lora_dropout 0.05 \
--logging_steps 5 \
--evaluation_strategy steps \
--eval_steps {eval_steps} \
--save_strategy steps \
--save_steps {save_steps} \
--seed 42 \
--truefoundry_ml_enable_reporting {truefoundry_ml_enable_reporting} \
--truefoundry_ml_repo {_shell_quote(truefoundry_ml_repo)} \
--truefoundry_ml_run_name {_shell_quote(truefoundry_ml_run_name)} \
--truefoundry_ml_checkpoint_artifact_name {_shell_quote(truefoundry_ml_checkpoint_artifact_name)} \
--truefoundry_ml_log_checkpoints {truefoundry_ml_log_checkpoints} \
--cleanup_output_dir_on_start {cleanup_output_dir_on_start} \
--resume_from_checkpoint True \
| tee train.log
"""

print(f"Command to run: {COMMAND}")

In [None]:
# Run the assembled training command in a shell; IPython substitutes the value of `COMMAND` first
!{COMMAND}