<a href="https://colab.research.google.com/github/utensil/llm-playground/blob/main/notebooks/axolotl/runpod/axolotl-falcon-40b-qlora-deepspeed.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Finetuning falcon-40b

- Axolotl+QLoRA
- on the GSM8K chain-of-thought data from `QingyiSi/Alpaca-CoT`
- deepspeed ZeRO 3 8xGPU

<!-- https://jupyterlab.readthedocs.io/en/stable/user/commands.html#commands-list -->
<button data-commandLinker-command="apputils:change-theme" data-commandlinker-args='{"theme": "JupyterLab Dark"}' href="#">Dark theme</button>
<button data-commandLinker-command="filebrowser:go-to-path" data-commandlinker-args='{"path": "/workspace/llm-playground/"}' href="#">llm-playground</button>
<button data-commandLinker-command="filebrowser:go-to-path" data-commandlinker-args='{"path": "/workspace/axolotl/"}' href="#">axolotl</button>
<button data-commandLinker-command="filebrowser:go-to-path" data-commandlinker-args='{"path": "/workspace/llm-playground/notebooks/axolotl/runpod"}' href="#">Runpod notebooks</button>
<button data-commandLinker-command="filebrowser:go-to-path" data-commandlinker-args='{"path": "/workspace/axolotl/examples"}' href="#">axolotl configs</button>
<button data-commandLinker-command="filebrowser:go-to-path" data-commandlinker-args='{"path": "/workspace/llm-playground/storage"}' href="#">Storage</button>
<button data-commandLinker-command="filebrowser:go-to-path" data-commandlinker-args='{"path": "/workspace/llm-playground/axolotl-trained"}' href="#">axolotl-trained</button>
<button data-commandLinker-command="docmanager:open" data-commandlinker-args='{"path": "/workspace/axolotl/examples/falcon/config-40b-qlora.yml"}' href="#">Edit config</button>

In [None]:
# Run the repo's storage helper with the argument `utensil/axolotl-trained`.
# NOTE(review): without `-u` this presumably pulls previously trained artifacts into
# /workspace/llm-playground/axolotl-trained — confirm against helper/storage.py.
!python /workspace/llm-playground/helper/storage.py utensil/axolotl-trained

In [None]:
!ls /workspace/llm-playground/axolotl-trained

In [None]:
# Use the %pip magic (not !pip) so the package is installed into the running kernel's environment.
%pip install deepspeed

In [None]:
%cd /workspace/axolotl

/workspace/axolotl


In [None]:
# Try no config
# !accelerate config default

Setting ds_accelerator to cuda (auto detect)
accelerate configuration saved at /root/.cache/huggingface/accelerate/default_config.yaml


In [None]:
%%writefile ds_config.json
{
  "zero_optimization": {
    "stage": 3,
    "offload_optimizer": {
      "device": "cpu",
      "pin_memory": true
    },
    "offload_param": {
      "device": "cpu",
      "pin_memory": true
    },
    "overlap_comm": true,
    "contiguous_gradients": true,
    "sub_group_size": 0,
    "reduce_bucket_size": "auto",
    "stage3_prefetch_bucket_size": "auto",
    "stage3_param_persistence_threshold": "auto",
    "stage3_max_live_parameters": 0,
    "stage3_max_reuse_distance": 0,
    "stage3_gather_16bit_weights_on_model_save": true
  },
  "bf16": {
    "enabled": "auto"
  },
  "fp16": {
    "enabled": "auto",
    "auto_cast": false,
    "loss_scale": 0,
    "initial_scale_power": 32,
    "loss_scale_window": 1000,
    "hysteresis": 2,
    "min_loss_scale": 1
  },
  "optimizer": {
    "type": "AdamW",
    "params": {
      "lr": "auto",
      "betas": [
        0.9,
        0.999
      ],
      "eps": 1e-8,
      "weight_decay": "auto"
    }
  },
  "gradient_accumulation_steps": "auto",
  "train_micro_batch_size_per_gpu": "auto",
  "wall_clock_breakdown": false
}

In [None]:
%%writefile scripts/ft.py
import os
from pathlib import Path
import fire
import logging
import finetune
from axolotl.utils.trainer import setup_trainer as setup_trainer_orig

# Configure root logging; the level is read from the LOG_LEVEL env var and defaults to "INFO".
logging.basicConfig(level=os.getenv("LOG_LEVEL", "INFO"))

def train_ex(
    config: Path = Path("configs/"),
    prepare_ds_only: bool = False,
    **kwargs,
):
    """Call ``finetune.train`` with the given arguments, logging before and after.

    Args:
        config: Forwarded unchanged as the first positional argument.
        prepare_ds_only: Forwarded unchanged as the second positional argument.
        **kwargs: Extra keyword arguments passed straight through.
    """
    logging.info("train_ex before")
    finetune.train(config, prepare_ds_only, **kwargs)
    logging.info("train_ex after")

def setup_trainer_ex(cfg, train_dataset, eval_dataset, model, tokenizer):
    """Wrap the original ``setup_trainer`` with log lines around the call.

    Logs the value of ``cfg.some_config``, delegates to ``setup_trainer_orig``
    with the same five arguments, and returns whatever it produced.
    """
    logging.info("setup_trainer_ex before")
    some_config = cfg.some_config
    logging.info(f"cfg.some_config = {some_config}")
    built_trainer = setup_trainer_orig(
        cfg, train_dataset, eval_dataset, model, tokenizer
    )
    logging.info("setup_trainer_ex after")
    return built_trainer

# Rebind `finetune.setup_trainer` to the logging wrapper defined above.
# NOTE(review): this only has an effect if `finetune.train` looks up `setup_trainer`
# through the `finetune` module namespace at call time — confirm.
finetune.setup_trainer = setup_trainer_ex

# Expose `train_ex` as the command-line entry point via python-fire.
if __name__ == "__main__":
    fire.Fire(train_ex)

In [None]:
# Set ACCELERATE_USE_DEEPSPEED in the kernel's environment so later `!` commands inherit it.
# NOTE(review): presumably this makes `accelerate launch` enable its DeepSpeed integration — confirm against the Accelerate docs.
%env ACCELERATE_USE_DEEPSPEED=true

## #1

In [None]:
%cd /workspace/axolotl

In [None]:
%%writefile examples/falcon/config-40b-qlora.yml
# 1b: tiiuae/falcon-rw-1b
# 7b: tiiuae/falcon-7b
# 40b: tiiuae/falcon-40b
base_model: tiiuae/falcon-40b
base_model_config: tiiuae/falcon-40b
# required by falcon custom model code: https://huggingface.co/tiiuae/falcon-7b/tree/main
trust_remote_code: true
model_type: AutoModelForCausalLM
tokenizer_type: AutoTokenizer
load_in_8bit: false
# enable 4bit for QLoRA
load_in_4bit: true
gptq: false
strict: false

push_dataset_to_hub: utensil
hf_use_auth_token: true

datasets:
  - path: QingyiSi/Alpaca-CoT
    data_files:
      - Chain-of-Thought/formatted_cot_data/gsm8k_train.json
    type: "alpaca:chat"

dataset_prepared_path: last_run_prepared
val_set_size: 0.01
# enable QLoRA
adapter: qlora
lora_model_dir:
sequence_len: 2048
max_packed_sequence_len:

# hyperparameters from QLoRA paper Appendix B.2
# "We find hyperparameters to be largely robust across datasets"
lora_r: 64
lora_alpha: 16
# 0.1 for models up to 13B
# 0.05 for 33B and 65B models
lora_dropout: 0.05
# add LoRA modules on all linear layers of the base model
lora_target_modules:
lora_target_linear: true
lora_fan_in_fan_out:

wandb_project: falcon-qlora
wandb_watch:
wandb_run_id:
wandb_log_model:
output_dir: /workspace/llm-playground/axolotl-trained/falcon-qlora-40b-gsm8k/

# QLoRA paper Table 9
# - 16 for 7b & 13b
# - 32 for 33b, 64 for 65b
# Max size tested on A6000
# - 7b: 40
# - 40b: 4
# decrease if OOM, increase for max VRAM utilization
micro_batch_size: 1
gradient_accumulation_steps: 1
num_epochs: 3
# Optimizer for QLoRA
optimizer: paged_adamw_32bit
torchdistx_path:
lr_scheduler: cosine
# QLoRA paper Table 9
# - 2e-4 for 7b & 13b
# - 1e-4 for 33b & 65b
learning_rate: 0.0002
train_on_inputs: false
group_by_length: false
bf16: true
fp16: false
tf32: true
gradient_checkpointing: true
# stop training after this many evaluation losses have increased in a row
# https://huggingface.co/transformers/v4.2.2/_modules/transformers/trainer_callback.html#EarlyStoppingCallback
early_stopping_patience: 3
resume_from_checkpoint:
auto_resume_from_checkpoints: true
local_rank:
logging_steps: 1
xformers_attention: true
flash_attention:
gptq_groupsize:
gptq_model_v1:
warmup_steps: 10
eval_steps: 5
save_steps: 10
debug:
deepspeed:
weight_decay: 0.01
fsdp:
fsdp_config:
special_tokens:
  pad_token: "<|endoftext|>"
  bos_token: ">>ABSTRACT<<"
  eos_token: "<|endoftext|>"

In [None]:
# Launch finetuning via Accelerate with the YAML config written in the previous cell;
# the DeepSpeed JSON config (ds_config.json) is passed through --deepspeed.
!accelerate launch scripts/finetune.py examples/falcon/config-40b-qlora.yml --deepspeed ds_config.json

## #2


In [None]:
!cat examples/falcon/config-40b-qlora.yml

In [None]:
#%%writefile examples/falcon/config-40b-qlora.yml

In [None]:
!accelerate launch scripts/finetune.py examples/falcon/config-40b-qlora.yml --deepspeed ds_config.json

## #3

In [None]:
# Show the config file that the launch cell in this section actually uses.
!cat examples/falcon/config-40b-qlora.yml

In [None]:
#%%writefile examples/falcon/config-40b-qlora.yml

In [None]:
!accelerate launch scripts/finetune.py examples/falcon/config-40b-qlora.yml --deepspeed ds_config.json

## #4

In [None]:
# Show the config file that the launch cell in this section actually uses.
!cat examples/falcon/config-40b-qlora.yml

In [None]:
#%%writefile examples/falcon/config-40b-qlora.yml

In [None]:
!accelerate launch scripts/finetune.py examples/falcon/config-40b-qlora.yml --deepspeed ds_config.json

## Below are ad hoc cells handling issues during training

### Force release VRAM

In [None]:
# First interrupt the kernel, wait a few seconds, then run this to kill finetune and release VRAM.
# The pipeline lists processes, keeps lines mentioning both "python" and "finetune",
# extracts the PID column, and passes those PIDs to kill.
!ps aux|grep python|grep finetune|awk '{print $2}'|xargs kill

### Clean the finetuned model and all checkpoints

In [None]:
# Only run this to start over.
# NOTE(review): the config cell in this notebook sets output_dir to
# /workspace/llm-playground/axolotl-trained/falcon-qlora-40b-gsm8k/, so ./qlora-out
# may not be where this run's checkpoints live — confirm before relying on this cell.
!rm -rf ./qlora-out

### Zip the prepared dataset

In [None]:
# Install zip non-interactively (-y): a `!` cell cannot answer apt's confirmation prompt.
!apt install -y zip
# Archive the prepared-dataset directory recursively. The previous `-xi last_run_prepared`
# was parsed as an exclude list, leaving zip with no input files to add.
!zip -r last_run_prepared.zip last_run_prepared

### Monitoring GPU

In [None]:
# Run this in a separate terminal
!nvitop -m full

### Fix DISK FULL

In [None]:
%cd /

/


In [None]:
!du -d 2 -h|grep G

In [None]:
!du -d 2 -h /root/.local

In [None]:
!rm -rf /root/.local/share/Trash/

In [None]:
!rm -rf /root/.local/share/wandb/

In [None]:
!rm -rf /root/.cache/wandb/

### Check who is using GPU

In [None]:
# Install lsof non-interactively (-y): a `!` cell cannot answer apt's confirmation prompt.
!apt install -y lsof

In [None]:
!lsof /dev/nvidia*

### Upload checkpoints to HF

In [None]:
%cd /workspace/llm-playground/axolotl-trained/falcon-qlora-40b-gsm8k/

In [None]:
!ls -lhta |grep checkpoint-

In [None]:
!ls -lhta |grep checkpoint- | awk 'NR > 1 {print $9}'

In [None]:
# ls -lhta |grep checkpoint- | awk 'NR > 1 {print $9}' | xargs rm -rf

In [None]:
!python /workspace/llm-playground/helper/storage.py utensil/axolotl-trained -u

### Update axolotl

In [None]:
%cd /workspace/

/workspace


In [None]:
!git clone https://github.com/OpenAccess-AI-Collective/axolotl axolotl-update

In [None]:
# Copy the fresh clone's top-level entries over the existing checkout.
# The `*` glob does not match dotfiles, so hidden entries such as axolotl-update/.git are not copied.
!cp -r axolotl-update/* axolotl

In [None]:
%cd /workspace/axolotl

In [None]:
!git status

In [None]:
# Editable install of the current directory; %pip (not !pip) targets the running kernel's environment.
%pip install -e .

### A new bash without tmux etc.

In [None]:
!bash --norc --noprofile

### Clean up all checkpoints but last one

In [None]:
# Keep the newest checkpoint and delete the rest: list by modification time (newest first),
# skip the first checkpoint- entry, and remove the remaining ones.
# The leading `!` runs the whole line in one shell; without it IPython treats the line
# as a %cd call with the rest of the text as the path, and the cell fails.
!cd /workspace/axolotl/qlora-out/ && ls -lhta |grep checkpoint- | awk 'NR > 1 {print $9}' | xargs rm -rf