# Fine-tuning with LLaMA-Factory on SageMaker

## 0. Initialize

In [None]:
# Install the dependencies once:
# !pip install -U boto3 sagemaker awscli modelscope huggingface-hub
# Then restart the Jupyter kernel.

In [None]:
import sagemaker
from sagemaker import get_execution_role

# SageMaker session; the region and default bucket below are read from it.
sess = sagemaker.Session()
region = sess.boto_session.region_name
sagemaker_default_bucket = sess.default_bucket()

# Execution role that is later handed to the training-job estimator.
role = get_execution_role()

## 1. Prepare dataset

Prepare your fine-tuning dataset in the format described in the LLaMA-Factory data preparation guide:
[https://llamafactory.readthedocs.io/zh-cn/latest/getting_started/data_preparation.html](https://llamafactory.readthedocs.io/zh-cn/latest/getting_started/data_preparation.html)

The example below downloads the `llamafactory/PubMedQA` dataset from Hugging Face and uploads it to the S3 bucket.

In [None]:
from huggingface_hub import snapshot_download

dataset_repo_id = "llamafactory/PubMedQA"
local_dataset_path = f"/home/ec2-user/dataset/{dataset_repo_id}"

repo_path = snapshot_download(
    repo_id=dataset_repo_id,
    repo_type="dataset",
    local_dir=local_dataset_path, 
)

s3_dataset_path = f's3://{sagemaker_default_bucket}/dataset/{dataset_repo_id}'

!aws s3 sync {local_dataset_path} {s3_dataset_path}

## 2. Prepare pretrained model

Download the pretrained model and upload it to the S3 bucket. This example uses a Qwen2.5 model from ModelScope.

In [None]:
# Model id passed to `modelscope download`; it is also used as the sub-path
# under the local model directory and the S3 `pretrained-models/` prefix.
model_id = "Qwen/Qwen2.5-32B-Instruct"
local_model_path = f"/home/ec2-user/model/{model_id}"
s3_model_path = f's3://{sagemaker_default_bucket}/pretrained-models/{model_id}'

# Download the model files to the local path, then mirror them to S3.
!modelscope download --local_dir {local_model_path} {model_id} 
!aws s3 sync {local_model_path} {s3_model_path}

## 3. Prepare training config

Prepare the training scripts in `submit_src`: the entry point is `submit_src/estimator_entry.py`, and the training configuration is a `.yaml` file.
For more about LLaMA-Factory training configs, see: https://llamafactory.readthedocs.io/zh-cn/latest/getting_started/sft.html

In [None]:
# Clone LLaMA-Factory at tag v0.9.1 into submit_src/ and remove its .git
# directory. submit_src/ is the source_dir given to the training job later on.
!git clone https://github.com/hiyouga/LLaMA-Factory.git -b v0.9.1 ./submit_src/LLaMA-Factory && rm -rf ./submit_src/LLaMA-Factory/.git
# Download the s5cmd v2.2.2 Linux 64-bit release, extract only the `s5cmd`
# binary, and move it into submit_src/.
!curl -L https://github.com/peak/s5cmd/releases/download/v2.2.2/s5cmd_2.2.2_Linux-64bit.tar.gz | tar -xz s5cmd && mv s5cmd ./submit_src/

In [None]:
# Base name (without the `.yaml` extension) of the training config to use.
# It is sent to the training job as CONF_YAML_NAME and is part of the job name.
# Switch between full fine-tuning and LoRA by toggling the lines below.
# llamafactory_yaml = "qwen_full_sft"
llamafactory_yaml = "qwen_lora_sft"

## 4. Start training with a SageMaker training job

In [None]:
from sagemaker.pytorch import PyTorch
from sagemaker.estimator import Estimator

# Select the AWS Deep Learning Container image (PyTorch 2.3.0, CUDA 12.1,
# Python 3.11) for the current region. Only the registry accounts listed here
# are configured. GovCloud regions (us-gov-*) also start with "us-" but use
# different registry accounts, so they are rejected instead of being mapped
# to the commercial account.
if region.startswith("us-") and not region.startswith("us-gov-"):
    image_uri = f'763104351884.dkr.ecr.{region}.amazonaws.com/pytorch-training:2.3.0-gpu-py311-cu121-ubuntu20.04-sagemaker'
elif region.startswith("cn-"):
    image_uri = f'727897471807.dkr.ecr.{region}.amazonaws.com.cn/pytorch-training:2.3.0-gpu-py311-cu121-ubuntu20.04-sagemaker'
else:
    # ValueError is a subclass of Exception, so existing handlers still work.
    raise ValueError(
        f"No image_uri is configured for region {region!r}; look up the matching image at "
        "https://github.com/aws/deep-learning-containers/blob/master/available_images.md"
    )

# Training instance type; uncomment exactly one line.
# instance_type = "ml.g5.2xlarge"     # 1 * A10g (24G/GPU)
# instance_type = "ml.g5.12xlarge"    # 4 * A10g (24G/GPU)
instance_type = "ml.g5.48xlarge"    # 8 * A10g (24G/GPU)
# instance_type = "ml.p4d.24xlarge"   # 8 * A100 (40G/GPU)
# instance_type = "ml.p5.48xlarge"    # 8 * H100 (80G/GPU)
# instance_type = "ml.g6e.48xlarge"   # 8 * L40s (48G/GPU)

instance_count = 1                  # 1 or Multi-node

# Environment variables passed to the training container.
# NOTE(review): the trailing "/*" wildcards are presumably expanded by the
# s5cmd binary shipped in submit_src/ -- confirm in estimator_entry.py.
envs = {
    'MODEL_ID_OR_S3_PATH': f"{s3_model_path}/*",
    # The "250110" path segment is a fixed run tag; change it to keep the
    # outputs of different runs apart.
    'MODEL_SAVE_PATH_S3': f's3://{sagemaker_default_bucket}/output-model/250110/{model_id}',
    'CONF_YAML_NAME': f'{llamafactory_yaml}.yaml',
    'DATASET_S3_PATH': f"{s3_dataset_path}/*",
}

# No hyperparameters are passed; the job is configured through `envs`.
hypers = {
}

# Build the job-name prefix from the model id and config name, replacing
# '/', '.' and '_' with '-'.
base_job_name = f"{model_id}-{llamafactory_yaml}".replace('/','-').replace('.','-').replace('_','-')

smp_estimator = Estimator(
    role=role,
    sagemaker_session=sess,
    base_job_name=base_job_name,
    entry_point="estimator_entry.py",
    source_dir='submit_src/',
    instance_type=instance_type,
    instance_count=instance_count,
    environment=envs,
    hyperparameters=hypers,
    image_uri=image_uri,
    max_run=7200,                       # job time limit, in seconds (2 hours)
    keep_alive_period_in_seconds=1800,  # 30-minute keep-alive period
    disable_output_compression=True,
)

# Derive the concrete job name from the base name, then launch the job.
job_name = sagemaker.utils.name_from_base(base_job_name, short=True)

smp_estimator.fit(job_name=job_name)

## Speed test

Training speed test on g5 instances:

|                       | Qwen2.5-32B-Instruct | Qwen2.5-32B-Instruct |
| --------------------- | -------------------- | -------------------- |
| finetune_type         | **full**             | **lora**             |
| deepspeed             | zero3-offload        | zero3-offload        |
| instance              | g5.48xlarge          | g5.48xlarge          |
| instance_num          | **2**                | **1**                |
| batch_size_per_device | 4                    | 4                    |
| cutoff_len            | 2048                 | 2048                 |
| gradient_accumulation | 1                    | 1                    |
| total_batch_size      | 64                   | 32                   |
| seconds_per_batch     | **89**               | **72**               |
| samples_per_hour      | **2588.8**           | **1600.0**           |
