# Fine-tune LLaMA using DeepSpeed on Amazon SageMaker

In [None]:
import sagemaker
from sagemaker import get_execution_role
import boto3

# Bootstrap session: used here only to look up the default S3 bucket.
sess = sagemaker.Session()
sagemaker_session_bucket = sess.default_bucket()

# IAM execution role of the current SageMaker environment.
role = get_execution_role()
print(f"SageMaker Execution Role:{role}")

# Re-create the session, this time pinned explicitly to the bucket resolved above.
sess = sagemaker.Session(default_bucket=sagemaker_session_bucket)

# Summary of the resolved role, bucket and region.
print(f"sagemaker role arn: {role}")
print(f"sagemaker bucket: {sess.default_bucket()}")
print(f"sagemaker session region: {sess.boto_region_name}")


In [None]:
import time
from sagemaker.huggingface import HuggingFace

# Number of training instances. Defined once so that the estimator and the
# NODE_NUMBER environment variable below cannot drift apart.
instance_count = 2

# Prefix of the training job name. The SageMaker SDK appends its own timestamp to
# `base_job_name` (and truncates the result to 63 characters), so the prefix itself
# must not carry a timestamp.
base_job_name = 'huggingface-deepspeed'

# Timestamped name, kept for any other cell that refers to `job_name`.
job_name = f'{base_job_name}-{time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())}'

# S3 path that will store the trained model assets.
# Note: replace this with your real S3 path.
target_model_s3_path = f's3://{sess.default_bucket()}/llama-deepspeed-finetuned-052116/model/'

# S3 path of the source (pretrained) model used as the starting point for training.
# Note: keep the trailing wildcard character '*' in this path, otherwise an error will happen.
source_model_s3_path = f's3://{sess.default_bucket()}/llama/pretrained/7B/model/*'

# Environment variables exported into the training containers.
environment = {
    # NOTE(review): forces synchronous CUDA kernel launches; helpful for debugging
    # but slows training -- consider removing once the job is stable.
    'CUDA_LAUNCH_BLOCKING': '1',
    # Presumably read by start.py (not visible here) as the number of nodes -- TODO confirm.
    'NODE_NUMBER': str(instance_count),
    'SOURCE_MODEL_BEFORE_TRAINING_S3_PATH': source_model_s3_path,
    'TARGET_MODEL_AFTER_TRAINING_S3_PATH': target_model_s3_path,
}

# create the Estimator
huggingface_estimator = HuggingFace(
    entry_point          = 'start.py',        # deepspeed launcher script
    source_dir           = '.',               # directory which includes all the files needed for training
    instance_type        = 'ml.p4d.24xlarge', # instance type used for the training job
    instance_count       = instance_count,    # the number of instances used for training
    base_job_name        = base_job_name,     # prefix of the training job name (SDK adds a timestamp)
    role                 = role,              # IAM role used in the training job to access AWS resources, e.g. S3
    #volume_size          = 600,               # the size of the EBS volume in GB
    transformers_version = '4.17',            # the transformers version used in the training job
    pytorch_version      = '1.10',            # the pytorch version used in the training job
    py_version           = 'py38',            # the python version used in the training job
    environment          = environment,       # environment variables passed to the containers
    debugger_hook_config = False,             # disable the SageMaker Debugger hook
)

In [None]:
# S3 URIs of the uploaded datasets (placeholders -- point them at your own bucket).
train_input_path = 's3://your_bucket/data/train'
test_input_path = 's3://your_bucket/data/test'

# Input channels for the training job, keyed by channel name.
data = dict(train=train_input_path, test=test_input_path)

# Launch the training job on the datasets above and block until it finishes.
huggingface_estimator.fit(inputs=data, wait=True)