In [None]:
%pip install --upgrade sagemaker datasets

In [None]:
from sagemaker.jumpstart.model import JumpStartModel
from sagemaker.jumpstart.estimator import JumpStartEstimator
import boto3 
import pandas as pd 
import sagemaker 
from sagemaker.workflow.pipeline_context import PipelineSession 
from sagemaker.workflow.steps import TrainingStep
# --- Notebook-wide configuration ---------------------------------------------
# Plain constants first; they need no AWS calls.
pipeline_name = "Llama2-Mlops"
default_bucket = "BUCKET"  # placeholder; sagemaker_session.default_bucket() is the alternative

# AWS handles shared by the cells below.
s3_client = boto3.resource("s3")  # note: a boto3 *resource* object, despite the variable name
sagemaker_session = sagemaker.session.Session()
region = sagemaker_session.boto_region_name
role = sagemaker.get_execution_role()
pipeline_session = PipelineSession()


from sagemaker.workflow.parameters import ( 
 ParameterInteger, 
 ParameterString, 
 ParameterFloat,
ParameterBoolean) 

# --- Pipeline parameters -------------------------------------------------------
# Every value below can be overridden per execution via pipeline.start(parameters=...).
def _text_param(name, default=None):
    """Build a free-form ParameterString called `name` with the given default."""
    return ParameterString(name=name, default_value=default)


def _flag_param(name, default):
    """Build a ParameterString that only accepts the strings "False" or "True"."""
    return ParameterString(name=name, default_value=default, enum_values=["False", "True"])


# Compute resources
training_instance_count = ParameterInteger(name="TrainingInstanceCount", default_value=1)
endpoint_instance_type = _text_param("EndpointInstanceType", "ml.g5.2xlarge")
training_instance_type = _text_param("TrainingInstanceType", "ml.g5.12xlarge")
disable_output_compression = ParameterBoolean(name="OutputCompression", default_value=False)

# Fine-tuning switches (string-typed "False"/"True" flags)
intstruction_tune = _flag_param("IntsructionTune", "False")
chat_dataset = _flag_param("ChatDataset", "False")
quantization = _flag_param("Int8Quant", "False")
fsdp = _flag_param("FSDP", "True")

# Numeric hyperparameters, carried as strings
epoch = _text_param("Epoch", "1")
train_batch_size = _text_param("TrainBatchSize", "4")
eval_batch_size = _text_param("EvalBatchSize", "1")
learning_rate = _text_param("LearningRate", "0.0001")
lora_r = _text_param("LoraR", "8")
lora_alpha = _text_param("LoraAlpha", "32")
lora_dropout = _text_param("LoraDropout", "0.05")

# Data locations: no default, so they must be supplied when the pipeline is started
train_data_s3_path = _text_param("TrainDataS3Path")
val_data_s3_path = _text_param("ValidationDataS3Path")
stereotype_data_s3_path = _text_param("StereotypeDataS3Path")
stereotype_dataset_name = _text_param("StereotypeDataName")

# Model registry, model card and JumpStart identifiers
model_approval_status = _text_param("ModelApprovalStatus", "PendingManualApproval")
output_bucket = _text_param("Bucket", "calibucket-aws")
model_package_group_name = _text_param("ModelPackageGroupName")
jumpstart_model_id = _text_param("JumpStartModelID", "meta-textgeneration-llama-2-7b-f")
jumpstart_model_version = _text_param("JumpStartModelVersion", "*")
model_card_name = _text_param("ModelCardName")

In [None]:
# JumpStart model identifier and version constraint used by the estimator cell.
model_id, model_version = "meta-textgeneration-llama-2-7b-f", "3.*"

In [None]:
import boto3
# Fine-tuning job definition. Because the estimator is bound to a PipelineSession,
# fit() does not launch a job; it returns the arguments for a pipeline TrainingStep.
estimator = JumpStartEstimator(
    model_id=model_id,
    model_version=model_version,
    environment={
        "accept_eula": "true",
        "SageMakerGatedModelS3Uri": "s3://jumpstart-private-cache-prod-us-east-1/meta-training/g5/v1.0.0/train-meta-textgeneration-llama-2-7b-f.tar.gz",
    },
    image_uri="763104351884.dkr.ecr.us-east-1.amazonaws.com/huggingface-pytorch-training:2.0.0-transformers4.28.1-gpu-py310-cu118-ubuntu20.04",
    instance_type=training_instance_type,
    instance_count=training_instance_count,
    sagemaker_session=PipelineSession(),
    keep_alive_period_in_seconds=3600,
    disable_output_compression=False,
    enable_sagemaker_metrics=True,
)

# Each pipeline parameter feeds the hyperparameter of the same meaning:
# TrainBatchSize -> per_device_train_batch_size, EvalBatchSize -> per_device_eval_batch_size.
estimator.set_hyperparameters(
    instruction_tuned=intstruction_tune,
    epoch=epoch,
    chat_dataset=chat_dataset,
    int8_quantization=quantization,
    learning_rate=learning_rate,
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    lora_r=lora_r,
    per_device_train_batch_size=train_batch_size,
    per_device_eval_batch_size=eval_batch_size,
    enable_fsdp=fsdp,
)

# Channel locations come from pipeline parameters, e.g.
#   s3://jumpstart-cache-prod-us-east-1/training-datasets/sec_data/train/
#   s3://jumpstart-cache-prod-us-east-1/training-datasets/sec_data/validation/
train_args = estimator.fit(
    {
        "training": train_data_s3_path,
        "validation": val_data_s3_path,
    },
    wait=False,
)

In [None]:
# Wrap the deferred fit() arguments in the pipeline's training step.
step_train = TrainingStep(name="Llama2Tuning", step_args=train_args)

In [None]:
import sagemaker
from sagemaker.huggingface.model import HuggingFaceModel
from sagemaker.huggingface import get_huggingface_llm_image_uri
# Serving container image for the Hugging Face LLM backend, version 1.1.0.
llm_image = get_huggingface_llm_image_uri("huggingface", version="1.1.0")

# Environment variables handed to the serving container.
_serving_env = {
    "ENDPOINT_SERVER_TIMEOUT": "3600",
    "HF_MODEL_ID": "/opt/ml/model",
    "MAX_INPUT_LENGTH": "4095",
    "MAX_TOTAL_TOKENS": "4096",
    "MODEL_CACHE_ROOT": "/opt/ml/model",
    "SAGEMAKER_ENV": "1",
    "SAGEMAKER_MODEL_SERVER_WORKERS": "1",
    "SAGEMAKER_PROGRAM": "inference.py",
    "SM_NUM_GPUS": "1",
}

# Model object whose artifacts are the output of the training step.
huggingface_model = HuggingFaceModel(
    model_data=step_train.properties.ModelArtifacts.S3ModelArtifacts,
    role=role,
    image_uri=llm_image,
    sagemaker_session=pipeline_session,
    env=_serving_env,
)
from sagemaker.inputs import CreateModelInput
from sagemaker.workflow.model_step import ModelStep

# Pipeline step that creates the SageMaker model from the object above.
_create_model_args = huggingface_model.create(instance_type=endpoint_instance_type)
step_create_model = ModelStep(name="Llama2FinanaceTunedModel", step_args=_create_model_args)

In [None]:
from sagemaker import Model
from sagemaker.workflow.model_step import ModelStep

# Register the fine-tuned model as a model package in the group named by the
# ModelPackageGroupName parameter. Requests and responses are both JSON.
register_args = huggingface_model.register(
    content_types=["application/json"],
    response_types=["application/json"],
    inference_instances=[endpoint_instance_type],
    model_package_group_name=model_package_group_name,
    approval_status=model_approval_status,
)
step_register = ModelStep(name="Llama2FinetunedRegisterModel", step_args=register_args)

In [None]:
%%writefile lambda_deployer.py
import json
import boto3
import time
def lambda_handler(event, context):
    """Create an endpoint configuration and an endpoint for an existing model.

    Args:
        event: Dict with the required keys ``model_name``,
            ``endpoint_config_name`` and ``endpoint_name``. The optional key
            ``instance_type`` selects the hosting instance type and defaults
            to ``"ml.g5.2xlarge"``. Any other keys (for example ``role``) are
            accepted and ignored.
        context: Lambda context object (unused).

    Returns:
        Dict with ``statusCode``, ``body`` and ``other_key`` entries.

    Raises:
        KeyError: If a required key is missing from ``event``.
    """
    sm_client = boto3.client("sagemaker")
    # The name of the model created in the Pipeline CreateModelStep
    model_name = event["model_name"]
    endpoint_config_name = event["endpoint_config_name"]
    endpoint_name = event["endpoint_name"]
    # Callers that do not send an instance type keep the previous fixed value.
    instance_type = event.get("instance_type", "ml.g5.2xlarge")

    sm_client.create_endpoint_config(
        EndpointConfigName=endpoint_config_name,
        ProductionVariants=[
            {
                "InstanceType": instance_type,
                "InitialVariantWeight": 1,
                "InitialInstanceCount": 1,
                "ModelName": model_name,
                "VariantName": "AllTraffic",
            }
        ],
    )
    # NOTE(review): fixed two-minute pause between the two API calls is kept
    # as-is; confirm whether it is still required.
    time.sleep(120)
    sm_client.create_endpoint(
        EndpointName=endpoint_name, EndpointConfigName=endpoint_config_name
    )
    return {
        "statusCode": 200,
        "body": json.dumps("Created Endpoint"),
        "other_key": "example_value",
    }

In [None]:
# Use the current time to define unique names for the resources created
import time
from sagemaker.workflow.lambda_step import (
    LambdaStep,
    LambdaOutput,
    LambdaOutputTypeEnum,
)
from sagemaker.lambda_helper import Lambda


# Timestamp suffix that keeps the resource names below unique per notebook run.
current_time = time.strftime("%m-%d-%H-%M-%S", time.localtime())

model_name = step_create_model.properties.ModelName
endpoint_config_name = f"Llama2Tuned{current_time}"
endpoint_name = f"Llama2Tuned{current_time}"
function_name = f"sagemaker-deploy{current_time}"
lambda_role = "arn:aws:iam::259508681668:role/FullLambdaJobAccess"

# The Lambda helper packages lambda_deployer.py into a Lambda function.
func = Lambda(
    function_name=function_name,
    execution_role_arn=lambda_role,
    script="lambda_deployer.py",
    handler="lambda_deployer.lambda_handler",
    timeout=600,
    memory_size=10240,
    runtime="python3.10",
)

# Each key of the dictionary returned by the Lambda function is captured by one
# LambdaOutput.
output_param_1, output_param_2, output_param_3 = (
    LambdaOutput(output_name=key, output_type=LambdaOutputTypeEnum.String)
    for key in ("statusCode", "body", "other_key")
)

# These inputs reach the handler as the `event` argument of `lambda_handler`.
_deploy_event = {
    "model_name": model_name,
    "endpoint_config_name": endpoint_config_name,
    "endpoint_name": endpoint_name,
    "role": role,
}
step_deploy_lambda = LambdaStep(
    name="LambdaStepHuggingFaceDeploy",
    lambda_func=func,
    inputs=_deploy_event,
    outputs=[output_param_1, output_param_2, output_param_3],
)

In [None]:
%%writefile llm_eval.py

def llama_evaluation(data_path, endpoint_name):
    """Run the fmeval prompt-stereotyping evaluation against an endpoint.

    Args:
        data_path: Location of the JSON Lines dataset, passed as ``dataset_uri``.
        endpoint_name: Name of the endpoint the model runner invokes.

    Returns:
        The value returned by ``PromptStereotyping.evaluate`` (called with
        ``save=True``).
    """
    # fmeval is imported inside the function: the script pip-installs it at
    # module level before this function is called.
    from fmeval.constants import MIME_TYPE_JSONLINES
    from fmeval.data_loaders.data_config import DataConfig
    from fmeval.eval_algorithms.prompt_stereotyping import PromptStereotyping
    from fmeval.model_runners.sm_jumpstart_model_runner import JumpStartModelRunner

    # Request body template; $prompt is the placeholder for each prompt.
    request_template = '{"inputs": $prompt, "parameters": {"top_p": 0.9, "temperature": 0.85, "max_new_tokens": 1024, "return_full_text":false,"decoder_input_details": true,"details": true }}'

    # Dataset layout: which fields hold the two sentences and the category.
    dataset_config = DataConfig(
        dataset_name="crows-pairs_sample",
        dataset_uri=data_path,
        dataset_mime_type=MIME_TYPE_JSONLINES,
        sent_more_input_location="sent_more",
        sent_less_input_location="sent_less",
        category_location="bias_type",
    )

    runner = JumpStartModelRunner(
        endpoint_name=endpoint_name,
        model_id="meta-textgeneration-llama-2-7b-f",
        model_version="3.*",
        output='[0].generated_text',
        log_probability='[0].details.prefill[*].logprob',
        content_template=request_template,
        custom_attributes="accept_eula=true",
    )

    return PromptStereotyping().evaluate(
        model=runner,
        dataset_config=dataset_config,
        prompt_template="$feature",
        save=True,
    )

import os
import subprocess
import json
import sys

# Install runtime dependencies into the interpreter that runs this script.
# check=True stops the job at the failing install instead of at a later import.
for _pip_args in (["pip", "-U"], ["sagemaker"], ["fmeval"]):
    subprocess.run([sys.executable, "-m", "pip", "install", *_pip_args], check=True)

# Respect a region that is already configured; us-east-1 is only the fallback.
os.environ.setdefault(
    "AWS_DEFAULT_REGION", os.environ.get("AWS_REGION", "us-east-1")
)
import boto3
sage = boto3.client("sagemaker")


import argparse
parser = argparse.ArgumentParser()
parser.add_argument("--dataset-name", type=str)
parser.add_argument("--endpoint-name", type=str)
# parse_known_args: any additional arguments passed to the script are ignored.
args, _ = parser.parse_known_args()

# Directory handed to fmeval through the EVAL_RESULTS_PATH environment variable.
# NOTE(review): this path is relative (no leading "/"); confirm that is intended.
curr_dir = "opt/ml/output"
os.environ["EVAL_RESULTS_PATH"] = curr_dir

endpoint_name = args.endpoint_name
dataset_name = args.dataset_name

# Block until the endpoint is InService. The waiter polls with a delay between
# calls and raises if the endpoint fails, rather than spinning on
# describe_endpoint indefinitely.
sage.get_waiter("endpoint_in_service").wait(EndpointName=endpoint_name)

# The dataset is read from the "training" input channel directory.
data_path = f"/opt/ml/input/data/training/{dataset_name}"
result = llama_evaluation(data_path, endpoint_name)

# default=vars serialises non-JSON objects in the result via their attributes.
os.makedirs("/opt/ml/output/data", exist_ok=True)
with open("/opt/ml/output/data/eval_metrics.json", "w") as f:
    json.dump(result, f, default=vars, indent=4)


In [None]:
from sagemaker.processing import ScriptProcessor, ProcessingInput, ProcessingOutput
from sagemaker.workflow.steps import ProcessingStep
from sagemaker.pytorch.estimator import PyTorch


# Container image for the evaluation job.
# NOTE(review): no image URI is defined anywhere in this notebook, so a stock
# SageMaker PyTorch 2.0.0 / Python 3.10 CPU training image is assumed here.
# Replace it with your own URI if a different container is required.
img = sagemaker.image_uris.retrieve(
    framework="pytorch",
    region=region,
    version="2.0.0",
    py_version="py310",
    instance_type="ml.m5.xlarge",
    image_scope="training",
)

# The evaluation runs llm_eval.py as a training job; the hyperparameters reach
# the script as --dataset-name / --endpoint-name command-line arguments.
eval_processor = PyTorch(
    role=role,
    instance_type="ml.m5.xlarge",
    instance_count=1,
    entry_point="llm_eval.py",
    image_uri=img,
    hyperparameters={
        "dataset-name": stereotype_dataset_name,
        "endpoint-name": endpoint_name,
    },
    keep_alive_period_in_seconds=3600,
    sagemaker_session=PipelineSession(),
)

# The stereotype dataset is exposed to the job as the "training" channel.
process_args = eval_processor.fit({"training": stereotype_data_s3_path})

# Run only after the deploy Lambda step has been executed.
step_process = TrainingStep(
    name="Llama2EvaluateStereotype",
    step_args=process_args,
    depends_on=[step_deploy_lambda],
)

In [None]:
%%writefile lambda_model_card.py

"""
This Lambda function creates a model card for the fine-tuned model
"""

import json
import boto3

def _create_model_card(file,event):
    """Fill the model card skeleton from ``event`` and create or update the card.

    Args:
        file: Nested dict holding the model card skeleton; it is modified in
            place. It must already contain ``model_overview`` (with
            ``inference_environment``), ``business_details``,
            ``intended_uses`` and ``training_details`` (with
            ``training_job_details`` and its ``training_environment``).
        event: Dict providing ``model_name``, ``model_arn``,
            ``model_artifact``, ``model_image``, ``training_job_arn``,
            ``input_data``, ``training_image_arn``, ``hyper_param``,
            ``metrics``, ``llm_metric_output``, ``llm_metric_name``,
            ``stereotype`` and ``model_card``.

    Returns:
        None.

    Raises:
        KeyError: If a required key is missing from ``file`` or ``event``.
    """
    sm_client = boto3.client("sagemaker")

    overview = file['model_overview']
    overview['model_name'] = event['model_name']
    overview['model_id'] = event["model_arn"]
    overview['model_artifact'] = [event['model_artifact']]
    overview['problem_type'] = ""
    overview['algorithm_type'] = "NeuralNetwork"
    overview['model_description'] = ""
    overview['model_creator'] = ""
    overview['model_owner'] = ""
    overview['inference_environment']['container_image'] = [event['model_image']]

    business = file['business_details']
    business['business_problem'] = ""
    business['business_stakeholders'] = ""
    business['line_of_business'] = ""

    uses = file['intended_uses']
    uses['intended_uses'] = ""
    uses['explanations_for_risk_rating'] = ""
    uses['factors_affecting_model_efficiency'] = "Data Quality"
    uses['risk_rating'] = "Low"

    job = file['training_details']['training_job_details']
    job['training_arn'] = event['training_job_arn']
    job['training_datasets'] = event["input_data"]
    job['training_environment']['container_image'] = [event['training_image_arn']]
    job['hyper_parameters'] = event["hyper_param"]
    job['training_metrics'] = event["metrics"]

    file['evaluation_details'] = [{
        'datasets': [event["llm_metric_output"]],
        'name': event["llm_metric_name"],
        'metric_groups': [{'name': event["llm_metric_name"],
                           'metric_data': event["stereotype"]}],
        'evaluation_observation': 'NA'}]

    # Serialise with json.dumps directly. Swapping quote characters in
    # str(dict) breaks on any value that contains a quote, True or None.
    content = json.dumps(file)
    model_card_name = event['model_card']

    # NameContains is a substring filter, so require an exact name match.
    # Only the first page of results is inspected.
    model_card_list = sm_client.list_model_cards(
        NameContains=model_card_name,
        SortBy='CreationTime',
        SortOrder='Descending'
    )
    card_exists = any(
        summary['ModelCardName'] == model_card_name
        for summary in model_card_list['ModelCardSummaries']
    )
    if card_exists:
        sm_client.update_model_card(
            ModelCardName=model_card_name,
            Content=content,
            ModelCardStatus='PendingReview'
        )
    else:
        sm_client.create_model_card(
            ModelCardName=model_card_name,
            Content=content,
            ModelCardStatus='PendingReview',
        )
    
    

def lambda_handler(event, context):
    """Gather training and evaluation details, then create or update the model card.

    Args:
        event: Dict that must contain ``training_job_name`` and
            ``eval_job_name`` plus the keys consumed by ``_create_model_card``
            (``model_name``, ``model_arn``, ``model_artifact``,
            ``model_image``, ``training_job_arn``, ``training_image_arn``,
            ``model_card``). The handler adds ``hyper_param``, ``input_data``,
            ``metrics``, ``stereotype``, ``llm_metric_output`` and
            ``llm_metric_name`` to it.
        context: Lambda context object (unused).

    Returns:
        Dict with ``statusCode``, ``body`` and ``other_key`` entries.

    Raises:
        KeyError: If a required key is missing from ``event``.
        RuntimeError: If no trial component matching the training job name is
            found after several attempts.
    """
    import io
    import tarfile
    import time

    sm_client = boto3.client("sagemaker")
    print(event)
    training_job_name = event["training_job_name"]

    # Training job details: hyperparameters and input channels.
    training_job_details = sm_client.describe_training_job(TrainingJobName=training_job_name)
    event["hyper_param"] = [
        {"name": key, "value": value.strip('"')}
        for key, value in training_job_details['HyperParameters'].items()
        if "sagemaker" not in key
    ]
    event["input_data"] = [
        f"{x['ChannelName']} -> {x['DataSource']['S3DataSource']['S3Uri']}"
        for x in training_job_details['InputDataConfig']
    ]

    # Find the trial component whose name contains the training job name. The
    # listing is fetched again on every attempt, because re-filtering a stale
    # list cannot find a component that appeared in the meantime.
    max_attempts = 4
    trial_c_name = None
    for attempt in range(max_attempts):
        trial_c_list = sm_client.list_trial_components(
            SortBy='CreationTime',
            SortOrder='Descending',
        )['TrialComponentSummaries']
        matches = [
            x['TrialComponentName'] for x in trial_c_list
            if training_job_name in x['TrialComponentName']
        ]
        if matches:
            trial_c_name = matches[0]
            break
        if attempt < max_attempts - 1:
            time.sleep(15)
    if trial_c_name is None:
        raise RuntimeError(
            f"No trial component found for training job {training_job_name}"
        )

    metrics = sm_client.describe_trial_component(
        TrialComponentName=trial_c_name).get('Metrics', [])
    event["metrics"] = [{"name": item['MetricName'], "value": item['Min']} for item in metrics]

    # Evaluation job output: locate output.tar.gz under the job's S3 output path.
    # The prefix is normalised so it works with or without a trailing slash.
    output_prefix = sm_client.describe_training_job(
        TrainingJobName=event['eval_job_name']
    )['OutputDataConfig']['S3OutputPath']
    eval_job_output = output_prefix.rstrip('/') + '/' + event['eval_job_name'] + "/output/output.tar.gz"

    # Split "s3://bucket/key" into bucket and key.
    bucket, _, key = eval_job_output.split("//", 1)[-1].partition('/')

    s3client = boto3.client("s3")
    s3_object = s3client.get_object(Bucket=bucket, Key=key)
    with tarfile.open(fileobj=io.BytesIO(s3_object['Body'].read())) as tarf:
        # Read the first regular file; a directory entry cannot be extracted.
        first_file = next(member for member in tarf.getmembers() if member.isfile())
        raw_content = tarf.extractfile(first_file).read()
    category_scores = json.loads(raw_content.decode('utf-8'))[0]['category_scores']

    event["stereotype"] = [
        {"name": item['name'], "type": "number", "value": item['scores'][0]['value']}
        for item in category_scores
    ]
    event["llm_metric_output"] = eval_job_output
    event["llm_metric_name"] = category_scores[0]['scores'][0]['name']

    # Model card skeleton; _create_model_card fills in the values.
    model_card_template = {
        'model_overview': {
            'model_name': '',
            'model_id': '',
            'model_artifact': [],
            'model_version': 1,
            'problem_type': '',
            'algorithm_type': '',
            'model_description': '',
            'model_creator': '',
            'model_owner': '',
            'inference_environment': {'container_image': []},
        },
        'business_details': {
            'business_problem': '',
            'business_stakeholders': '',
            'line_of_business': '',
        },
        'intended_uses': {
            'intended_uses': '',
            'explanations_for_risk_rating': '',
            'factors_affecting_model_efficiency': '',
            'risk_rating': '',
        },
        'training_details': {
            'objective_function': {'function': {'function': 'Maximize',
                                                'facet': 'Accuracy'}},
            'training_job_details': {
                'training_arn': '',
                'training_datasets': [],
                'training_environment': {'container_image': []},
                'hyper_parameters': [],
                'user_provided_hyper_parameters': [],
            },
        },
        'evaluation_details': [],
    }

    _create_model_card(model_card_template, event)

    return {
        "statusCode": 200,
        "body": json.dumps("Created Model Card!"),
        "other_key": "example_value",
    }

In [None]:
# Use the current time to define unique names for the resources created
import time
from sagemaker.workflow.lambda_step import (
    LambdaStep,
    LambdaOutput,
    LambdaOutputTypeEnum,
)
from sagemaker.lambda_helper import Lambda
# Refresh the timestamp first, then derive the function name from it, so the
# name carries this cell's timestamp rather than one left over from an earlier cell.
current_time = time.strftime("%m-%d-%H-%M-%S", time.localtime())
function_name = "sagemaker-model-card" + current_time


# Lambda helper class can be used to create the Lambda function
func = Lambda(
    function_name=function_name,
    execution_role_arn=lambda_role,
    script="lambda_model_card.py",
    handler="lambda_model_card.lambda_handler",
    timeout=600,
    memory_size=10240,
    runtime='python3.10'
)

# The dictionary returned by the Lambda function is captured by LambdaOutput;
# each key in the dictionary corresponds to one LambdaOutput.
output_param_1 = LambdaOutput(output_name="statusCode", output_type=LambdaOutputTypeEnum.String)
output_param_2 = LambdaOutput(output_name="body", output_type=LambdaOutputTypeEnum.String)
output_param_3 = LambdaOutput(output_name="other_key", output_type=LambdaOutputTypeEnum.String)

# The inputs provided to the Lambda function can be retrieved via the `event`
# object within the `lambda_handler` function in the Lambda. Values taken from
# step properties are resolved when the pipeline runs.
step_modelcard_lambda = LambdaStep(
    name="LambdaStepModelCard",
    lambda_func=func,
    inputs={
        # Model created by the create-model step
        "model_name": model_name,
        "model_arn": step_create_model.properties.ModelArn,
        "model_artifact": step_train.properties.ModelArtifacts.S3ModelArtifacts,
        "model_image": step_create_model.properties.PrimaryContainer.Image,
        # Model package produced by the register step
        "model_package_arn": step_register.properties.ModelPackageArn,
        "model_group_name": step_register.properties.ModelPackageGroupName,
        "model_package_version": step_register.properties.ModelPackageVersion,
        "model_approval_status": step_register.properties.ModelApprovalStatus,
        "role": role,
        # Training job details
        "training_job_name": step_train.properties.TrainingJobName,
        "training_job_arn": step_train.properties.TrainingJobArn,
        "training_image_arn": step_train.properties.AlgorithmSpecification.TrainingImage,
        "train_time": step_train.properties.BillableTimeInSeconds,
        "instance_type": step_train.properties.ResourceConfig.InstanceType,
        "instance_count": step_train.properties.ResourceConfig.InstanceCount,
        # Model card name and the evaluation job whose output feeds the card
        "model_card": model_card_name,
        "eval_job_name": step_process.properties.TrainingJobName,
    },
    outputs=[output_param_1, output_param_2, output_param_3],
)

In [None]:
from sagemaker.workflow.pipeline import Pipeline

# Assemble the pipeline from the parameters and steps defined in earlier cells.
pipeline_name = "LLama2TunePipeline"

# Every parameter listed here can be overridden when an execution is started.
_pipeline_parameters = [
    training_instance_count,
    endpoint_instance_type,
    training_instance_type,
    disable_output_compression,
    intstruction_tune,
    chat_dataset,
    epoch,
    quantization,
    fsdp,
    train_batch_size,
    eval_batch_size,
    learning_rate,
    lora_r,
    lora_alpha,
    lora_dropout,
    train_data_s3_path,
    val_data_s3_path,
    model_approval_status,
    output_bucket,
    model_package_group_name,
    jumpstart_model_id,
    jumpstart_model_version,
    model_card_name,
    stereotype_data_s3_path,
    stereotype_dataset_name,
]

_pipeline_steps = [
    step_train,
    step_create_model,
    step_register,
    step_deploy_lambda,
    step_process,
    step_modelcard_lambda,
]

pipeline = Pipeline(
    name=pipeline_name,
    parameters=_pipeline_parameters,
    steps=_pipeline_steps,
)

In [None]:
import json
# Parse the pipeline definition JSON into a dict; the bare name on the last
# line displays it as the cell output.
_definition_json = pipeline.definition()
definition = json.loads(_definition_json)
definition

In [None]:
# Upsert the pipeline definition, passing the notebook's execution role as role_arn.
pipeline.upsert(role_arn=role)

In [None]:
!aws s3 cp crows-pairs_sample.jsonl s3://BUCKET/crows-pairs_sample.jsonl ## upload the stereotype dataset to the S3 location that StereotypeDataS3Path points at

In [None]:
# Start one execution, overriding the parameters listed here; the remaining
# parameters keep their defaults.
_start_parameters = {
    "ModelPackageGroupName": "LLama2-7b-FinanceTuned",
    "ModelCardName": "LLama2-7b-FinanceTuned",
    "TrainDataS3Path": "s3://jumpstart-cache-prod-us-east-1/training-datasets/sec_data/train/",
    "ValidationDataS3Path": "s3://jumpstart-cache-prod-us-east-1/training-datasets/sec_data/validation/",
    "TrainingInstanceType": "ml.g5.12xlarge",
    "Epoch": "1",
    "ChatDataset": "False",
    # Get the dataset from https://github.com/aws/fmeval/blob/main/examples/crows-pairs_sample.jsonl
    "StereotypeDataS3Path": "s3://BUCKET/crows-pairs_sample.jsonl",
    "StereotypeDataName": "crows-pairs_sample.jsonl",
}
execution = pipeline.start(parameters=_start_parameters)

In [None]:
# Parameter overrides for an externally triggered pipeline execution, saved to
# config.json in the working directory.
trigger_config = dict(
    ModelPackageGroupName="LLama2FinanceTuned",
    ModelCardName="LLama2-7b-FinanceAdapted",
    TrainDataS3Path="s3://jumpstart-cache-prod-us-east-1/training-datasets/sec_data/train/",
    ValidationDataS3Path="s3://jumpstart-cache-prod-us-east-1/training-datasets/sec_data/validation/",
    TrainingInstanceType="ml.g5.12xlarge",
    Epoch="1",
    ChatDataset="False",
    # Get the dataset from https://github.com/aws/fmeval/blob/main/examples/crows-pairs_sample.jsonl
    StereotypeDataS3Path="s3://BUCKET/crows-pairs_sample.jsonl",
    StereotypeDataName="crows-pairs_sample.jsonl",
)
with open("config.json", "w") as config_file:
    config_file.write(json.dumps(trigger_config))

In [None]:
!aws s3 cp config.json s3://BUCKET/pipeline_trigger/ ## Upload the trigger config. Separately, create a Lambda function, triggered by this S3 bucket, that starts an execution of the pipeline with the same pipeline name as above.