# Deployment and Monitoring

This notebook deploys a model trained in a pipeline and monitors it.

## Update Model Package Approval Status

We can approve the model using the SageMaker Studio UI or programmatically as shown below.

In [2]:
from botocore.exceptions import ClientError

import os
import sagemaker
import logging
import boto3
import sagemaker
import pandas as pd

# One boto3 session serves both the region lookup and the low-level SageMaker client.
boto_session = boto3.Session()
region = boto_session.region_name
sm = boto_session.client(service_name="sagemaker", region_name=region)

# SageMaker SDK session, its default bucket, and the execution role used by later cells.
sess = sagemaker.Session()
bucket = sess.default_bucket()
role = sagemaker.get_execution_role()

In [3]:
# Name of the SageMaker pipeline whose executions are inspected below.
pipeline_name = 'qa-pipeline-16327098691632709869'

### Retrieve Model From Pipelines

In [4]:
import time
from pprint import pprint

# Fetch the execution summaries once; index 0 is treated as the execution to follow.
executions_response = sm.list_pipeline_executions(PipelineName=pipeline_name)["PipelineExecutionSummaries"]
pipeline_execution_status = executions_response[0]["PipelineExecutionStatus"]
print(pipeline_execution_status)

# Poll until that execution leaves the "Executing" state.
while pipeline_execution_status == "Executing":
    # Pause before every poll. Previously the sleep ran only after a failed call,
    # so successful polls hit the API in a tight loop.
    time.sleep(30)
    try:
        executions_response = sm.list_pipeline_executions(PipelineName=pipeline_name)["PipelineExecutionSummaries"]
        pipeline_execution_status = executions_response[0]["PipelineExecutionStatus"]
    except Exception as e:
        # Best-effort: keep waiting, but show what went wrong instead of hiding it.
        print("Please wait...")
        print(f"(last poll failed: {e!r})")

pprint(executions_response)

Succeeded
[{'PipelineExecutionArn': 'arn:aws:sagemaker:us-east-1:093729152554:pipeline/qa-pipeline-16327098691632709869/execution/jy1d2p7zekky',
  'PipelineExecutionDisplayName': 'slot-val-f1-94',
  'PipelineExecutionStatus': 'Succeeded',
  'StartTime': datetime.datetime(2021, 9, 27, 3, 2, 2, 378000, tzinfo=tzlocal())},
 {'PipelineExecutionArn': 'arn:aws:sagemaker:us-east-1:093729152554:pipeline/qa-pipeline-16327098691632709869/execution/e1b1hr0ran8z',
  'PipelineExecutionDisplayName': 'execution-1632710043206',
  'PipelineExecutionStatus': 'Stopped',
  'StartTime': datetime.datetime(2021, 9, 27, 2, 34, 3, 47000, tzinfo=tzlocal())},
 {'PipelineExecutionArn': 'arn:aws:sagemaker:us-east-1:093729152554:pipeline/qa-pipeline-16327098691632709869/execution/yk84zwisygan',
  'PipelineExecutionDisplayName': 'execution-1632709950120',
  'PipelineExecutionStatus': 'Succeeded',
  'StartTime': datetime.datetime(2021, 9, 27, 2, 32, 29, 997000, tzinfo=tzlocal())}]


### List Execution Steps

In [5]:
# Status of the first execution summary in the list fetched above.
pipeline_execution_status = executions_response[0]["PipelineExecutionStatus"]
print(pipeline_execution_status)

Succeeded


In [6]:
# ARN of that same execution; it is used to list the execution's steps.
pipeline_execution_arn = executions_response[0]["PipelineExecutionArn"]
print(pipeline_execution_arn)

arn:aws:sagemaker:us-east-1:093729152554:pipeline/qa-pipeline-16327098691632709869/execution/jy1d2p7zekky


In [7]:
from pprint import pprint

# List every step of the selected pipeline execution, including each step's metadata.
steps = sm.list_pipeline_execution_steps(PipelineExecutionArn=pipeline_execution_arn)

pprint(steps)

{'PipelineExecutionSteps': [{'EndTime': datetime.datetime(2021, 9, 27, 3, 25, 13, 164000, tzinfo=tzlocal()),
                             'Metadata': {'RegisterModel': {'Arn': 'arn:aws:sagemaker:us-east-1:093729152554:model-package/qamodelpackagegroup/8'}},
                             'StartTime': datetime.datetime(2021, 9, 27, 3, 25, 12, 337000, tzinfo=tzlocal()),
                             'StepName': 'QARegisterModel',
                             'StepStatus': 'Succeeded'},
                            {'EndTime': datetime.datetime(2021, 9, 27, 3, 25, 13, 64000, tzinfo=tzlocal()),
                             'Metadata': {'Model': {'Arn': 'arn:aws:sagemaker:us-east-1:093729152554:model/pipelines-jy1d2p7zekky-createqamodel-soa7ikpht4'}},
                             'StartTime': datetime.datetime(2021, 9, 27, 3, 25, 12, 267000, tzinfo=tzlocal()),
                             'StepName': 'CreateQAModel',
                             'StepStatus': 'Succeeded'},
                     

### View Registered Model and Update Model Approval Status

In [8]:
# Pick the model package ARN recorded by the "QARegisterModel" step.
model_package_arn = next(
    (
        execution_step["Metadata"]["RegisterModel"]["Arn"]
        for execution_step in steps["PipelineExecutionSteps"]
        if execution_step["StepName"] == "QARegisterModel"
    ),
    None,
)
# Fail loudly if the step is missing; otherwise a stale value left in the kernel
# by an earlier run could be printed and used without anyone noticing.
if model_package_arn is None:
    raise RuntimeError("No 'QARegisterModel' step found in this pipeline execution")
print(model_package_arn)

arn:aws:sagemaker:us-east-1:093729152554:model-package/qamodelpackagegroup/8


In [15]:
# Programmatically mark the registered model package as approved.
model_package_update_response = sm.update_model_package(
    ModelPackageArn=model_package_arn,
    ModelApprovalStatus="Approved",  # Other options are Rejected and PendingManualApproval
)

### View Created Model

In [9]:
# Pick the model ARN recorded by the "CreateQAModel" step.
model_arn = next(
    (
        execution_step["Metadata"]["Model"]["Arn"]
        for execution_step in steps["PipelineExecutionSteps"]
        if execution_step["StepName"] == "CreateQAModel"
    ),
    None,
)
# Fail loudly if the step is missing instead of raising NameError or reusing a stale value.
if model_arn is None:
    raise RuntimeError("No 'CreateQAModel' step found in this pipeline execution")
print(model_arn)

# The model name is the last "/"-separated segment of the ARN.
created_model_name = model_arn.split("/")[-1]
print('created_model_name:', created_model_name)

arn:aws:sagemaker:us-east-1:093729152554:model/pipelines-jy1d2p7zekky-createqamodel-soa7ikpht4
created_model_name: pipelines-jy1d2p7zekky-createqamodel-soa7ikpht4


## Create Model Endpoint from Model Registry and Configure It to Capture Requests

### Create model from registry

More details here: https://docs.aws.amazon.com/sagemaker/latest/dg/model-registry-deploy.html

In [17]:
import time

# Epoch seconds; embedded in the resource names created below.
timestamp = int(time.time())

model_from_registry_name = f"qa-model-from-registry-{timestamp}"
print(f"Model from registry name : {model_from_registry_name}")

# Container definition that refers to the registered model package by its ARN.
model_registry_package_container = dict(ModelPackageName=model_package_arn)

Model from registry name : qa-model-from-registry-1632713407


In [18]:
from pprint import pprint

# Create a SageMaker model backed by the registered model package.
create_model_request = {
    "ModelName": model_from_registry_name,
    "ExecutionRoleArn": role,
    "PrimaryContainer": model_registry_package_container,
}
create_model_from_registry_respose = sm.create_model(**create_model_request)
pprint(create_model_from_registry_respose)

{'ModelArn': 'arn:aws:sagemaker:us-east-1:093729152554:model/qa-model-from-registry-1632713407',
 'ResponseMetadata': {'HTTPHeaders': {'content-length': '95',
                                      'content-type': 'application/x-amz-json-1.1',
                                      'date': 'Mon, 27 Sep 2021 03:30:09 GMT',
                                      'x-amzn-requestid': '8b88949f-93c7-4917-8585-527be5e99b10'},
                      'HTTPStatusCode': 200,
                      'RequestId': '8b88949f-93c7-4917-8585-527be5e99b10',
                      'RetryAttempts': 0}}


In [19]:
# ARN of the model that create_model just returned.
model_from_registry_arn = create_model_from_registry_respose["ModelArn"]
model_from_registry_arn

'arn:aws:sagemaker:us-east-1:093729152554:model/qa-model-from-registry-1632713407'

### Configure Endpoint to Capture Data from Requests and Responses

Check API for [create_endpoint_config](https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker.html#SageMaker.Client.create_endpoint_config)

In [22]:
# S3 bucket and key prefix that form the data-capture destination URI.
data_capture_bucket = 'sm-nlp-data'
data_capture_prefix = 'inference/'

Creates an endpoint configuration that Amazon SageMaker hosting services uses to deploy models.

In [21]:
endpoint_config_name = f"qa-model-from-registry-epc-{timestamp}"
print(endpoint_config_name)

# Single production variant that receives all traffic.
# NOTE(review): this serves `created_model_name` (the model from the pipeline's
# CreateQAModel step), not `model_from_registry_name` created above - confirm which is intended.
all_traffic_variant = {
    "VariantName": "AllTraffic",
    "ModelName": created_model_name,
    "InstanceType": "ml.m5.4xlarge",
    "InitialInstanceCount": 1,
    "InitialVariantWeight": 1,
}

# Capture 100% of requests (Input) and responses (Output) to the S3 destination.
data_capture_config = {
    'EnableCapture': True,
    'InitialSamplingPercentage': 100,
    'DestinationS3Uri': f"s3://{data_capture_bucket}/{data_capture_prefix}",
    'CaptureOptions': [{'CaptureMode': mode} for mode in ('Input', 'Output')],
}

create_endpoint_config_response = sm.create_endpoint_config(
    EndpointConfigName=endpoint_config_name,
    ProductionVariants=[all_traffic_variant],
    DataCaptureConfig=data_capture_config,
)

qa-model-from-registry-epc-1632713407


To delete an existing endpoint config, run the following command:

In [36]:
# !aws sagemaker delete-endpoint-config --endpoint-config-name $endpoint_config_name

In [17]:
# Name of the endpoint to create or reuse.
# NOTE(review): this is a fixed literal, not derived from `timestamp`, so a re-run
# targets the same endpoint name - confirm that is intended.
pipeline_endpoint_name = 'qa-model-from-registry-ep-1632713407'

In [18]:
print("EndpointName={}".format(pipeline_endpoint_name))

try:
    create_endpoint_response = sm.create_endpoint(
        EndpointName=pipeline_endpoint_name, EndpointConfigName=endpoint_config_name
    )
except ClientError as e:
    # create_endpoint can fail for reasons other than "already exists", so show the
    # real error rather than assuming. describe_endpoint then either returns the
    # existing endpoint or raises if there is none.
    print(f"Could not create endpoint {pipeline_endpoint_name} ({e}); trying to retrieve an existing one ...")
    create_endpoint_response = sm.describe_endpoint(
        EndpointName=pipeline_endpoint_name
    )
print(create_endpoint_response["EndpointArn"])

EndpointName=qa-model-from-registry-ep-1632713407
Endpoint with name qa-model-from-registry-ep-1632713407 already exists, retrieving it ...
arn:aws:sagemaker:us-east-1:093729152554:endpoint/qa-model-from-registry-ep-1632713407


In [19]:
# Import from the public IPython.display module; importing `display` from
# IPython.core.display is deprecated.
from IPython.display import display, HTML

# Console URL for the endpoint in the current region.
endpoint_console_url = "https://console.aws.amazon.com/sagemaker/home?region={}#/endpoints/{}".format(
    region, pipeline_endpoint_name
)

# target="_blank" opens a new tab; the previous value "blank" named a window instead.
display(
    HTML(
        '<b>Review <a target="_blank" href="{}">SageMaker REST Endpoint</a></b>'.format(
            endpoint_console_url
        )
    )
)

In [20]:
%%time

# Block until the "endpoint_in_service" waiter reports success for this endpoint.
waiter = sm.get_waiter("endpoint_in_service")
waiter.wait(EndpointName=pipeline_endpoint_name)

CPU times: user 16.9 ms, sys: 843 µs, total: 17.8 ms
Wall time: 135 ms


### List All Artifacts

In [24]:
import time
from sagemaker.lineage.visualizer import LineageTableVisualizer

viz = LineageTableVisualizer(sagemaker.session.Session())

# Walk the steps in reverse of the listed order and show a lineage table for each.
for execution_step in reversed(steps["PipelineExecutionSteps"]):
    print(execution_step)
    step_name = execution_step["StepName"]
    looked_up_by_job_name = step_name in ("Processing", "Train")

    # Processing and Train are looked up by job name because LineageTableVisualizer
    # appears to mishandle those steps when given the step dict directly.
    if step_name == "Processing":
        processing_job_name = execution_step["Metadata"]["ProcessingJob"]["Arn"].rsplit("/", 1)[-1]
        print(processing_job_name)
        show_kwargs = {"processing_job_name": processing_job_name}
    elif step_name == "Train":
        training_job_name = execution_step["Metadata"]["TrainingJob"]["Arn"].rsplit("/", 1)[-1]
        print(training_job_name)
        show_kwargs = {"training_job_name": training_job_name}
    else:
        show_kwargs = {"pipeline_execution_step": execution_step}

    display(viz.show(**show_kwargs))
    # Only the generic step lookup is followed by a pause.
    if not looked_up_by_job_name:
        time.sleep(5)

{'StepName': 'Processing', 'StartTime': datetime.datetime(2021, 9, 27, 3, 2, 4, 166000, tzinfo=tzlocal()), 'EndTime': datetime.datetime(2021, 9, 27, 3, 6, 24, 980000, tzinfo=tzlocal()), 'StepStatus': 'Succeeded', 'Metadata': {'ProcessingJob': {'Arn': 'arn:aws:sagemaker:us-east-1:093729152554:processing-job/pipelines-jy1d2p7zekky-processing-t8x7f3qjcy'}}}
pipelines-jy1d2p7zekky-processing-t8x7f3qjcy


Unnamed: 0,Name/Source,Direction,Type,Association Type,Lineage Type
0,s3://...27-02-33-20-926/input/code/preprocess.py,Input,DataSet,ContributedTo,artifact
1,s3://sm-nlp-data/nlu/data/qa_raw.zip,Input,DataSet,ContributedTo,artifact
2,68331...om/sagemaker-scikit-learn:0.23-1-cpu-py3,Input,Image,ContributedTo,artifact
3,s3://sm-nlp-data/nlu/data/processed/,Output,DataSet,Produced,artifact


{'StepName': 'Train', 'StartTime': datetime.datetime(2021, 9, 27, 3, 6, 25, 701000, tzinfo=tzlocal()), 'EndTime': datetime.datetime(2021, 9, 27, 3, 19, 43, 772000, tzinfo=tzlocal()), 'StepStatus': 'Succeeded', 'Metadata': {'TrainingJob': {'Arn': 'arn:aws:sagemaker:us-east-1:093729152554:training-job/pipelines-jy1d2p7zekky-train-xnow7xzq7v'}}}
pipelines-jy1d2p7zekky-train-xnow7xzq7v


Unnamed: 0,Name/Source,Direction,Type,Association Type,Lineage Type
0,s3://sm-nlp-data/nlu/data/processed/,Input,DataSet,ContributedTo,artifact
1,76310...onaws.com/pytorch-training:1.8.1-gpu-py3,Input,Image,ContributedTo,artifact
2,s3://...kky-Train-XNow7xzQ7v/output/model.tar.gz,Output,Model,Produced,artifact


{'StepName': 'EvaluateModel', 'StartTime': datetime.datetime(2021, 9, 27, 3, 19, 44, 82000, tzinfo=tzlocal()), 'EndTime': datetime.datetime(2021, 9, 27, 3, 25, 10, 510000, tzinfo=tzlocal()), 'StepStatus': 'Succeeded', 'Metadata': {'ProcessingJob': {'Arn': 'arn:aws:sagemaker:us-east-1:093729152554:processing-job/pipelines-jy1d2p7zekky-evaluatemodel-bv007hps11'}}}


Unnamed: 0,Name/Source,Direction,Type,Association Type,Lineage Type
0,s3://...9-27-02-34-02-425/input/code/evaluate.py,Input,DataSet,ContributedTo,artifact
1,s3://sm-nlp-data/nlu/data/processed/,Input,DataSet,ContributedTo,artifact
2,s3://...kky-Train-XNow7xzQ7v/output/model.tar.gz,Input,Model,ContributedTo,artifact
3,68331...om/sagemaker-scikit-learn:0.23-1-cpu-py3,Input,Image,ContributedTo,artifact
4,s3://...n-2021-09-27-02-31-10-102/output/metrics,Output,DataSet,Produced,artifact


{'StepName': 'IntentAndSlotCondition', 'StartTime': datetime.datetime(2021, 9, 27, 3, 25, 11, 27000, tzinfo=tzlocal()), 'EndTime': datetime.datetime(2021, 9, 27, 3, 25, 11, 814000, tzinfo=tzlocal()), 'StepStatus': 'Succeeded', 'Metadata': {'Condition': {'Outcome': 'True'}}}


None

{'StepName': 'CreateQAModel', 'StartTime': datetime.datetime(2021, 9, 27, 3, 25, 12, 267000, tzinfo=tzlocal()), 'EndTime': datetime.datetime(2021, 9, 27, 3, 25, 13, 64000, tzinfo=tzlocal()), 'StepStatus': 'Succeeded', 'Metadata': {'Model': {'Arn': 'arn:aws:sagemaker:us-east-1:093729152554:model/pipelines-jy1d2p7zekky-createqamodel-soa7ikpht4'}}}


None

{'StepName': 'QARegisterModel', 'StartTime': datetime.datetime(2021, 9, 27, 3, 25, 12, 337000, tzinfo=tzlocal()), 'EndTime': datetime.datetime(2021, 9, 27, 3, 25, 13, 164000, tzinfo=tzlocal()), 'StepStatus': 'Succeeded', 'Metadata': {'RegisterModel': {'Arn': 'arn:aws:sagemaker:us-east-1:093729152554:model-package/qamodelpackagegroup/8'}}}


Unnamed: 0,Name/Source,Direction,Type,Association Type,Lineage Type
0,qamodelpackagegroup-8-Approved-1632713391-aws-...,Input,Approval,ContributedTo,action
1,s3://...kky-Train-XNow7xzQ7v/output/model.tar.gz,Input,Model,ContributedTo,artifact
2,76310...naws.com/pytorch-inference:1.8.1-gpu-py3,Input,Image,ContributedTo,artifact
3,qamodelpackagegroup-8-PendingManualApproval-16...,Input,Approval,ContributedTo,action
4,QAModelPackageGroup-1631002331-aws-model-packa...,Output,ModelGroup,AssociatedWith,context


### Test the Deployed Model

CSVSerializer: [DOC](https://sagemaker.readthedocs.io/en/stable/api/inference/serializers.html#sagemaker.serializers.CSVSerializer) <br/>
JSONDeserializer: [DOC](https://sagemaker.readthedocs.io/en/stable/api/inference/deserializers.html#sagemaker.deserializers.JSONDeserializer)

In [25]:
import json
from sagemaker.pytorch.model import PyTorchPredictor
from sagemaker.serializers import CSVSerializer
from sagemaker.deserializers import JSONDeserializer

# Client for the deployed endpoint, configured with a CSV serializer for requests
# and a JSON deserializer for responses.
predictor = PyTorchPredictor(
    endpoint_name=pipeline_endpoint_name,
    sagemaker_session=sess,
    serializer=CSVSerializer(),
    deserializer=JSONDeserializer(),
)

Try predict on some sample data

In [26]:
# Read the sample file explicitly as UTF-8 (it holds Chinese text) so the result
# does not depend on the platform's default encoding.
with open('processed/psuedo/seq.in', 'r', encoding='utf-8') as f:
    lines = f.read()

# Call the endpoint once the file has been closed.
predicted = predictor.predict(lines)
predicted

{'text': [['伊', '坂', '幸', '太', '郎', '写', '了', '哪', '些', '书'],
  ['O',
   'N',
   'E',
   ' ',
   'P',
   'I',
   'E',
   'C',
   'E',
   '総',
   '集',
   '编',
   ' ',
   'T',
   'H',
   'E',
   ' ',
   'F',
   'I',
   'R',
   'S',
   'T',
   ' ',
   'L',
   'O',
   'G',
   '是',
   '谁',
   '写',
   '的'],
  ['高',
   '效',
   '管',
   '理',
   'W',
   'i',
   'n',
   'd',
   'o',
   'w',
   's',
   '网',
   '络',
   '/',
   'W',
   'i',
   'n',
   '3',
   '2',
   ' ',
   'P',
   'e',
   'r',
   'l',
   '应',
   '用',
   '之',
   '道',
   '的',
   '作',
   '者',
   '是',
   '谁'],
  ['D', 'a', 'v', 'e', '写', '了', '什', '么', '书'],
  ['洪', '荒', '之', '武', '道', '是', '谁', '写', '的'],
  ['玄', '黄', '真', '人', '写', '了', '哪', '些', '书'],
  ['风', '景', '景', '观', '工', '程', '体', '系', '化', '是', '谁', '的', '作', '品'],
  ['微', '知', '汇', '：', '万', '物', '简', '史', '的', '作', '者', '是', '谁'],
  ['孽', '阳', '的', '作', '者', '是', '谁'],
  ['茅', '月', '写', '了', '哪', '些', '书'],
  ['未', '来', '娱', '乐', '系', '统', '是', '谁', '写', '的'],
  ['小', '僧

Predicting all the data in a single request would make the payload too long and exceed the endpoint's time limit, so we split the data into small chunks.

In [27]:
from tqdm import tqdm

chunk_size = 20
predicted_cls = []

# Read the test inputs explicitly as UTF-8 so decoding does not depend on the platform default.
with open('processed/test/seq.in', encoding='utf-8') as f:
    lines = f.readlines()

# Send the lines to the endpoint in groups of `chunk_size`, after the file is closed,
# and collect the predicted intent labels in order.
chunks = [lines[i: i+chunk_size] for i in range(0, len(lines), chunk_size)]
for chunk in tqdm(chunks):
    predicted = predictor.predict(chunk)
    predicted_cls += predicted['intentions']

100%|██████████| 18/18 [00:55<00:00,  3.08s/it]


### View Captured Data

In [23]:
s3 = boto3.Session().client('s3')

# Captured data for this endpoint lives under <data_capture_prefix><endpoint name>.
current_endpoint_capture_prefix = '{}{}'.format(data_capture_prefix, pipeline_endpoint_name)
result = s3.list_objects(Bucket=data_capture_bucket, Prefix=current_endpoint_capture_prefix)

# The response has no 'Contents' key when nothing matches the prefix (for example
# before the first capture file is written), so default to an empty list.
capture_files = [capture_file.get("Key") for capture_file in result.get('Contents', [])]
if capture_files:
    print("Found Capture Files:")
    print("\n ".join(capture_files))
else:
    print(f"No capture files found yet under s3://{data_capture_bucket}/{current_endpoint_capture_prefix}")

Found Capture Files:
inference/qa-model-from-registry-ep-1632713407/AllTraffic/2021/09/27/03/46-40-392-89950adc-6ce6-4662-856c-c64f47b2c15f.jsonl
 inference/qa-model-from-registry-ep-1632713407/AllTraffic/2021/09/27/03/47-40-522-63c42e9b-874b-4bb3-b4fc-5a2d12281b83.jsonl


In [29]:
# Key of the first capture file in the listing.
capture_files[0]

'inference/qa-model-from-registry-ep-1632713407/AllTraffic/2021/09/27/03/46-40-392-89950adc-6ce6-4662-856c-c64f47b2c15f.jsonl'

Use the S3Downloader utility to view and download the captured data in Amazon S3:

In [26]:
from sagemaker.s3 import S3Downloader
# Read the first capture file from S3 and display only its first 2000 characters.
traffic = S3Downloader.read_file(f"s3://{data_capture_bucket}/{capture_files[0]}")
traffic[:2000]

'{"captureData":{"endpointInput":{"observedContentType":"text/csv","mode":"INPUT","data":"5LyK5Z2C5bm45aSq6YOO5YaZ5LqG5ZOq5Lqb5LmmCk9ORSBQSUVDRee3j+mbhue8liBUSEUgRklSU1QgTE9H5piv6LCB5YaZ55qECumrmOaViOeuoeeQhldpbmRvd3PnvZHnu5wvV2luMzIgUGVybOW6lOeUqOS5i+mBk+eahOS9nOiAheaYr+iwgQpEYXZl5YaZ5LqG5LuA5LmI5LmmCua0quiNkuS5i+atpumBk+aYr+iwgeWGmeeahArnjoTpu4TnnJ/kurrlhpnkuoblk6rkupvkuaYK6aOO5pmv5pmv6KeC5bel56iL5L2T57O75YyW5piv6LCB55qE5L2c5ZOBCuW+ruefpeaxh++8muS4h+eJqeeugOWPsueahOS9nOiAheaYr+iwgQrlrb3pmLPnmoTkvZzogIXmmK/osIEK6IyF5pyI5YaZ5LqG5ZOq5Lqb5LmmCuacquadpeWoseS5kOezu+e7n+aYr+iwgeWGmeeahArlsI/lg6fkuI3mlbLmnKjpsbzmnInku4DkuYjokZfkvZwK6YeR6KOF5Zub5aSn5omN5a2Q5piv6LCB55qE5L2c5ZOBCue9l+awuOi0pOWvvOa8lOS6huWTquS6m+eUteW9sQrkuLrkuobkvaDmiJHmhL/mhI/ng63niLHmlbTkuKrkuJbnlYzmmK/osIHnmoTkvZzlk4EK6YOt6JmO5a+85ryU5LqG5ZOq5Lqb55S15b2xCuS4uuS6huS9oOaIkeaEv+aEj+eDreeIseaVtOS4quS4lueVjOaYr+iwgeWvvOa8lOeahArnga3nvarluIjnmoTlr7zmvJTmmK/osIEK5p2o6IuX5a+85ryU5LqG5ZOq5Lqb55S16KeG5YmnCueBree9quW4iOaYr+iwgeWvvOa8lO

In [30]:
# `random` is otherwise first imported in a later cell, so import it here to keep
# this cell runnable on a fresh kernel.
import random

help(random.sample)

Help on method sample in module random:

sample(population, k) method of random.Random instance
    Chooses k unique random elements from a population sequence or set.
    
    Returns a new list containing elements from the population while
    leaving the original population unchanged.  The resulting list is
    in selection order so that all sub-slices will also be valid random
    samples.  This allows raffle winners (the sample) to be partitioned
    into grand prize and second place winners (the subslices).
    
    Members of the population need not be hashable or unique.  If the
    population contains repeats, then each occurrence is a possible
    selection in the sample.
    
    To choose a sample in a range of integers, use range as an argument.
    This is especially fast and space efficient for sampling from a
    large population:   sample(range(10000000), 60)



In [36]:
import random

# The capture file holds one JSON record per line.
traffics = traffic.strip().split('\n')
print(f"Got {len(traffics)} piece of captured data")

# Draw a single random record index and look at that record.
(i,) = random.sample(range(len(traffics)), 1)
traffic_i = traffics[i]
print(f"No.{i} piece of data is:")
traffic_i

Got 9 piece of captured data
No.4 piece of data is:


'{"captureData":{"endpointInput":{"observedContentType":"text/csv","mode":"INPUT","data":"5LiJ5bqm6K+x5oOR55qE5a+85ryU5piv6LCBCgrkvZXol6nlr7zmvJTkuoblk6rkupvnlLXop4bliacKCuaIkeS5lOW4g+aWr+aYr+iwgeWGmeeahAoK5aaW57K+5YeE5Y2f5puy5LmL576O55S35bCx5Zyw5omR5YCS55qE5L2c6ICF5piv6LCBCgrorrjmmYvkuqjnmoTphY3lgbbmmK/osIEKCuadjuWYieaso+WSjOiwgee7k+WpmuS6hgoK5Yaz6IOc5piv6LCB55qE5L2c5ZOBCgrmnpfmmZPolJrmr5XkuJrkuo7lk6rph4wKCuacieWTquS6m+S6uuS7juS4reWNl+Wkp+WtpuavleS4mgoK55yL5LiK5Y675b6I576O55qE5a+85ryU5piv6LCBCgrpn6bovr7liKnmnaXoh6rkuo7lk6rkuKrlm73lrrYKCua1geW5tOemgeWcsOaYr+iwgeeahOS9nOWTgQoK5Lqy54ix55qE5piv6LCB5a+85ryU55qECgroo7Tlhpvlr7zmvJTkuoblk6rkupvnlLXop4bliacKCumtlOivq+Wlh+WFteaYr+iwgeWvvOa8lOeahAoK5Li5wrfljaHmlq/ljaHmi4nph4zlr7zmvJTkuoblk6rkupvnlLXlvbEKCuaZuuWPluWogeiZjuWxseaYr+iwgeWvvOa8lOeahAoK5b6Q5YWL5a+85ryU5LqG5ZOq5Lqb55S15b2xCgrmlr3lsJrmmI7ku47lk6rph4zmr5XkuJoKCuiwgeWGmeS6hue7neS7o+WAvuWfjuWkqeaJjemtlOazleW4iAo=","encoding":"BASE64"},"endpointOutput":{"observedContentType":"application/js

In [39]:
import json

# Parse the sampled record once, then pull the request and response payloads from it.
capture_data = json.loads(traffic_i)['captureData']
endpoint_input_data = capture_data['endpointInput']['data']
endpoint_output_data = capture_data['endpointOutput']['data']

Decode payload with base64 library

In [70]:
import base64

# Base64-decode the captured request payload and interpret the bytes as UTF-8 text.
base64.b64decode(endpoint_input_data).decode('utf-8')

'有哪些人从中国人民大学毕业\n\n将爱情进行到底是谁导演的\n\n超品神医是谁写的\n\n冰林写了哪些书\n\n王锋的国籍是什么\n\n王锋从哪里毕业\n\n有哪些人从北京大学毕业\n\n谁写了东坡易传\n\n王现昌的国籍是什么\n\n赏金猎人的导演是谁\n\n鲁斯达是哪个国家的人\n\n布拉德·皮特的妻子是谁\n\n安吉丽娜·朱莉的配偶是谁\n\n铁马战车是谁的作品\n\n天才宝贝恶魔冤家的作者是谁\n\n空白丶写了什么书\n\n江南逢李龟年是谁写的\n\n异界之再战风云是谁的作品\n\n品味人生写了什么书\n\n看透你的作者是谁\n'

In [71]:
# Same decoding for the captured response payload.
base64.b64decode(endpoint_output_data).decode('utf-8')

'{"text": [["有", "哪", "些", "人", "从", "中", "国", "人", "民", "大", "学", "毕", "业"], ["将", "爱", "情", "进", "行", "到", "底", "是", "谁", "导", "演", "的"], ["超", "品", "神", "医", "是", "谁", "写", "的"], ["冰", "林", "写", "了", "哪", "些", "书"], ["王", "锋", "的", "国", "籍", "是", "什", "么"], ["王", "锋", "从", "哪", "里", "毕", "业"], ["有", "哪", "些", "人", "从", "北", "京", "大", "学", "毕", "业"], ["谁", "写", "了", "东", "坡", "易", "传"], ["王", "现", "昌", "的", "国", "籍", "是", "什", "么"], ["赏", "金", "猎", "人", "的", "导", "演", "是", "谁"], ["鲁", "斯", "达", "是", "哪", "个", "国", "家", "的", "人"], ["布", "拉", "德", "·", "皮", "特", "的", "妻", "子", "是", "谁"], ["安", "吉", "丽", "娜", "·", "朱", "莉", "的", "配", "偶", "是", "谁"], ["铁", "马", "战", "车", "是", "谁", "的", "作", "品"], ["天", "才", "宝", "贝", "恶", "魔", "冤", "家", "的", "作", "者", "是", "谁"], ["空", "白", "丶", "写", "了", "什", "么", "书"], ["江", "南", "逢", "李", "龟", "年", "是", "谁", "写", "的"], ["异", "界", "之", "再", "战", "风", "云", "是", "谁", "的", "作", "品"], ["品", "味", "人", "生", "写", "了", "什", "么", "书"], ["看", "透", "你", "的", "作", 

## Monitor SageMaker endpoints

There are two main kinds of monitoring, data quality monitoring and model quality monitoring:

- data quality monitoring captures inference input, and compares data statistics like min, max with a baseline created from dataset [[Monitor Data Quality](https://docs.aws.amazon.com/sagemaker/latest/dg/model-monitor-data-quality.html)]
- model quality monitoring monitors the performance of a model by comparing the predictions that the model makes with the actual ground truth labels that the model attempts to predict. [[Monitor Model Quality](https://docs.aws.amazon.com/sagemaker/latest/dg/model-monitor-model-quality.html)]

Data quality monitoring is only applicable to tabular data and is therefore **not suitable** for this question-understanding use case. Here we implement model quality monitoring instead.

Reference:
- AWS Doc: [Amazon SageMaker Model Monitor](https://docs.aws.amazon.com/sagemaker/latest/dg/model-monitor.html)
- SageMaker Doc: [Amazon SageMaker Model Monitor](https://sagemaker.readthedocs.io/en/stable/amazon_sagemaker_model_monitoring.html)
- [AWS Workshop: Model Monitor](https://sagemaker-immersionday.workshop.aws/lab4/monitoring.html)
- [Create a Model Quality Baseline](https://docs.aws.amazon.com/sagemaker/latest/dg/model-monitor-model-quality-baseline.html)

### Create a Model Quality Baseline

1.  Create an instance of the ModelQualityMonitor class. 

Check SageMaker ModelQualityMonitor API: [Doc](https://sagemaker.readthedocs.io/en/stable/api/inference/model_monitor.html)

In [19]:
# Baseline job name and the S3 URI (s3://<bucket>/<job name>/) passed to it as output location.
baseline_output_bucket = 'sm-nlp-data'
baseline_job_name = "QABaseLineJob7"
baseline_job_output_s3 = f"s3://{baseline_output_bucket}/{baseline_job_name}/"

In [3]:
from sagemaker.model_monitor import ModelQualityMonitor
from sagemaker.model_monitor.dataset_format import DatasetFormat

In [7]:
# Monitor object configured with one ml.m5.4xlarge instance, a 20 GB volume,
# and a 3600-second runtime limit.
model_quality_monitor = ModelQualityMonitor(
    role=role,
    instance_count=1,
    instance_type='ml.m5.4xlarge',
    volume_size_in_gb=20,
    max_runtime_in_seconds=3600,
    sagemaker_session=sess
)

2. Create a baseline dataset in JSON with test data

Here we reuse the labels predicted in the earlier steps. They are compared with the ground truth labels when the baseline job runs.

In [107]:
def _read_stripped_lines(path):
    """Return the lines of a UTF-8 text file with surrounding whitespace removed."""
    # Explicit encoding: the files hold Chinese text and must not depend on the platform default.
    with open(path, encoding='utf-8') as f:
        return [line.strip() for line in f]


# Input sentences, ground-truth intent labels, and per-token slot tags for the test set.
x_input = _read_stripped_lines('processed/test/seq.in')
y_output = _read_stripped_lines('processed/test/label')
seq_output = [seq.split() for seq in _read_stripped_lines('processed/test/seq.out')]

# The predictions collected from the endpoint must line up one-to-one with the inputs.
assert len(predicted_cls) == len(x_input), f"predicted label should have the same length with input sequence {len(predicted_cls)}!={len(x_input)}"

test_dataset = {
    'seq_in': x_input,
    'seq_out': seq_output,
    'predicted_label': predicted_cls,
    'label': y_output
}

In [111]:
import pandas as pd
# One row per test sentence, with columns taken from the dict keys.
test_frame = pd.DataFrame(test_dataset)
test_frame

Unnamed: 0,seq_in,seq_out,predicted_label,label
0,伊坂幸太郎写了哪些书,"[B_name, I_name, I_name, I_name, I_name, O, O,...",ask_books,ask_books
1,ONE PIECE総集编 THE FIRST LOG是谁写的,"[B_book, I_book, I_book, I_book, I_book, I_boo...",ask_author,ask_author
2,高效管理Windows网络/Win32 Perl应用之道的作者是谁,"[B_book, I_book, I_book, I_book, I_book, I_boo...",ask_author,ask_author
3,Dave写了什么书,"[B_name, I_name, I_name, I_name, O, O, O, O, O]",ask_books,ask_books
4,洪荒之武道是谁写的,"[B_book, I_book, I_book, I_book, I_book, O, O,...",ask_author,ask_author
...,...,...,...,...
353,有哪些人从深圳大学毕业,"[O, O, O, O, O, B_school, I_school, I_school, ...",ask_alumni,ask_alumni
354,烈火如歌的导演是谁,"[B_film, I_film, I_film, I_film, O, O, O, O, O]",ask_director,ask_director
355,烈火如歌的导演是谁,"[B_film, I_film, I_film, I_film, O, O, O, O, O]",ask_director,ask_director
356,梁胜权导演了哪些电视剧,"[B_name, I_name, I_name, O, O, O, O, O, O, O, O]",ask_films,ask_films


In [109]:
# Compare each predicted intent with its ground-truth label, pairwise.
label_matches = [predicted == expected for predicted, expected in zip(predicted_cls, y_output)]
correct = sum(label_matches)
wrong = len(label_matches) - correct
print(f"Correct: {correct}")
print(f"Wrong: {wrong}")
print(f"Acc: {correct/len(predicted_cls)}")

Correct: 336
Wrong: 22
Acc: 0.9385474860335196


`suggest_baseline` works better with lines records, so we convert DataFrame to jsonlines file:

In [123]:
# Write one JSON record per line (JSON Lines, despite the .json extension), keeping non-ASCII characters unescaped.
test_frame.to_json(path_or_buf='test_dataset.json', orient='records', lines=True, force_ascii=False)

Deprecated method, convert by hand:
```python
with open('test_dataset1.jsonl', 'w') as f:
    for seq_in, seq_out, predicted_label, label in zip(test_dataset['seq_in'], \
        test_dataset['seq_out'], test_dataset['predicted_label'], test_dataset['label']):
        json.dump({'seq_in': seq_in, 'seq_out': seq_out, 'predicted_label': predicted_label, \
            'label': label}, f, ensure_ascii=False)
```

3. Now call the suggest_baseline method of the ModelQualityMonitor object to run a baseline job. We need a baseline dataset that contains both predictions and labels stored in Amazon S3.<br/>
Suggest baseline specification: [ModelQualityMonitor.suggest_baseline](https://sagemaker.readthedocs.io/en/stable/api/inference/model_monitor.html#sagemaker.model_monitor.model_monitoring.ModelQualityMonitor.suggest_baseline)

In [11]:
# Start the baselining job on the JSON Lines file written above.
job = model_quality_monitor.suggest_baseline(
    job_name=baseline_job_name,
    baseline_dataset='./test_dataset.json', # Local path to the baseline dataset; the job inputs printed below show it staged under an S3 prefix.
    dataset_format=DatasetFormat.json(lines=True), # Whether the file should be read as a json object per line
    output_s3_uri=baseline_job_output_s3, # The S3 location to store the results.
    problem_type='MulticlassClassification',
    inference_attribute= "predicted_label", # The column in the dataset that contains predictions.
    ground_truth_attribute= "label" # The column in the dataset that contains ground truth labels.
)


Job Name:  QABaseLineJob7
Inputs:  [{'InputName': 'baseline_dataset_input', 'AppManaged': False, 'S3Input': {'S3Uri': 's3://sagemaker-us-east-1-093729152554/model-monitor/baselining/QABaseLineJob7/input/baseline_dataset_input', 'LocalPath': '/opt/ml/processing/input/baseline_dataset_input', 'S3DataType': 'S3Prefix', 'S3InputMode': 'File', 'S3DataDistributionType': 'FullyReplicated', 'S3CompressionType': 'None'}}]
Outputs:  [{'OutputName': 'monitoring_output', 'AppManaged': False, 'S3Output': {'S3Uri': 's3://sm-nlp-data/QABaseLineJob7/', 'LocalPath': '/opt/ml/processing/output', 'S3UploadMode': 'EndOfJob'}}]


In [None]:
# Wait for the baselining job, with log output enabled.
job.wait(logs=True)

After the baseline job finishes, you can see the constraints that the job generated. First, get the results of the baseline job by calling the `latest_baselining_job` method of the ModelQualityMonitor object. 

In [13]:
# Handle to the most recent baselining job started from this monitor object.
baseline_job = model_quality_monitor.latest_baselining_job

The baseline job suggests constraints, which are thresholds for metrics that model monitor measures. If a metric goes beyond the suggested threshold, Model Monitor reports a violation. To view the constraints that the baseline job generated, call the suggested_constraints method of the baseline job.

In [14]:
import pandas as pd
# Show the suggested multiclass constraints as a table, transposed so each metric is a row.
pd.DataFrame(baseline_job.suggested_constraints().body_dict['multiclass_classification_constraints']).T

Unnamed: 0,threshold,comparison_operator
accuracy,0.938547,LessThanThreshold
weighted_recall,0.938547,LessThanThreshold
weighted_precision,0.939565,LessThanThreshold
weighted_f0_5,0.939081,LessThanThreshold
weighted_f1,0.938622,LessThanThreshold
weighted_f2,0.938475,LessThanThreshold


### Schedule Model Quality Monitoring Jobs 

You can create a model monitoring schedule for the endpoint created earlier. Use the baseline resources (constraints and statistics) to compare against the real-time traffic. 

Check Amazon docs for setting up a scheduled monitor quality check: [Schedule Model Quality Monitoring Jobs ](https://docs.aws.amazon.com/sagemaker/latest/dg/model-monitor-model-quality-schedule.html)

API definition for `create_model_quality_job_definition`: [create_model_quality_job_definition](https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker.html#SageMaker.Client.create_model_quality_job_definition)

In [None]:
# copy over the training dataset to Amazon S3 (if you already have it in Amazon S3, you could reuse it)
# NOTE(review): `prefix` is not assigned anywhere in the visible notebook, so this cell
# raises NameError on a fresh kernel - define it (or drop this cell) before running.
baseline_prefix = prefix + '/baselining'
baseline_data_prefix = baseline_prefix + '/data'
baseline_results_prefix = baseline_prefix + '/results'

# Full S3 URIs under the session's default bucket.
baseline_data_uri = 's3://{}/{}'.format(bucket,baseline_data_prefix)
baseline_results_uri = 's3://{}/{}'.format(bucket, baseline_results_prefix)
print('Baseline data uri: {}'.format(baseline_data_uri))
print('Baseline results uri: {}'.format(baseline_results_uri))

In [None]:
# S3 keys always use "/" as the separator, so build the key with a plain join
# rather than os.path.join, which follows the local OS convention.
s3_key = "/".join([baseline_prefix, 'data', 'training-dataset-with-header.csv'])

# Upload inside a `with` block so the local file handle is closed afterwards.
with open("test_data/training-dataset-with-header.csv", 'rb') as training_data_file:
    boto3.Session().resource('s3').Bucket(bucket).Object(s3_key).upload_fileobj(training_data_file)

Create a model monitoring schedule for the endpoint using the baseline constraints and statistics to compare against real-time traffic. 

In [137]:
# S3 location passed to the monitoring schedule as its output URI.
s3_report_path = f"s3://{baseline_output_bucket}/{baseline_job_name}/output"
s3_report_path

's3://sm-nlp-data/QABaseLineJob6/output'

Check the API for `create_monitoring_schedule`:

In [146]:
# Print the SDK docstring for create_monitoring_schedule (exploration aid).
help(model_quality_monitor.create_monitoring_schedule)

Help on method create_monitoring_schedule in module sagemaker.model_monitor.model_monitoring:

create_monitoring_schedule(endpoint_input, ground_truth_input, problem_type, record_preprocessor_script=None, post_analytics_processor_script=None, output_s3_uri=None, constraints=None, monitor_schedule_name=None, schedule_cron_expression=None, enable_cloudwatch_metrics=True) method of sagemaker.model_monitor.model_monitoring.ModelQualityMonitor instance
    Creates a monitoring schedule.
    
    Args:
        endpoint_input (str or sagemaker.model_monitor.EndpointInput): The endpoint to
            monitor. This can either be the endpoint name or an EndpointInput.
        ground_truth_input (str): S3 URI to ground truth dataset.
        problem_type (str): The type of problem of this model quality monitoring. Valid
            values are "Regression", "BinaryClassification", "MulticlassClassification".
        record_preprocessor_script (str): The path to the record preprocessor script. Thi

`ground_truth_bucket` and `ground_truth_prefix` specify where to store and retrieve the labeled ground truth data.

In [45]:
# Bucket and prefix that form the ground-truth S3 URI given to the monitoring schedule.
ground_truth_bucket = 'sm-nlp-data'
ground_truth_prefix = 'ground_truth'

In [None]:
from sagemaker.model_monitor import CronExpressionGenerator
from time import gmtime, strftime

# A UTC timestamp suffix keeps the schedule name unique across runs.
mon_schedule_name = 'qa-model-monitor-schedule-' + strftime("%Y-%m-%d-%H-%M-%S", gmtime())

# Create a daily model quality monitoring schedule for the endpoint. Predictions
# are compared against the ground truth labels found under ground_truth_input,
# using the constraints suggested by the baseline job.
# NOTE(review): `predictor.endpoint` may be deprecated in favour of
# `predictor.endpoint_name` in newer SageMaker SDK versions -- confirm.
model_quality_monitor.create_monitoring_schedule(
    monitor_schedule_name=mon_schedule_name,
    endpoint_input=predictor.endpoint,
    output_s3_uri=s3_report_path,
    problem_type='MulticlassClassification',
    constraints=model_quality_monitor.suggested_constraints(),
    # The trailing comma on the next line was missing, which made the cell a SyntaxError.
    ground_truth_input=f"s3://{ground_truth_bucket}/{ground_truth_prefix}/",
    schedule_cron_expression=CronExpressionGenerator.daily(),
    enable_cloudwatch_metrics=True,
)

`create_model_quality_job_definition` is the Boto3 (AWS SDK for Python) equivalent of `create_monitoring_schedule` from the SageMaker Python SDK (it isn't well documented either).

```python
import time
model_quality_job_definition_name = f"qa-model-quality-definition-{int(time.time())}"

sm.create_model_quality_job_definition(
    JobDefinitionName=model_quality_job_definition_name,
    ModelQualityBaselineConfig={
        'BaseliningJobName': baseline_job_name,
        'ConstraintsResource': {
            'S3Uri': baseline_job_output_s3
        },
    },
    ModelQualityAppSpecification={
        'ImageUri': 'string',
        'ContainerEntrypoint': [
            'string',
        ],
        'ContainerArguments': [
            'string',
        ],
        'RecordPreprocessorSourceUri': 'string',
        'PostAnalyticsProcessorSourceUri': 'string',
        'ProblemType': 'BinaryClassification'|'MulticlassClassification'|'Regression',
        'Environment': {
            'string': 'string'
        }
    },
)
```

Describe and inspect the schedule: After you describe it, observe that the MonitoringScheduleStatus in MonitoringScheduleSummary returned by the ListMonitoringSchedules API changes to Scheduled. 

In [None]:
# Describe the schedule attached to model_quality_monitor (the monitor the
# schedule in this section was created on) and report its current status.
desc_schedule_result = model_quality_monitor.describe_schedule()
print('Schedule status: {}'.format(desc_schedule_result['MonitoringScheduleStatus']))

### Ingest Ground Truth Labels and Merge Them With Predictions

Model quality monitoring compares the predictions your model makes with ground truth labels to measure the quality of the model. For this to work, you periodically label data captured by your endpoint and upload it to Amazon S3.

Check this doc: [Ingest Ground Truth Labels and Merge Them With Predictions](https://docs.aws.amazon.com/sagemaker/latest/dg/model-monitor-model-quality-merge.html)

The variable `traffic_i` comes from the [View Captured Data](#View-Captured-Data) section. `traffic_i` is a single logged inference event; it contains an event UUID:

In [46]:
# Parse the captured record and display its eventMetadata.eventId field.
json.loads(traffic_i)['eventMetadata']['eventId']

'4651a1bf-4b00-4248-a309-4a18b11e4277'

Here I label the captured data `traffic_i` by hand and upload the labels to the ground truth bucket. This serves only as an example; in a real-world application, someone needs to continuously label captured data in order to monitor model quality drift.

Labeled ground truth data needs to be in the format of
```json
{
  "groundTruthData": {
    "data": "1",
    "encoding": "CSV" # only CSV supported at launch, we assume "data" only consists of label
  },
  "eventMetadata": {
    "eventId": "aaaa-bbbb-cccc"
  },
  "eventVersion": "0"
}
```

and needs to be stored at `s3://bucket/prefix/yyyy/mm/dd/hh`, where the time is the time that the ground truth data is collected.

Available intentions:

In [56]:
# Load the intent labels (one per line) and print them.
with open('processed/intent_label.txt') as f:
    intentions = [raw_line.strip() for raw_line in f.readlines()]

for intent in intentions:
    print(intent)

UNK
ask_school
ask_alumni
ask_author
ask_books
ask_wife
ask_husband
ask_director
ask_films
ask_nationality


In [65]:
# Keep the captured event's ID; it is written into the ground truth record's eventMetadata.
event_id = json.loads(traffic_i)['eventMetadata']['eventId']

In [61]:
import base64

# Pull the endpoint input payload out of the captured record and base64-decode it to text.
endpoint_input_data = json.loads(traffic_i)['captureData']['endpointInput']['data']
decoded_payload = base64.b64decode(endpoint_input_data).decode('utf-8')

# One question per line. Strip each line *before* filtering so that
# whitespace-only lines (e.g. a stray '\r' or spaces) are dropped instead of
# surviving as empty strings that would later need a label.
stripped_lines = (line.strip() for line in decoded_payload.split('\n'))
questions = [line for line in stripped_lines if line]
questions

['三度诱惑的导演是谁',
 '何藩导演了哪些电视剧',
 '我乔布斯是谁写的',
 '妖精凄卟曲之美男就地扑倒的作者是谁',
 '许晋亨的配偶是谁',
 '李嘉欣和谁结婚了',
 '决胜是谁的作品',
 '林晓蔚毕业于哪里',
 '有哪些人从中南大学毕业',
 '看上去很美的导演是谁',
 '韦达利来自于哪个国家',
 '流年禁地是谁的作品',
 '亲爱的是谁导演的',
 '裴军导演了哪些电视剧',
 '魔诫奇兵是谁导演的',
 '丹·卡斯卡拉里导演了哪些电影',
 '智取威虎山是谁导演的',
 '徐克导演了哪些电影',
 '施尚明从哪里毕业',
 '谁写了绝代倾城天才魔法师']

Hint:
answers are (just copy them all and paste):
```
ask_director
ask_films
ask_author
ask_author
ask_wife
ask_husband
ask_author
ask_school
ask_alumni
ask_director
ask_nationality
ask_author
ask_director
ask_films
ask_director
ask_films
ask_director
ask_films
ask_school
ask_author
```

In [64]:
# Prompt for one intent label per question, in order.
labels = [input(f"Label for {question} is: ") for question in questions]

Label for 三度诱惑的导演是谁 is:  ask_director
Label for 何藩导演了哪些电视剧 is:  ask_films
Label for 我乔布斯是谁写的 is:  ask_author
Label for 妖精凄卟曲之美男就地扑倒的作者是谁 is:  ask_author
Label for 许晋亨的配偶是谁 is:  ask_wife
Label for 李嘉欣和谁结婚了 is:  ask_husband
Label for 决胜是谁的作品 is:  ask_author
Label for 林晓蔚毕业于哪里 is:  ask_school
Label for 有哪些人从中南大学毕业 is:  ask_alumni
Label for 看上去很美的导演是谁 is:  ask_director
Label for 韦达利来自于哪个国家 is:  ask_nationality
Label for 流年禁地是谁的作品 is:  ask_author
Label for 亲爱的是谁导演的 is:  ask_director
Label for 裴军导演了哪些电视剧 is:  ask_films
Label for 魔诫奇兵是谁导演的 is:  ask_director
Label for 丹·卡斯卡拉里导演了哪些电影 is:  ask_films
Label for 智取威虎山是谁导演的 is:  ask_director
Label for 徐克导演了哪些电影 is:  ask_films
Label for 施尚明从哪里毕业 is:  ask_school
Label for 谁写了绝代倾城天才魔法师 is:  ask_author


['ask_director',
 'ask_films',
 'ask_author',
 'ask_author',
 'ask_wife',
 'ask_husband',
 'ask_author',
 'ask_school',
 'ask_alumni',
 'ask_director',
 'ask_nationality',
 'ask_author',
 'ask_director',
 'ask_films',
 'ask_director',
 'ask_films',
 'ask_director',
 'ask_films',
 'ask_school',
 'ask_author']

In [86]:
from datetime import datetime, timezone

# Ground truth record for the captured event: the hand-entered labels joined
# into one CSV string, keyed by the event ID of the captured inference.
ground_truth_data = {
    "groundTruthData": {
        "data": ','.join(labels),
        "encoding": "CSV" # only CSV supported at launch, we assume "data" only consists of label
    },
    "eventMetadata": {
        "eventId": event_id
    },
    "eventVersion": "0"
}

# Build the hour-partitioned upload prefix (.../yyyy/mm/dd/hh/) from an explicit
# UTC timestamp rather than the kernel's local time, so the partition does not
# depend on the machine's timezone setting.
now = datetime.now(timezone.utc)
ground_truth_upload_uri = f"s3://{ground_truth_bucket}/{ground_truth_prefix}/{now.strftime('%Y/%m/%d/%H')}/"
ground_truth_upload_uri

's3://sm-nlp-data/ground_truth/2021/09/28/04/'

In [89]:
# Serialize the ground truth record to a local JSON file for upload.
# ensure_ascii=False writes non-ASCII characters verbatim, so pin the file
# encoding to UTF-8 instead of relying on the platform default.
with open('output/ground_truth.json', 'w', encoding='utf-8') as f:
    json.dump(ground_truth_data, f, ensure_ascii=False)

In [90]:
# Copy the local ground truth record to the S3 prefix held in ground_truth_upload_uri.
!aws s3 cp output/ground_truth.json $ground_truth_upload_uri

upload: output/ground_truth.json to s3://sm-nlp-data/ground_truth/2021/09/28/04/ground_truth.json
