# Hosting FLAIR models on SageMaker

Below are some initial imports and configuration.

In [1]:
import boto3
import re

import os
import numpy as np
import pandas as pd
from sagemaker import get_execution_role
import json
import pickle


# Execution role obtained from the SageMaker SDK; it is passed as `role=`
# to the PyTorchModel created further down in this notebook.
role = get_execution_role()

In [26]:
import sagemaker
from time import gmtime, strftime

# Session through which the model and endpoint are created later on.
sess = sagemaker.Session() # can use LocalSession() to run container locally

bucket = sess.default_bucket()
# Use the region the session is bound to instead of a hard-coded value:
# `region` is interpolated into the ECR image URI below, and that image has
# to be pulled from the same region the endpoint is deployed in.
region = sess.boto_region_name
account = sess.boto_session.client('sts').get_caller_identity()['Account']
prefix_input = 'flair-input'
# NOTE(review): 'flair-ouput' looks like a typo for 'flair-output'. It is left
# unchanged because existing S3 objects may already live under this prefix.
prefix_output = 'flair-ouput'

## Define parameters of your container

In [3]:
# Serving image: the SageMaker PyTorch 1.5 inference container.
# Full list of available deep learning containers is here: https://docs.aws.amazon.com/deep-learning-containers/latest/devguide/deep-learning-containers-images.html

container_serving = "pytorch-inference" # repository (container) name
tag = "1.5.0-gpu-py36-cu101-ubuntu16.04" # image tag; several versions of the container may be available

# Assemble the URI in two steps: registry host for `region`, then repository and tag.
registry = '763104351884.dkr.ecr.{}.amazonaws.com'.format(region)
image = '{}/{}:{}'.format(registry, container_serving, tag)

print("Following container will be used for hosting: ", image)

Following container will be used for hosting:  763104351884.dkr.ecr.us-east-2.amazonaws.com/pytorch-inference:1.5.0-gpu-py36-cu101-ubuntu16.04


## Deploy remote endpoint

To process inference data that is sent over the internet, we need two custom serialization/deserialization methods.

In [19]:
from sagemaker.pytorch import PyTorchModel, PyTorch, PyTorchPredictor
from sagemaker.estimator import Estimator, Model

# Model definition for the FLAIR sequence tagger. The serving code is taken
# from `serving_sources/sequence_tagger_serving.py` and runs inside `image`.
remote_model = PyTorchModel(name="flair-tagger-v2",
                            # This will be replaced with location of trained model artifacts.
                            # For now, default pre-trained FLAIR model will be used.
                            model_data="s3://vadimd-empty-bucket/placeholder/emptyfile.tar.gz",
                            role=role,
                            sagemaker_session=sess,
                            entry_point="sequence_tagger_serving.py",
                            source_dir="serving_sources",
                            framework_version="1.5",
                            # The SDK identifies the interpreter as "py2"/"py3", not by an
                            # interpreter version such as "3.6". The exact Python version is
                            # determined by the explicitly supplied `image` (py36 tag).
                            py_version="py3",
                            image=image)

In [20]:
# Define a unique endpoint name; if omitted, SageMaker generates one based on the container used.
endpoint_name = "sequence-endpoint-v1"


def _endpoint_exists(name):
    """Return True if a SageMaker endpoint called `name` already exists.

    A "Could not find" ValidationException from DescribeEndpoint is treated as
    "does not exist"; any other client error is re-raised.
    """
    try:
        sess.sagemaker_client.describe_endpoint(EndpointName=name)
    except sess.sagemaker_client.exceptions.ClientError as err:
        error = err.response["Error"]
        if error["Code"] == "ValidationException" and "Could not find" in error["Message"]:
            return False
        raise
    return True


# Update the endpoint in place when it already exists, otherwise create it.
# Passing update_endpoint=True unconditionally fails on a first run because
# there is nothing to update yet.
remote_predictor = remote_model.deploy(instance_type='ml.g4dn.xlarge',
                                       initial_instance_count=1,
                                       update_endpoint=_endpoint_exists(endpoint_name),
                                       endpoint_name=endpoint_name,
                                      )

---------------!

In [25]:
import pickle
import flair  # presumably needed so the pickled flair objects in the response can be rebuilt — confirm
import json

# Text to tag. An alternative example is kept below for quick switching.
# sentence_to_predict = "Berlin is the capital and largest city of Germany by both area and population."
sentence_to_predict  = "New York City (NYC), often called The City or simply New York (NY), is the most populous city in the United States."

# Request parameters: JSON goes in, a pickled Python object comes back.
endpoint_name = "sequence-endpoint-v1"
content_type = 'application/json'
accept_type = "pickle"
headers = {'content-type': content_type}
payload = json.dumps(sentence_to_predict)

# Call the endpoint through the low-level SageMaker runtime client.
client = boto3.client('sagemaker-runtime')
response = client.invoke_endpoint(EndpointName=endpoint_name,
                                  ContentType=content_type,
                                  Accept=accept_type,
                                  Body=payload)

# SECURITY NOTE: pickle.loads can execute arbitrary code while deserialising.
# Only do this with responses from an endpoint you control and trust.
raw_body = response['Body'].read()
prediction_obj = pickle.loads(raw_body)
print(prediction_obj)

[Sentence: "New York City (NYC), often called The City or simply New York (NY), is the most populous city in the United States."   [− Tokens: 22  − Token-Labels: "New <B-LOC> York <I-LOC> City <E-LOC> (NYC), often called The <B-LOC> City <E-LOC> or simply New <B-LOC> York <E-LOC> (NY), is the most populous city in the United <B-LOC> States. <E-LOC>"]]
