# SageMaker BYOC MLflow Test

In [None]:
# import sys
# !{sys.executable} -m pip install mlflow
# !{sys.executable} -m pip install pymysql

In [4]:
import json
import logging
import os
from urllib.parse import quote

import boto3
import mlflow
import numpy as np
import pandas as pd
import requests
import sagemaker
from mlflow import sagemaker as mfs
from mlflow.tracking import MlflowClient
from pymysql import converters
from sagemaker.deserializers import NumpyDeserializer
from sagemaker.estimator import Estimator
from sagemaker.local import LocalSession
from sagemaker.predictor import Predictor
from sagemaker.serializers import NumpySerializer
from sagemaker.sklearn.estimator import SKLearn
# NOTE: load_boston was removed in scikit-learn 1.2; this import fails on newer releases.
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split

In [5]:
# SageMaker session for this notebook, plus the default S3 bucket and the
# execution role that the upload, training and deployment cells rely on.
sess = sagemaker.Session()
bucket = sess.default_bucket()
role = sagemaker.get_execution_role()

In [6]:
# Build the MLflow tracking URI from the database settings stored in AWS Secrets Manager.
session = boto3.session.Session()
client = session.client(service_name="secretsmanager", region_name="us-east-1")
mlflow_secret = client.get_secret_value(SecretId='acme/mlflow')
mlflowdb_conf = json.loads(mlflow_secret["SecretString"])

# Register an encoder for numpy float64 values with PyMySQL, then rebuild the
# combined conversion table so that it includes the new encoder.
converters.encoders[np.float64] = converters.escape_float
converters.conversions = converters.encoders.copy()
converters.conversions.update(converters.decoders)

# Percent-encode the credentials: a reserved character such as '@', ':', '/' or '%'
# in the username or password would otherwise corrupt the URL when it is parsed.
# safe='' also encodes '/', and spaces become %20 rather than '+'.
db_user = quote(mlflowdb_conf['username'], safe='')
db_password = quote(mlflowdb_conf['password'], safe='')
tracking_uri = f"mysql+pymysql://{db_user}:{db_password}@{mlflowdb_conf['host']}/mlflow"


In [8]:
# We use the Wine Quality dataset
csv_url = (
    'http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv'
)
logger = logging.getLogger(__name__)
try:
    data = pd.read_csv(csv_url, sep=";")
except Exception as e:
    logger.exception(
        "Unable to download training & test CSV, check your internet connection. Error: %s", e
    )
    # Nothing below can run without the data, so stop here with the real error
    # instead of failing later on an undefined `data`.
    raise

# Split the data into training and test sets. (0.75, 0.25) split.
# The fixed seed keeps the split identical across kernel restarts.
train, test = train_test_split(data, random_state=42)

In [None]:
# Write the in-memory split to disk first, so the files uploaded below exist on a
# fresh checkout and always match `train` / `test`.
# NOTE(review): written comma-separated, with a header row and no index column —
# confirm this is the layout the training container expects.
os.makedirs('data', exist_ok=True)
train.to_csv('data/wine_quality_train.csv', index=False)
test.to_csv('data/wine_quality_test.csv', index=False)

# send data to S3. SageMaker will take training data from s3
s3_prefix = 'sm-byoc-mlflow/model-train-lr/data'
train_path = sess.upload_data(path='data/wine_quality_train.csv', bucket=bucket, key_prefix=s3_prefix)
test_path = sess.upload_data(path='data/wine_quality_test.csv', bucket=bucket, key_prefix=s3_prefix)

In [None]:
# When True the Estimator is created with instance_type='local'; when False it uses 'ml.m5.large'.
local_mode = True

In [None]:
# Hyperparameters handed to the training job: MLflow tracking settings first,
# then the model parameters and the feature/target column names.
hyperparameters = dict(
    tracking_uri=tracking_uri,
    artifact_location='s3://sagemaker-us-east-1-830861439844/mlruns',
    experiment_name='sm-byoc-mlflow-lr',
    run_name='sm-byoc-mlflow-model-train-lr-1',
    tags="{'Project_Name':'sm-byoc-mlflow', 'Model_Name':'WineQuality-Lr'}",
    # 'train-file': 'wine_quality_train.csv',
    # 'test-file': 'wine_quality_test.csv',
    alpha=0.5,
    l1_ratio=0.2,
    features='fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol',
    target='quality',
)


In [None]:
# Pick the training target: the 'local' instance type in local mode, a managed instance otherwise.
training_instance_type = 'ml.m5.large'
if local_mode:
    training_instance_type = 'local'

# Estimator for the custom training image, attached to the listed subnets and security group.
estimator = Estimator(
    image_uri='830861439844.dkr.ecr.us-east-1.amazonaws.com/sm-byoc-mlflow-lr',
    role=role,
    instance_count=1,
    instance_type=training_instance_type,
    base_job_name='sm-byoc-mlflow-lr',
    hyperparameters=hyperparameters,
    subnets=[
        'subnet-a99aa087',
        'subnet-b64c73d1',
        'subnet-900644ae',
        'subnet-695f9024',
        'subnet-67d8e03b',
        'subnet-7e12e370',
    ],
    security_group_ids=['sg-c08c6593'],
)

In [None]:
# Launch training with the uploaded files as the 'train' and 'test' input channels.
training_inputs = {'train': train_path, 'test': test_path}
estimator.fit(training_inputs, wait=False, logs=True)

In [None]:
# Point MLflow at the tracking database using the URI assembled from the
# Secrets Manager entry earlier in the notebook, so that no credentials are
# written into the notebook itself.
mlflow.set_tracking_uri(tracking_uri)
client = MlflowClient()

# Fetch the registered models; the next cell prints them.
_registered_models = client.list_registered_models()



In [None]:
# Print each registered model's name with its latest versions.
# MLflow entities are not subscriptable, so the versions are read through the
# `latest_versions` attribute; each version entity is converted to a plain dict
# for display.
for _registered_model in _registered_models:
    _latest_versions = _registered_model.latest_versions or []
    print(_registered_model.name, [dict(_version) for _version in _latest_versions])

#### To Be Tested 

In [None]:
# mlflow sagemaker run-local -m 's3://sagemaker-us-east-1-830861439844/mlruns/fffd1c803ef9476ba5dedf27fc1393e6/artifacts/model/' -p 6000 -i 'mlflow-pyfunc'
# mlflow sagemaker deploy -m 's3://sagemaker-us-east-1-830861439844/mlruns/fffd1c803ef9476ba5dedf27fc1393e6/artifacts/model/' -e 'arn:aws:iam::830861439844:role/service-role/AmazonSageMaker-ExecutionRole-20201215T000634' --region-name 'us-east-1' -a 'WineQuality-Lr'


In [18]:
# Take the first row of the test set, drop the target column, and serialise it
# as JSON in pandas "split" orientation.
first_row_features = test[:1].drop(columns=["quality"])
input_data = first_row_features.to_json(orient="split")
input_data

'{"columns":["fixed acidity","volatile acidity","citric acid","residual sugar","chlorides","free sulfur dioxide","total sulfur dioxide","density","pH","sulphates","alcohol"],"index":[792],"data":[[7.1,0.61,0.02,2.5,0.081,17.0,87.0,0.99745,3.48,0.6,9.7]]}'

In [None]:
app_name = 'WineQuality-Lr'
region = 'us-east-1'

# NOTE(review): this cell calls port 6000, while the run_local cell in this notebook
# serves on port 7000 — confirm which local server this request is meant to reach.
port = 6000

# Send the JSON payload built in the previous cell to the locally served model.
endpoint = "http://localhost:{}/invocations".format(port)
headers = {"Content-type": "application/json; format=pandas-split"}
# The timeout stops the cell from hanging indefinitely when no local server is listening.
prediction = requests.post(endpoint, json=json.loads(input_data), headers=headers, timeout=30)
print(prediction.text)



In [None]:
# Network placement for the endpoint.
# NOTE(review): the security group is the one the training Estimator uses; the previous
# value was a VPC id ('vpc-…'), which is not a valid security-group id. Confirm that
# 'sg-c08c6593' is the intended group for hosting.
vpc_config = {
    'SecurityGroupIds': ['sg-c08c6593'],
    'Subnets': [
        'subnet-a99aa087',
        'subnet-b64c73d1',
        'subnet-900644ae',
        'subnet-695f9024',
        'subnet-67d8e03b',
        'subnet-7e12e370',
    ],
}

# Create the SageMaker endpoint for the logged model. vpc_config is now passed
# through to deploy(); before, it was built but never used.
mfs.deploy(
    app_name='WineQuality-Lr',
    model_uri="s3://sagemaker-us-east-1-830861439844/mlruns/fffd1c803ef9476ba5dedf27fc1393e6/artifacts/model/",
    execution_role_arn=role,
    image_url='830861439844.dkr.ecr.us-east-1.amazonaws.com/mlflow-pyfunc:1.12.1',
    region_name='us-east-1',
    bucket='mlflow-sagemaker-us-east-1-830861439844',
    mode=mfs.DEPLOYMENT_MODE_CREATE,
    instance_type='ml.t2.medium',
    instance_count=1,
    vpc_config=vpc_config,
)
                        

In [None]:
# Serve the logged model locally on port 7000 using the 'mlflow-pyfunc' image.
local_model_uri = "s3://sagemaker-us-east-1-830861439844/mlruns/eb308d55589045f8b4c2b3e6620e5310/artifacts/model/"
mfs.run_local(local_model_uri, port=7000, image='mlflow-pyfunc', flavor=None)

## Invoke Remote Endpoint

In [19]:
app_name = 'WineQuality-Lr'
region = 'us-east-1'

if __name__ == '__main__':
    # One client for endpoint metadata, one for runtime invocations.
    sm, smrt = (
        boto3.client(service_name, region_name=region)
        for service_name in ('sagemaker', 'runtime.sagemaker')
    )

    # Report the endpoint's current status before calling it.
    endpoint = sm.describe_endpoint(EndpointName=app_name)
    print("Endpoint status: ", endpoint["EndpointStatus"])

    # Send the pandas-split JSON payload and print the decoded response body.
    prediction = smrt.invoke_endpoint(
        EndpointName=app_name,
        ContentType='application/json; format=pandas-split',
        Body=input_data,
    )['Body'].read().decode("ascii")
    print(prediction)


Endpoint status:  InService
[5.290356327744414]
