# Wine Quality

In [1]:
import os
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.linear_model import ElasticNet
import mlflow
import mlflow.sklearn

In [2]:
#mlflow.set_tracking_uri("sqlite:///wine-quality.sqlite")

In [3]:
mlflow.get_tracking_uri()

'http://ec2-34-193-144-194.compute-1.amazonaws.com:5000'

In [4]:
# Loading data from a CSV file
df_wine = pd.read_csv('./wine-quality.csv')

# Separating the target class ('quality') from remainder of the training data
X = df_wine.drop(columns = 'quality')

y = df_wine[['quality']]

# Splitting the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, random_state = 42)

## Model Training and Logging

In [5]:
# Fix Conda Python version used by Seldon Core MLflow in Kubernetes
conda_env = mlflow.sklearn.get_default_conda_env()

for i, dep in enumerate(conda_env['dependencies']):
    if type(dep) == str and dep.startswith("python="):
        conda_env['dependencies'][i] = "python"

conda_env

{'name': 'mlflow-env',
 'channels': ['defaults'],
 'dependencies': ['python', 'scikit-learn=0.23.1', {'pip': ['mlflow']}]}

In [6]:
# Defining model parameters
for name, alpha, l1_ratio in [("wine-model-a", 0.5, 0.5), ("wine-model-b", 1.0, 0.5)]:
    # Running MLFlow script
    with mlflow.start_run():

        # Instantiating model with model parameters
        model = ElasticNet(alpha = alpha, l1_ratio = l1_ratio)

        # Fitting training data to the model
        model.fit(X_train, y_train)

        # Running prediction on validation dataset
        preds = model.predict(X_val)

        # Getting metrics on the validation dataset
        rmse = mean_squared_error(preds, y_val)
        abs_error = mean_absolute_error(preds, y_val)
        r2 = r2_score(preds, y_val)

        # Logging params and metrics to MLFlow
        mlflow.log_param('alpha', alpha)
        mlflow.log_param('l1_ratio', l1_ratio)
        mlflow.log_metric('rmse', rmse)
        mlflow.log_metric('abs_error', abs_error)
        mlflow.log_metric('r2', r2)

        # Logging model to MLFlow. Requires MLflow (Server) with DB backend
        mlflow.sklearn.log_model(
            sk_model = model,
            artifact_path = "",
            registered_model_name = name,
            conda_env = conda_env
        )

Registered model 'wine-model-a' already exists. Creating a new version of this model...
Created version '4' of model 'wine-model-a'.
Registered model 'wine-model-b' already exists. Creating a new version of this model...
Created version '4' of model 'wine-model-b'.


In [7]:
df = mlflow.search_runs(filter_string="metrics.rmse < 1")

In [8]:
df

Unnamed: 0,run_id,experiment_id,status,artifact_uri,start_time,end_time,metrics.abs_error,metrics.rmse,metrics.r2,params.l1_ratio,params.alpha,tags.mlflow.source.type,tags.mlflow.user,tags.mlflow.log-model.history,tags.mlflow.source.name
0,0ad40a05a5b841b7af3578fe715194a1,0,FINISHED,s3://mlflow/0/0ad40a05a5b841b7af3578fe715194a1...,2021-02-10 23:24:09.917000+00:00,2021-02-10 23:24:12.288000+00:00,0.654921,0.735682,-25.157627,0.5,1.0,LOCAL,root,"[{""run_id"": ""0ad40a05a5b841b7af3578fe715194a1""...",/opt/conda/envs/rapids/lib/python3.7/site-pack...
1,37bc47bfe3974f5eb2dbe287fa0048be,0,FINISHED,s3://mlflow/0/37bc47bfe3974f5eb2dbe287fa0048be...,2021-02-10 23:24:07.137000+00:00,2021-02-10 23:24:09.747000+00:00,0.633893,0.674361,-13.469377,0.5,0.5,LOCAL,root,"[{""run_id"": ""37bc47bfe3974f5eb2dbe287fa0048be""...",/opt/conda/envs/rapids/lib/python3.7/site-pack...
2,98c431b4ffec4ede858e00b9f520396a,0,FINISHED,s3://mlflow/0/98c431b4ffec4ede858e00b9f520396a...,2021-02-10 22:56:13.257000+00:00,2021-02-10 22:56:15.609000+00:00,0.654921,0.735682,-25.157627,0.5,1.0,LOCAL,root,"[{""run_id"": ""98c431b4ffec4ede858e00b9f520396a""...",/opt/conda/envs/rapids/lib/python3.7/site-pack...
3,4a8e6595f58645b5891f8fd98346b958,0,FINISHED,s3://mlflow/0/4a8e6595f58645b5891f8fd98346b958...,2021-02-10 22:56:10.479000+00:00,2021-02-10 22:56:13.086000+00:00,0.633893,0.674361,-13.469377,0.5,0.5,LOCAL,root,"[{""run_id"": ""4a8e6595f58645b5891f8fd98346b958""...",/opt/conda/envs/rapids/lib/python3.7/site-pack...
4,d01d284fd9974a97b8780bc9d6457b71,0,FINISHED,s3://mlflow/0/d01d284fd9974a97b8780bc9d6457b71...,2021-02-10 22:54:18.142000+00:00,2021-02-10 22:54:20.612000+00:00,0.654921,0.735682,-25.157627,0.5,1.0,LOCAL,root,"[{""run_id"": ""d01d284fd9974a97b8780bc9d6457b71""...",/opt/conda/envs/rapids/lib/python3.7/site-pack...
5,831b02af0cbf43baa59ae71e846f009e,0,FINISHED,s3://mlflow/0/831b02af0cbf43baa59ae71e846f009e...,2021-02-10 22:54:15.459000+00:00,2021-02-10 22:54:17.972000+00:00,0.633893,0.674361,-13.469377,0.5,0.5,LOCAL,root,"[{""run_id"": ""831b02af0cbf43baa59ae71e846f009e""...",/opt/conda/envs/rapids/lib/python3.7/site-pack...
6,d772a559568f4db7bab58ec8bd118026,0,FINISHED,s3://mlflow/0/d772a559568f4db7bab58ec8bd118026...,2021-02-10 22:15:10.995000+00:00,2021-02-10 22:15:14.155000+00:00,0.654921,0.735682,-25.157627,0.5,1.0,LOCAL,root,"[{""run_id"": ""d772a559568f4db7bab58ec8bd118026""...",/opt/conda/envs/rapids/lib/python3.7/site-pack...
7,bbb6865cbf854ba48d54e85a2fedf700,0,FINISHED,s3://mlflow/0/bbb6865cbf854ba48d54e85a2fedf700...,2021-02-10 22:15:05.434000+00:00,2021-02-10 22:15:10.805000+00:00,0.633893,0.674361,-13.469377,0.5,0.5,LOCAL,root,"[{""run_id"": ""bbb6865cbf854ba48d54e85a2fedf700""...",/opt/conda/envs/rapids/lib/python3.7/site-pack...


In [9]:
#Fetching Run ID for best model
# run_id = df.loc[df['metrics.rmse'].idxmin()]['run_id']
#best_model_url = df.loc[df['metrics.rmse'].idxmin()]["artifact_uri"]
# Load model as a PyFuncModel.
#best_model = mlflow.pyfunc.load_model(best_model_url)
#y_pred = best_model.predict(X_val)
#print(y_pred[0:10])

# Kubernetes

Deploy AB model via Seldon Core to a local Kubernetes cluster. Generate some sample traffic. Then remove AB models and all its pods and services from the cluster

## Deploy AB Model from MLflow to Seldon Core

In [10]:
from mlflow.tracking import MlflowClient

client = MlflowClient()

model_a_version = client.get_latest_versions("wine-model-a", stages=["None"])[0].version
model_b_version = client.get_latest_versions("wine-model-b", stages=["None"])[0].version

In [11]:
import seldon_core_utils

In [15]:
seldon_core_utils.ab_deployment(
    name="ab",
    namespace="default",
    secret_name="mlflow-seldon-secret",
    model_a_name="wine-model-a",
    model_a_version=model_a_version,
    model_b_name="wine-model-b",
    model_b_version=model_b_version,
    model_a_traffic=50,
)



<Response [409]>

## Generate Traffic to AB Model

Wait for the model to be fully deployed, then execute the below statements!

In [16]:
import requests
import time

headers = {"Content-Type": "application/json"}
body = {"data":{"ndarray":[[7,0.27,0.36,20.7,0.045,45,170,1.001,3,0.45,8.8]]}}

for i in range(1, 101):
    if i % 10 == 0:
        print(f"Send {i} requests")
    requests.post(
        url="http://ab-wine-model-a.default.svc.cluster.local:8000/api/v1.0/predictions",
        json=body,
        headers=headers,
        verify=False,
        timeout=30
    )
    time.sleep(0.1)

Send 10 requests
Send 20 requests
Send 30 requests
Send 40 requests
Send 50 requests
Send 60 requests
Send 70 requests
Send 80 requests
Send 90 requests
Send 100 requests


## Delete AB Model from Seldon Core

In [None]:
seldon_core_utils.ab_undeployment(name="ab", namespace="default")