# Set up data scientist

In [1]:
%load_ext autoreload
%autoreload 2
import os
import cifar_utils
import clipper_manager as cl
import pandas as pd
import numpy as np
from sklearn import linear_model as lm
from sklearn import svm
import tensorflow as tf
from skimage import io
import matplotlib as mpl
mpl.rc("savefig", dpi=200)
import matplotlib.pyplot as plt
%matplotlib inline


# Persist slideshow ("livereveal") settings into the Jupyter nbconfig directory
# of the `clipper` conda environment.
from traitlets.config.manager import BaseJSONConfigManager
# Resolve the directory relative to the home directory, like the other paths in
# this cell, instead of hard-coding one user's absolute /Users/... path.
cfg_path = os.path.expanduser("~/anaconda2/envs/clipper/etc/jupyter/nbconfig")
cm = BaseJSONConfigManager(config_dir=cfg_path)
cm.update('livereveal', {
    'theme': 'simple',
    'transition': 'none',
    'start_slideshow_at': 'selected',
    'width': 1024,
    'height': 768,
})

# Set up the handle to the EC2 instance: host name, login user and key file.
# NOTE(review): `clipper_manager` is already imported at the top of this cell;
# the repeated import below is redundant but harmless.
import clipper_manager as cl
ec2_host = "ec2-54-67-123-237.us-west-1.compute.amazonaws.com"
user = "ubuntu"
# Presumably the SSH private key used to reach the host -- confirm against
# clipper_manager.Cluster.
key = os.path.expanduser("~/.ssh/aws_rsa")
# `ec2_host` and `clipper` are notebook globals that later cells read.
clipper = cl.Cluster(ec2_host, user, key)
# Optional maintenance calls, left disabled:
# clipper.cleanup()
# clipper.pull_docker_images()


# Load the normalized CIFAR-10 train and test splits
cifar_path = os.path.expanduser("~/model-serving/data/cifar")


def _load_normalized_split(cifar_filename):
    """Load one CIFAR file with norm=True and pass the result through filter_data."""
    loaded = cifar_utils.load_cifar(
        cifar_path, cifar_filename=cifar_filename, norm=True)
    return cifar_utils.filter_data(*loaded)


train_x, train_y = _load_normalized_split("cifar_train.data")
test_x, test_y = _load_normalized_split("cifar_test.data")
# The un-normalized (norm=False) test split is left disabled here:
# no_norm_x, no_norm_y = cifar_utils.filter_data(
#     *cifar_utils.load_cifar(cifar_path, cifar_filename="cifar_test.data", norm=False))

# Train Scikit-Learn Logistic Regression model
def train_model(m, train_x, train_y):
    """Fit ``m`` on ``(train_x, train_y)`` and return that same model object.

    The return value of ``m.fit`` is ignored; the caller always gets ``m`` back.
    """
    fitted = m
    fitted.fit(train_x, train_y)
    return fitted
lin_model = train_model(lm.LogisticRegression(), train_x, train_y)

# Restore the pre-trained TF model into a session that owns a fresh graph
tf_cifar_model_path = os.path.expanduser(
    "~/clipper-cpp/examples/cifar_demo/tf_cifar_model/cifar10_model_full")
tf_graph = tf.Graph()
tf_session = tf.Session('', tf_graph)
with tf_graph.as_default():
    # The meta graph lives next to the checkpoint, with a ".meta" suffix.
    saver = tf.train.import_meta_graph(tf_cifar_model_path + ".meta")
    saver.restore(tf_session, tf_cifar_model_path)

def tensorflow_score(session, test_x, test_y):
    """
    Return the fraction of ``test_y`` labels that the TensorFlow model matches.

    Runs the 'softmax_logits:0' tensor with ``test_x`` fed to 'x:0', keeps only
    the columns for cifar_utils.negative_class and cifar_utils.positive_class,
    and labels each row -1 when the negative column wins (ties included) and 1
    otherwise.

    NOTE: This predict method expects pre-whitened (normalized) images
    """
    activations = session.run('softmax_logits:0',
                              feed_dict={'x:0': test_x})
    class_columns = [cifar_utils.negative_class, cifar_utils.positive_class]
    winners = np.argmax(activations[:, class_columns], axis=1)
    # Index 0 is the negative-class column -> label -1; index 1 -> label 1.
    preds = np.where(winners == 0, -1, 1)
    num_correct = np.sum(preds == test_y)
    return float(num_correct) / float(len(test_y))


Checking if Docker is running...
Number of image files: 50000
Number of image files: 10000


<h1 align="center">DEMO</h1> 

In [None]:
# The setup cell only loads normalized data (norm=True) and leaves the
# norm=False load commented out, so `no_norm_x` / `no_norm_y` would be
# undefined here. Load the un-normalized test split before displaying it.
no_norm_x, no_norm_y = cifar_utils.filter_data(
    *cifar_utils.load_cifar(cifar_path, cifar_filename="cifar_test.data", norm=False))
cifar_utils.show_example_images(no_norm_x, no_norm_y, 2)

<h2 align="center">Is it a bird? Is it a plane? It's probably not Superman...</h2> 
![Alt](superman.png#center)

In [None]:
# Self-contained reconnect: this cell re-imports what it needs and rebuilds the
# cluster handle with the same host, user and key as the setup cell.
import clipper_manager as cl
import os
ec2_host = "ec2-54-67-123-237.us-west-1.compute.amazonaws.com"
user = "ubuntu"
key = os.path.expanduser("~/.ssh/aws_rsa")
clipper = cl.Cluster(ec2_host, user, key)

# Start Clipper through the cluster handle (behavior defined in clipper_manager).
clipper.start_clipper()

__TODO: Move all application stuff to frontend dev notebook__

In [None]:
# Presumably lists the applications registered so far; this runs before
# "superman_detection" is registered. TODO confirm against clipper_manager.
clipper.list_apps()

In [None]:
# Register the application together with the two models it may be served by.
app_name = "superman_detection"
candidate_models = [
    dict(model_name="sklearn_cifar", model_version=1),
    dict(model_name="tf_cifar", model_version=1),
]

registration_args = dict(
    name=app_name,
    candidate_models=candidate_models,
    input_type="doubles",
    slo_micros=20000,
)
clipper.register_application(**registration_args)

In [None]:
# Same call as before registration; presumably "superman_detection" now shows
# up in the listing. TODO confirm against clipper_manager.
clipper.list_apps()

# Start a serving workload

# First model: Logistic Regression...

In [2]:
# Show the model class, then its score on the test split.
print(type(lin_model))
lin_test_score = lin_model.score(test_x, test_y)
print("Logistic Regression test score: %f" % lin_test_score)

<class 'sklearn.linear_model.logistic.LogisticRegression'>
Logistic Regression test score: 0.743500


# Deploy to Clipper

In [3]:
# Rebuild the cluster handle (same host, user and key as the setup cell).
# NOTE(review): relies on `os` having been imported by an earlier cell.
import clipper_manager as cl
ec2_host = "ec2-54-67-123-237.us-west-1.compute.amazonaws.com"
user = "ubuntu"
key = os.path.expanduser("~/.ssh/aws_rsa")
clipper = cl.Cluster(ec2_host, user, key)

# Deploy the in-memory logistic regression model. The name and version match
# the "sklearn_cifar" entry in the application's candidate_models.
model_added = clipper.deploy_model(
    name="sklearn_cifar",
    version=1,
    model_data=lin_model,
    container_name="clipper/sklearn_cifar_container:test",
    labels=["cifar", "sklearn"],
    input_type="doubles",
    num_containers=1
)
# `model_added` is whatever deploy_model returned; the recorded output shows True.
print("Model deploy successful? {success}".format(success=model_added))

Checking if Docker is running...
Found clipper/sklearn_cifar_container:test on host
Copied model data to host
Published model to Clipper
Model deploy successful? True


# Check application accuracy

## Deploy TensorFlow Model to Clipper

In [4]:
# Show the session type, then the TensorFlow model's score on the test split.
print(type(tf_session))
tf_test_score = tensorflow_score(tf_session, test_x, test_y)
print("TensorFlow CNN test score: %f" % tf_test_score)

<class 'tensorflow.python.client.session.Session'>
TensorFlow CNN test score: 0.880000


In [5]:
# Deploy the TensorFlow model directory. The name and version match the
# "tf_cifar" entry in the application's candidate_models.
# The directory is derived from `tf_cifar_model_path` (set in the setup cell)
# instead of repeating a user-specific absolute /Users/... path.
tf_cifar_model_dir = os.path.dirname(tf_cifar_model_path)
model_added = clipper.deploy_model(
    name="tf_cifar",
    version=1,
    model_data=tf_cifar_model_dir,
    container_name="clipper/tf_cifar_container:test",
    labels=["cifar", "tf"],
    input_type="doubles",
    num_containers=1
)
# `model_added` is whatever deploy_model returned; the recorded output shows True.
print("Model deploy successful? {success}".format(success=model_added))

Found clipper/tf_cifar_container:test on host
Copied model data to host
Published model to Clipper
Model deploy successful? True


# Check application accuracy again

Check accuracy of serving workload

## Collect feedback for multi-armed bandit model

In [6]:
# Query the bandit weights for the application before any feedback is sent.
# The recorded output shows the call printing one "<model>: <weight>" line per
# model (0.5 each here).
app_name = "superman_detection"
weights = clipper.get_bandit_weights(app_name=app_name, uid=0)

sklearn_cifar: 0.500000
tf_cifar: 0.500000


In [7]:
# Send feedback for the application using the test split.
# NOTE(review): the meaning of the trailing 70 is not visible in this notebook
# -- confirm against cifar_utils.enable_feedback.
cifar_utils.enable_feedback(ec2_host, app_name, test_x, test_y, 70)

In [8]:
# Re-query the weights after feedback; the recorded output shows them shifted
# toward tf_cifar (about 0.77 vs 0.23).
weights = clipper.get_bandit_weights(app_name=app_name, uid=0)

sklearn_cifar: 0.230760
tf_cifar: 0.769240


# Check application accuracy again

So what we've just seen is...

+ Easy for a developer to create and query an application
+ Easy for a data scientist to improve application accuracy while using heterogeneous models
+ The developer and the data scientist each interact with Clipper completely independently

# Clean Up

In [None]:
# Tear down at the end of the demo. The two commented calls are alternative
# maintenance actions left disabled; only cleanup() runs.
# clipper.stop_all()
# clipper.pull_docker_images()
clipper.cleanup()