# 1. Custom Vision Model Training

In [1]:
# Setup Custom Vision
import os

from azure.cognitiveservices.vision.customvision.training import CustomVisionTrainingClient

endpoint = "https://southcentralus.api.cognitive.microsoft.com"
# Service keys must not be stored in the notebook: read the training key from
# the environment (set CUSTOM_VISION_TRAINING_KEY before starting Jupyter).
# A missing variable raises KeyError naming the variable.
training_key = os.environ["CUSTOM_VISION_TRAINING_KEY"]
project_name = "CatsVsDogs"
data_dir = 'data'

# Create trainer
trainer = CustomVisionTrainingClient(training_key, endpoint=endpoint)

# Find the object detection domain.
# A default of None is passed to next() so that a missing domain produces a
# readable error instead of a bare StopIteration.
obj_detection_domain = next(
    (domain for domain in trainer.get_domains()
     if domain.type == "ObjectDetection" and domain.name == "General (compact)"),
    None,
)
if obj_detection_domain is None:
    raise RuntimeError(
        'No "General (compact)" ObjectDetection domain is available at ' + endpoint
    )

# Create a new project
project = trainer.create_project(project_name, domain_id=obj_detection_domain.id)

In [2]:
# Build the cats-vs-dogs dataset from COCO and upload the training part.
# NOTE(review): `mp`, `get_coco_stream` and `coco_to_custom_vision` are not
# imported explicitly; presumably they all come from the star import below.
from coco_pipe import *

# Tags to keep; reused later when printing the per-tag reports.
input_tags = ['cat', 'dog']

# Get datastream of cats and dogs COCO images
stream = get_coco_stream(
    tags=input_tags, 
    ann_file='annotations/instances_val2017.json',
    data_dir=data_dir
) 

# Split datastream into training and validation
# NOTE(review): split_value=0.3 presumably holds out 30% of the records for
# validation - confirm against the mp.datasplit documentation.
train, val = stream | mp.datasplit(split_value=0.3) | mp.make_train_test_split()

# Upload training stream to the Custom Vision service
# (requires `project` and `trainer` from the setup cell).
coco_to_custom_vision(
    stream=train, 
    project_id=project.id, 
    trainer=trainer, 
    data_dir=data_dir
)

loading annotations into memory...
Done (t=0.62s)
creating index...
index created!
downloaded 0/1 images (t=0.0s)
downloaded 0/1 images (t=0.0s)
downloaded 0/1 images (t=0.0s)
downloaded 0/1 images (t=0.0s)
downloaded 0/1 images (t=0.0s)
downloaded 0/1 images (t=0.0s)
downloaded 0/1 images (t=0.0s)
downloaded 0/1 images (t=0.0s)
downloaded 0/1 images (t=0.0s)
downloaded 0/1 images (t=0.0s)
downloaded 0/1 images (t=0.0s)
downloaded 0/1 images (t=0.0s)
downloaded 0/1 images (t=0.0s)
downloaded 0/1 images (t=0.0s)
downloaded 0/1 images (t=0.0s)
downloaded 0/1 images (t=0.0s)
downloaded 0/1 images (t=0.0s)
downloaded 0/1 images (t=0.0s)
downloaded 0/1 images (t=0.0s)
downloaded 0/1 images (t=0.0s)
downloaded 0/1 images (t=0.0s)
downloaded 0/1 images (t=0.0s)
downloaded 0/1 images (t=0.0s)
downloaded 0/1 images (t=0.0s)
downloaded 0/1 images (t=0.0s)
downloaded 0/1 images (t=0.0s)
downloaded 0/1 images (t=0.0s)
downloaded 0/1 images (t=0.0s)
downloaded 0/1 images (t=0.0s)
downloaded 0/1 ima

In [3]:
import time

# Train the model
print ("Training...")
iteration = trainer.train_project(project.id)

# Poll the service until training finishes. A failed run is reported as an
# error; otherwise the loop would wait forever for a "Completed" status that
# never arrives.
while (iteration.status != "Completed"):
    if iteration.status == "Failed":
        raise RuntimeError("Custom Vision training failed for iteration " + str(iteration.id))
    # Sleep before polling, so there is no pointless wait after completion.
    time.sleep(5)
    iteration = trainer.get_iteration(project.id, iteration.id)
    print ("Training status: " + iteration.status)

# The iteration is now trained. Make it the default project endpoint
trainer.update_iteration(project.id, iteration.id, is_default=True)
print ("Done!")

Training...
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Training
Training status: Completed
Done!


# 2. Model Quantization

 
 ### 2.1 CoreML Quantization

In [4]:
# Compress CoreML model
# Both options below are executed (they are not alternatives): each writes its
# own file next to the original model, and the next cell loads both of them.

import coremltools

# Option 1: convert a full precision (float) MLModel to a 16bit quantized MLModel
# Works on the model spec: load the spec, convert its weights, save the spec.
model_spec = coremltools.utils.load_spec('models/coreml/model.mlmodel')
model_fp16_spec = coremltools.utils.convert_neural_network_spec_weights_to_fp16(model_spec)
coremltools.utils.save_spec(model_fp16_spec, 'models/coreml/modelFP16.mlmodel')

# Option 2: convert a full precision (float) MLModel to a 8bit quantized MLModel
# Works on a loaded MLModel object. The cell output reports "linear
# quantization", so "FP8" in the names means 8-bit quantized weights,
# not an 8-bit floating-point format.
model = coremltools.models.MLModel('models/coreml/model.mlmodel')
model_fp8 = coremltools.models.neural_network.quantization_utils.quantize_weights(model, nbits=8)
model_fp8.save('models/coreml/modelFP8.mlmodel')

Quantizing using linear quantization
Optimizing Neural Network before Quantization:
Finished optimizing network. Quantizing neural network..
Quantizing layer layer1/conv
Quantizing layer layer2/conv
Quantizing layer layer3/conv
Quantizing layer layer4/conv
Quantizing layer layer5/conv
Quantizing layer layer6/conv
Quantizing layer layer7/conv
Quantizing layer layer8/conv
Quantizing layer model_outputs0


In [5]:
# Setup CoreML models: one detector per model variant, all sharing one label list

from models.coreml.python.predict import *

# Class labels, one per line
with open('models/coreml/labels.txt', 'r') as f:
    labels = [line.strip() for line in f]

# Weights converted to 16-bit
coreml_16 = coremltools.models.MLModel('models/coreml/modelFP16.mlmodel')
od_coreml_16 = CoreMLObjectDetection(coreml_16, labels)

# Weights quantized to 8 bits
coreml_8 = coremltools.models.MLModel('models/coreml/modelFP8.mlmodel')
od_coreml_8 = CoreMLObjectDetection(coreml_8, labels)

# Original full-precision export
coreml_orig = coremltools.models.MLModel('models/coreml/model.mlmodel')
od_coreml_orig = CoreMLObjectDetection(coreml_orig, labels)

### 2.2 TensorFlow Quantization
  
 ❗️ **Note:** When experimenting with your own model, remember to copy the ```models.tflite.python.predict_lite``` file alongside it. This file is not included with the exported models.

In [6]:
# Compress TensorFlow model
from pathlib import Path

import tensorflow as tf

# Build a TFLite converter from the frozen graph, naming its input and output tensors.
converter = tf.contrib.lite.TFLiteConverter.from_frozen_graph('models/tflite/model.pb', ['Placeholder'], ['model_outputs'])
# Enable post-training weight quantization.
converter.post_training_quantize = True
tflite_model_8 = converter.convert()

# Path.write_bytes opens, writes and closes the file, so no handle is leaked.
# It returns the number of bytes written, which stays the cell's displayed output.
Path("models/tflite/modelFP8.tflite").write_bytes(tflite_model_8)

11047480

In [7]:
# Setup TensorFlow Original Model

from models.tflite.python.predict import TFObjectDetection as OrigTFObjectDetection
from models.tflite.python.predict_lite import TFObjectDetection as LiteTFObjectDetection

# Load the frozen graph of the original model.
# tf.gfile.GFile replaces the deprecated tf.gfile.FastGFile, as the
# deprecation warning printed by this cell instructs.
graph_orig = tf.GraphDef()
with tf.gfile.GFile('models/tflite/model.pb', 'rb') as f:
    graph_orig.ParseFromString(f.read())

# Load labels (one per line)
with open('models/tflite/labels.txt', 'r') as f:
    labels = [line.strip() for line in f]

# Detector backed by the original frozen graph
od_tflite_orig = OrigTFObjectDetection(graph_orig, labels)

# Detector backed by the quantized .tflite file written by the previous cell
od_tflite_8 = LiteTFObjectDetection('models/tflite/modelFP8.tflite', labels)

Instructions for updating:
Use tf.gfile.GFile.


# 3. Model Validation

In [8]:
import os

from azure.cognitiveservices.vision.customvision.prediction import CustomVisionPredictionClient

# Service keys must not be stored in the notebook: read the prediction key from
# the environment (set CUSTOM_VISION_PREDICTION_KEY before starting Jupyter).
# A missing variable raises KeyError naming the variable.
prediction_key = os.environ["CUSTOM_VISION_PREDICTION_KEY"]

# Uses the same `endpoint` as the training client defined in the setup cell.
predictor = CustomVisionPredictionClient(prediction_key, endpoint=endpoint)

In [9]:
from utils import *


def _predict_with_custom_vision(filename):
    """Send one image from ``data_dir`` to the online Custom Vision endpoint.

    Args:
        filename: image file name, relative to ``data_dir``.

    Returns:
        The ``predictions`` attribute of the service response.
    """
    # The `with` block closes each image file once the request has completed;
    # the previous inline `open(...)` left one handle open per image.
    with open(join(data_dir, filename), mode="rb") as image_file:
        return predictor.predict_image(project.id, image_file, iteration.id).predictions


# NOTE: this cell rebinds `val` to the evaluated result list, so it is not
# idempotent - re-run the split cell before running this one again.
val = (
    val
    # Predict using online CustomVision
    | mp.apply('filename', 'raw_cv', _predict_with_custom_vision)
    # Convert raw predictions to dicts, passing each record's image width and height
    | mp.apply(['raw_cv', 'width', 'height'], 'cv_predictions', lambda x: x[0]
        | mp.select(lambda p: cv_prediction_as_dict(p, x[1], x[2]))
        | mp.as_list
      )
    # Predict using CoreML original
    | apply_quantized_model(data_dir, od_coreml_orig, 'coreml_orig_predictions')
    # Predict using CoreML with 16-bit weights
    | apply_quantized_model(data_dir, od_coreml_16, 'coreml_16_predictions')
    # Predict using CoreML with 8-bit quantized weights
    | apply_quantized_model(data_dir, od_coreml_8, 'coreml_8_predictions')
    # Predict using TensorFlow original
    | apply_quantized_model(data_dir, od_tflite_orig, 'tf_orig_predictions')
    # Predict using TensorFlow Lite with 8-bit quantized weights
    | apply_quantized_model(data_dir, od_tflite_8, 'tf_8_predictions')
    # Drop the raw service responses; only the converted dicts are kept
    | mp.delfield(['raw_cv'])
    | mp.as_list
)

In [10]:
# Inspect the first validation record: ground truth plus every *_predictions field
val[0]

{'width': 500,
 'height': 375,
 'url': 'http://images.cocodataset.org/val2017/000000119828.jpg',
 'filename': '000000119828.jpg',
 'ground_truth': [{'x1': 118.55,
   'y1': 146.77,
   'width': 320.16,
   'height': 157.26,
   'tag': 'cat'}],
 'class_id': 'cat',
 'split': <SplitType.Test: 3>,
 'cv_predictions': [{'tag': 'cat',
   'prob': 0.0154298963,
   'x1': 491.55035000000004,
   'y1': 160.31925,
   'width': 8.44463705,
   'height': 82.2137295},
  {'tag': 'cat',
   'prob': 0.0103507452,
   'x1': 246.3790775,
   'y1': 224.40885000000003,
   'width': 8.959055000000001,
   'height': 11.042989799999999},
  {'tag': 'cat',
   'prob': 0.01881028,
   'x1': 228.6952885,
   'y1': 0.0,
   'width': 36.308035000000004,
   'height': 46.510851},
  {'tag': 'cat',
   'prob': 0.010007699,
   'x1': 426.09215,
   'y1': 151.133839125,
   'width': 66.68370949999999,
   'height': 44.310171},
  {'tag': 'cat',
   'prob': 0.028760666,
   'x1': 0.0,
   'y1': 163.362723,
   'width': 22.250033900000002,
   'height

In [12]:
import pickle

# Save the validation records (ground truth + all predictions) so the reports
# can be rebuilt without re-running inference. The `with` block makes sure the
# file is flushed and closed.
with open('results.pickle', 'wb') as results_file:
    pickle.dump(val, results_file)

# To resume from the saved results, reload them into `val`.
# Only unpickle files you created yourself: pickle can execute arbitrary code.
# with open('results.pickle', 'rb') as results_file:
#     val = pickle.load(results_file)

In [15]:
# Print reports: one precision/recall table per prediction source

# Every field of a record whose name contains 'predictions', in record order
prediction_fields = [name for name in val[0] if 'predictions' in name]

for pred_field in prediction_fields:
    print("\n===== %s =====\n" % pred_field)
    print_report(
        stream=val,
        input_tags=input_tags,
        prob_thresh=0.4,
        overlap_thresh=0.3,
        pred_field=pred_field,
        gt_field='ground_truth',
    )


===== cv_predictions =====

Tag             |  Precision   |    Recall   
---------------------------------------------
cat             |   0.81818    |   0.13636   
dog             |   0.49333    |   0.47436   

===== coreml_orig_predictions =====

Tag             |  Precision   |    Recall   
---------------------------------------------
cat             |     1.0      |   0.060606  
dog             |   0.55769    |   0.37179   

===== coreml_16_predictions =====

Tag             |  Precision   |    Recall   
---------------------------------------------
cat             |     1.0      |   0.060606  
dog             |   0.55769    |   0.37179   

===== coreml_8_predictions =====

Tag             |  Precision   |    Recall   
---------------------------------------------
cat             |   0.30769    |   0.12121   
dog             |   0.42647    |   0.37179   

===== tf_orig_predictions =====

Tag             |  Precision   |    Recall   
---------------------------------------------
