In [34]:
import tensorflow as tf
from tensorflow.python.tools import strip_unused_lib
from tensorflow.python.framework import dtypes
from tensorflow.python.platform import gfile
import numpy as np
import tfcoreml
import coremltools
from coremltools.models import datatypes
from coremltools.models import neural_network
from coremltools.models.pipeline import *

### Converting from TensorFlow

First, load the SavedModel into a new TensorFlow graph object:

In [35]:
def load_saved_model(path):
    """Load a SavedModel into a fresh graph and return that graph.

    Args:
        path: Location of the SavedModel, handed to
            ``tf.saved_model.loader.load`` as the export directory.

    Returns:
        The newly created ``tf.Graph`` that the model was loaded into.
    """
    graph = tf.Graph()
    # The session only exists to drive the loader; the graph is what we keep.
    with tf.Session(graph=graph) as session:
        serving_tags = [tf.saved_model.tag_constants.SERVING]
        tf.saved_model.loader.load(session, serving_tags, path)
    return graph

In [36]:
# NOTE(review): the path is an empty string here — point it at the SavedModel
# export directory before running this cell.
saved_model_path = ""
the_graph = load_saved_model(saved_model_path)

INFO:tensorflow:Saver not created because there are no variables in the graph to restore
INFO:tensorflow:The specified SavedModel has no variables; no checkpoints were restored.


Next, we’ll use a helper function to strip away unused subgraphs and save the result as another frozen model:

In [37]:
# File that optimize_graph() writes the stripped GraphDef to, and that
# tfcoreml.convert() later reads from.
frozen_model_file = "frozen_model.pb"
# Graph node used as the input of the stripped model.
input_node = "Preprocessor/sub"
# Graph nodes used as the two outputs of the stripped model.
bbox_output_node = "concat"
class_output_node = "Postprocessor/convert_scores"

In [38]:
def optimize_graph(graph):
    """Strip `graph` with strip_unused and write the result to disk.

    The module-level ``input_node`` is passed as the only input node name and
    ``bbox_output_node`` / ``class_output_node`` as the output node names.
    The resulting GraphDef is serialized into ``frozen_model_file``.

    Args:
        graph: The ``tf.Graph`` whose GraphDef should be stripped.
    """
    wanted_outputs = [bbox_output_node, class_output_node]
    stripped_graph_def = strip_unused_lib.strip_unused(
        input_graph_def=graph.as_graph_def(),
        input_node_names=[input_node],
        output_node_names=wanted_outputs,
        placeholder_type_enum=dtypes.float32.as_datatype_enum)

    with gfile.GFile(frozen_model_file, "wb") as out_file:
        out_file.write(stripped_graph_def.SerializeToString())

In [39]:
# Writes the stripped GraphDef to `frozen_model_file`; nothing is returned.
optimize_graph(the_graph)

Instructions for updating:
Use tf.compat.v1.graph_util.extract_sub_graph


Now we’ve got something that tfcoreml will be happy with. To convert the frozen TensorFlow graph to Core ML, do the following:

In [40]:
# Destination of the converted Core ML model.
coreml_model_path = "MobileNetV2_SSDLite.mlmodel"

# Spatial size of the image input declared to the converter.
input_width = 300
input_height = 300

# Tensor names are the node names plus the ":0" output index.
input_tensor = input_node + ":0"
bbox_output_tensor = bbox_output_node + ":0"
class_output_tensor = class_output_node + ":0"

# The input is declared as an image. With image_scale = 2/255 and a bias of -1
# per channel, a pixel value p becomes p * 2/255 - 1, i.e. 0 -> -1 and 255 -> +1.
ssd_model = tfcoreml.convert(
    tf_model_path=frozen_model_file,
    mlmodel_path=coreml_model_path,
    input_name_shape_dict={ input_tensor: [1, input_height, input_width, 3] },
    image_input_names=input_tensor,
    output_feature_names=[bbox_output_tensor, class_output_tensor],
    is_bgr=False,
    red_bias=-1.0,
    green_bias=-1.0,
    blue_bias=-1.0,
    image_scale=2./255)


Loading the TF graph...
Graph Loaded.
Collecting all the 'Const' ops from the graph, by running it....
Done.
Now finding ops in the TF graph that can be dropped for inference
Now starting translation to CoreML graph.
Automatic shape interpretation succeeded for input blob Preprocessor/sub:0
1/1260: Analysing op name: Postprocessor/scale_logits/y ( type:  Const )
2/1260: Analysing op name: concat_1/axis ( type:  Const )
3/1260: Analysing op name: concat/axis ( type:  Const )
4/1260: Analysing op name: BoxPredictor_5/Reshape_1/shape/2 ( type:  Const )
5/1260: Analysing op name: BoxPredictor_5/Reshape_1/shape/1 ( type:  Const )
6/1260: Analysing op name: BoxPredictor_5/strided_slice_1/stack_2 ( type:  Const )
7/1260: Analysing op name: BoxPredictor_5/strided_slice_1/stack_1 ( type:  Const )
8/1260: Analysing op name: BoxPredictor_5/strided_slice_1/stack ( type:  Const )
9/1260: Analysing op name: BoxPredictor_5/ClassPredictor/biases ( type:  Const )
10/1260: Analysing op name: BoxPredict

979/1260: Analysing op name: FeatureExtractor/MobilenetV2/expanded_conv_7/depthwise/BatchNorm/FusedBatchNorm ( type:  FusedBatchNorm )
980/1260: Analysing op name: FeatureExtractor/MobilenetV2/expanded_conv_7/depthwise/Relu6 ( type:  Relu6 )
981/1260: Analysing op name: FeatureExtractor/MobilenetV2/expanded_conv_7/depthwise_output ( type:  Identity )
982/1260: Analysing op name: FeatureExtractor/MobilenetV2/expanded_conv_7/project/Conv2D ( type:  Conv2D )
983/1260: Analysing op name: FeatureExtractor/MobilenetV2/expanded_conv_7/project/BatchNorm/FusedBatchNorm ( type:  FusedBatchNorm )
984/1260: Analysing op name: FeatureExtractor/MobilenetV2/expanded_conv_7/project/Identity ( type:  Identity )
985/1260: Analysing op name: FeatureExtractor/MobilenetV2/expanded_conv_7/add ( type:  Add )
986/1260: Analysing op name: FeatureExtractor/MobilenetV2/expanded_conv_7/output ( type:  Identity )
987/1260: Analysing op name: FeatureExtractor/MobilenetV2/expanded_conv_8/input ( type:  Identity )
98

1073/1260: Analysing op name: BoxPredictor_0/BoxEncodingPredictor_depthwise/depthwise ( type:  DepthwiseConv2dNative )
1074/1260: Analysing op name: BoxPredictor_0/BoxEncodingPredictor_depthwise/BatchNorm/FusedBatchNorm ( type:  FusedBatchNorm )
1075/1260: Analysing op name: BoxPredictor_0/BoxEncodingPredictor_depthwise/Relu6 ( type:  Relu6 )
1076/1260: Analysing op name: BoxPredictor_0/BoxEncodingPredictor/Conv2D ( type:  Conv2D )
1077/1260: Analysing op name: BoxPredictor_0/BoxEncodingPredictor/BiasAdd ( type:  BiasAdd )
1078/1260: Analysing op name: BoxPredictor_0/Reshape ( type:  Reshape )
1079/1260: Analysing op name: FeatureExtractor/MobilenetV2/expanded_conv_13/depthwise/depthwise ( type:  DepthwiseConv2dNative )
1080/1260: Analysing op name: FeatureExtractor/MobilenetV2/expanded_conv_13/depthwise/BatchNorm/FusedBatchNorm ( type:  FusedBatchNorm )
1081/1260: Analysing op name: FeatureExtractor/MobilenetV2/expanded_conv_13/depthwise/Relu6 ( type:  Relu6 )
1082/1260: Analysing op 

1161/1260: Analysing op name: BoxPredictor_2/ClassPredictor_depthwise/depthwise ( type:  DepthwiseConv2dNative )
1162/1260: Analysing op name: BoxPredictor_2/ClassPredictor_depthwise/BatchNorm/FusedBatchNorm ( type:  FusedBatchNorm )
1163/1260: Analysing op name: BoxPredictor_2/ClassPredictor_depthwise/Relu6 ( type:  Relu6 )
1164/1260: Analysing op name: BoxPredictor_2/ClassPredictor/Conv2D ( type:  Conv2D )
1165/1260: Analysing op name: BoxPredictor_2/ClassPredictor/BiasAdd ( type:  BiasAdd )
1166/1260: Analysing op name: BoxPredictor_2/Reshape_1 ( type:  Reshape )
1167/1260: Analysing op name: BoxPredictor_2/Shape ( type:  Shape )
1168/1260: Analysing op name: BoxPredictor_2/strided_slice ( type:  StridedSlice )
1169/1260: Analysing op name: BoxPredictor_2/Reshape/shape ( type:  Pack )
1170/1260: Analysing op name: BoxPredictor_2/BoxEncodingPredictor_depthwise/depthwise ( type:  DepthwiseConv2dNative )
1171/1260: Analysing op name: BoxPredictor_2/BoxEncodingPredictor_depthwise/BatchN

1246/1260: Analysing op name: BoxPredictor_5/ClassPredictor/BiasAdd ( type:  BiasAdd )
1247/1260: Analysing op name: BoxPredictor_5/Reshape_1 ( type:  Reshape )
1248/1260: Analysing op name: concat_1 ( type:  ConcatV2 )
1249/1260: Analysing op name: Postprocessor/scale_logits ( type:  RealDiv )
1250/1260: Analysing op name: Postprocessor/convert_scores ( type:  Sigmoid )
1251/1260: Analysing op name: BoxPredictor_5/Shape ( type:  Shape )
1252/1260: Analysing op name: BoxPredictor_5/strided_slice ( type:  StridedSlice )
1253/1260: Analysing op name: BoxPredictor_5/Reshape/shape ( type:  Pack )
1254/1260: Analysing op name: BoxPredictor_5/BoxEncodingPredictor_depthwise/depthwise ( type:  DepthwiseConv2dNative )
1255/1260: Analysing op name: BoxPredictor_5/BoxEncodingPredictor_depthwise/BatchNorm/FusedBatchNorm ( type:  FusedBatchNorm )
1256/1260: Analysing op name: BoxPredictor_5/BoxEncodingPredictor_depthwise/Relu6 ( type:  Relu6 )
1257/1260: Analysing op name: BoxPredictor_5/BoxEncodin

### Cleaning it up

Let’s rename the input to "image" and the two outputs to "scores" and "boxes", respectively. This requires using the model’s spec object:

In [41]:
spec = ssd_model.get_spec()

# (feature description, new name, human-readable summary)
renamed_features = [
    (spec.description.input[0], "image", "Input image"),
    (spec.description.output[0], "scores",
     "Predicted class scores for each bounding box"),
    (spec.description.output[1], "boxes",
     "Predicted coordinates for each bounding box"),
]
for feature, new_name, summary in renamed_features:
    feature.name = new_name
    feature.shortDescription = summary

It’s not enough to change these names in the spec.description. Any layers that are connected to the old input or output names must now use the new names too. Likewise for the object that handles the image preprocessing.

In [42]:
# Apply the same name mangling (":" and "/" -> "__") to the tensor names so
# they can be compared against the blob names stored in the converted spec.
input_mlmodel = input_tensor.replace(":", "__").replace("/", "__")
class_output_mlmodel = class_output_tensor.replace(":", "__").replace("/", "__")
bbox_output_mlmodel = bbox_output_tensor.replace(":", "__").replace("/", "__")

# Old blob name -> new feature name.
blob_renames = {
    input_mlmodel: "image",
    class_output_mlmodel: "scores",
    bbox_output_mlmodel: "boxes",
}

# Check every input and output of every layer, not just index 0: a layer may
# have no inputs at all (indexing [0] would raise IndexError), and a renamed
# blob may sit at any position or be consumed by more than one layer.
for layer in spec.neuralNetwork.layers:
    for blob_names in (layer.input, layer.output):
        for j in range(len(blob_names)):
            if blob_names[j] in blob_renames:
                blob_names[j] = blob_renames[blob_names[j]]

# The image preprocessing is keyed by feature name, so it must follow the rename.
spec.neuralNetwork.preprocessing[0].featureName = "image"

If we look at the outputs using print(spec.description), the "scores" output correctly shows up as a multi-array but its shape is not filled in, so we set it by hand to (num_classes + 1, num_anchors):

In [43]:
num_classes = 9
num_anchors = 1917
# Assign the complete shape in one step rather than appending to it, so that
# re-running this cell leaves a 2-element shape instead of growing it further.
spec.description.output[0].type.multiArrayType.shape[:] = [num_classes + 1, num_anchors]

In [44]:
# Convert the network weights in the edited spec to fp16, rebuild an MLModel
# from it and write it to `coreml_model_path`.
spec = coremltools.utils.convert_neural_network_spec_weights_to_fp16(spec)
ssd_model = coremltools.models.MLModel(spec)
ssd_model.save(coreml_model_path)

### Decoding the bounding box predictions

In [7]:
# Same values as in the conversion cells above; repeated so this section can
# be run on its own.
input_width = 300
input_height = 300
num_classes = 9
num_anchors = 1917

def get_anchors(graph, tensor_name, sess=None):
    """Evaluate the anchor-box tensor and return the anchors in center form.

    Args:
        graph: Graph containing "image_tensor:0" and the tensor `tensor_name`.
        tensor_name: Name of the tensor holding one
            [ymin, xmin, ymax, xmax] row per anchor.
        sess: Session used to evaluate the tensor. When omitted, the current
            default session is used (e.g. the one installed by an enclosing
            ``with tf.Session(...)``); if there is none, a temporary session
            on `graph` is created and closed again.

    Returns:
        Array of shape (4, number of anchors) with the rows
        [ycenter, xcenter, height, width].
    """
    image_tensor = graph.get_tensor_by_name("image_tensor:0")
    box_corners_tensor = graph.get_tensor_by_name(tensor_name)
    # The graph needs some image to be fed; an all-zero image of the
    # configured input size is used.
    feed = {image_tensor: np.zeros((1, input_height, input_width, 3))}

    # Previously this function silently read a global named `sess`, which only
    # worked when the caller happened to bind that exact name.
    if sess is None:
        sess = tf.get_default_session()
    if sess is None:
        with tf.Session(graph=graph) as own_sess:
            box_corners = own_sess.run(box_corners_tensor, feed_dict=feed)
    else:
        box_corners = sess.run(box_corners_tensor, feed_dict=feed)

    # Convert corner coordinates to center/size form.
    ymin, xmin, ymax, xmax = np.transpose(box_corners)
    width = xmax - xmin
    height = ymax - ymin
    ycenter = ymin + height / 2.
    xcenter = xmin + width / 2.
    return np.stack([ycenter, xcenter, height, width])

# Name of the graph tensor that holds the anchor boxes.
anchors_tensor = "Concatenate/concat:0"
with the_graph.as_default():
    # The session opened here is the one used to evaluate the anchors tensor.
    with tf.Session(graph=the_graph) as sess:
        anchors = get_anchors(the_graph, anchors_tensor)

### Decoding inside the Core ML model

In [8]:
# Decoder inputs: the two SSD outputs, declared as 3-dimensional arrays.
input_features = [
    ("scores", datatypes.Array(num_classes + 1, num_anchors, 1)),
    ("boxes", datatypes.Array(4, num_anchors, 1))
]

# Decoder outputs: one row per anchor, with num_classes scores and
# 4 coordinates respectively.
output_features = [
    ("raw_confidence", datatypes.Array(num_anchors, num_classes)),
    ("raw_coordinates", datatypes.Array(num_anchors, 4))
]

builder = neural_network.NeuralNetworkBuilder(input_features, output_features)

In [9]:
# Reorder the axes of "scores". `dim` lists the new order of the
# (sequence, channel, height, width) axes, so (0, 3, 2, 1) swaps channel and width.
builder.add_permute(name="permute_scores",
                    dim=(0, 3, 2, 1),
                    input_name="scores",
                    output_name="permute_scores_output")

In [10]:
# Keep indices 1 .. num_classes along the width axis, i.e. drop index 0 and
# leave num_classes scores per anchor.
# NOTE(review): index 0 is presumably the background class — confirm.
builder.add_slice(name="slice_scores",
                  input_name="permute_scores_output",
                  output_name="raw_confidence",
                  axis="width",
                  start_index=1,
                  end_index=num_classes + 1)

In [11]:
# Take channels 0 and 1 of "boxes" (the y and x predictions, going by the
# layer name).
builder.add_slice(name="slice_yx",
                  input_name="boxes",
                  output_name="slice_yx_output",
                  axis="channel",
                  start_index=0,
                  end_index=2)

In [12]:
# Single-input MULTIPLY with `alpha`: multiplies the y/x predictions by 0.1.
# NOTE(review): 0.1 is presumably the inverse of the box coder's scale factor
# (10) used during training — confirm against the training config.
builder.add_elementwise(name="scale_yx",
                        input_names="slice_yx_output",
                        output_name="scale_yx_output",
                        mode="MULTIPLY",
                        alpha=0.1)

In [13]:
# get_anchors() stacks rows as [ycenter, xcenter, height, width]: rows 0-1 are
# the anchor centers, rows 2-3 the anchor sizes. A trailing axis of size 1 is
# added so each array has shape (2, num_anchors, 1).
anchors_yx = np.expand_dims(anchors[:2, :], axis=-1)
anchors_hw = np.expand_dims(anchors[2:, :], axis=-1)
# Embed both arrays into the decoder as constant blobs.
builder.add_load_constant(name="anchors_yx",
                          output_name="anchors_yx",
                          constant_value=anchors_yx,
                          shape=[2, num_anchors, 1])

builder.add_load_constant(name="anchors_hw",
                          output_name="anchors_hw",
                          constant_value=anchors_hw,
                          shape=[2, num_anchors, 1])

In [14]:
# decoded_yx = scaled (y, x) prediction * anchor (h, w) + anchor (y, x)
builder.add_elementwise(name="yw_times_hw",
                        input_names=["scale_yx_output", "anchors_hw"],
                        output_name="yw_times_hw_output",
                        mode="MULTIPLY")

builder.add_elementwise(name="decoded_yx",
                        input_names=["yw_times_hw_output", "anchors_yx"],
                        output_name="decoded_yx_output",
                        mode="ADD")

In [15]:
# Take channels 2 and 3 of "boxes" (the height and width predictions, going
# by the layer name).
builder.add_slice(name="slice_hw",
                  input_name="boxes",
                  output_name="slice_hw_output",
                  axis="channel",
                  start_index=2,
                  end_index=4)

# Multiply the h/w predictions by 0.2.
# NOTE(review): 0.2 is presumably the inverse of the box coder's size scale
# factor (5) used during training — confirm against the training config.
builder.add_elementwise(name="scale_hw",
                        input_names="slice_hw_output",
                        output_name="scale_hw_output",
                        mode="MULTIPLY",
                        alpha=0.2)

In [16]:
# decoded_hw = exp(scaled (h, w) prediction) * anchor (h, w)
builder.add_unary(name="exp_hw",
                  input_name="scale_hw_output",
                  output_name="exp_hw_output",
                  mode="exp")

builder.add_elementwise(name="decoded_hw",
                        input_names=["exp_hw_output", "anchors_hw"],
                        output_name="decoded_hw_output",
                        mode="MULTIPLY")

In [17]:
# Split the decoded (y, x) and (h, w) blobs into four single-channel blobs.
# Each entry: (layer name, source blob, channel index to extract).
single_channel_slices = [
    ("slice_y", "decoded_yx_output", 0),
    ("slice_x", "decoded_yx_output", 1),
    ("slice_h", "decoded_hw_output", 0),
    ("slice_w", "decoded_hw_output", 1),
]
for layer_name, source_blob, channel in single_channel_slices:
    builder.add_slice(name=layer_name,
                      input_name=source_blob,
                      output_name=layer_name + "_output",
                      axis="channel",
                      start_index=channel,
                      end_index=channel + 1)

# Re-assemble the four blobs in (x, y, width, height) order.
builder.add_elementwise(name="concat",
                        input_names=["slice_x_output", "slice_y_output",
                                     "slice_w_output", "slice_h_output"],
                        output_name="concat_output",
                        mode="CONCAT")

In [18]:
# Same axis swap as for the scores (channel <-> width); the result is the
# decoder's "raw_coordinates" output.
builder.add_permute(name="permute_output",
                    dim=(0, 3, 2, 1),
                    input_name="concat_output",
                    output_name="raw_coordinates")

In [20]:
# Wrap the finished decoder spec in an MLModel and save it as its own file.
decoder_model = coremltools.models.MLModel(builder.spec)
decoder_model.save("Decoder.mlmodel")

### Non-maximum suppression

In [22]:
# Start from an empty Core ML model spec; the NMS model is assembled by hand.
nms_spec = coremltools.proto.Model_pb2.Model()
# NOTE(review): presumably the minimum spec version that supports the
# non-maximum suppression model type — confirm.
nms_spec.specificationVersion = 3

In [23]:
# Use each of the decoder's two outputs as the template for both an input
# and an output of the NMS model (copied via serialize/parse).
nms_description = nms_spec.description
for i in range(2):
    decoder_output = decoder_model._spec.description.output[i].SerializeToString()
    for features in (nms_description.input, nms_description.output):
        features.add()
        features[i].ParseFromString(decoder_output)

# The inputs keep the decoder's names; the outputs get their public names.
for feature, public_name in zip(nms_description.output,
                                ("confidence", "coordinates")):
    feature.name = public_name

In [24]:
# Size of the second dimension of the "confidence" and "coordinates" outputs.
output_sizes = [num_classes, 4]
for i, fixed_size in enumerate(output_sizes):
    ma_type = nms_spec.description.output[i].type.multiArrayType
    # (lowerBound, upperBound) per dimension: the first dimension is flexible
    # (0 up to -1), the second is pinned to `fixed_size`.
    dimension_bounds = [(0, -1), (fixed_size, fixed_size)]
    for dim, (lower, upper) in enumerate(dimension_bounds):
        ma_type.shapeRange.sizeRanges.add()
        ma_type.shapeRange.sizeRanges[dim].lowerBound = lower
        ma_type.shapeRange.sizeRanges[dim].upperBound = upper
    # Drop the fixed shape inherited from the decoder output.
    del ma_type.shape[:]

In [25]:
# Tell the NMS model which features carry its inputs, outputs and the two
# threshold overrides. The names match the decoder outputs and the
# pipeline inputs/outputs defined elsewhere in this notebook.
nms = nms_spec.nonMaximumSuppression
nms.confidenceInputFeatureName = "raw_confidence"
nms.coordinatesInputFeatureName = "raw_coordinates"
nms.confidenceOutputFeatureName = "confidence"
nms.coordinatesOutputFeatureName = "coordinates"
nms.iouThresholdInputFeatureName = "iouThreshold"
nms.confidenceThresholdInputFeatureName = "confidenceThreshold"

In [26]:
# Thresholds stored in the model; they are also written to the pipeline's
# user-defined metadata later in this notebook.
default_iou_threshold = 0.6
default_confidence_threshold = 0.4
nms.iouThreshold = default_iou_threshold
nms.confidenceThreshold = default_confidence_threshold

In [27]:
# Perform suppression separately for each class instead of across all classes.
nms.pickTop.perClass = True

In [28]:
# One class label per line. Plain file reading is used instead of
# np.loadtxt(..., delimiter="\n"), which is fragile here: it drops anything
# after a "#" and returns a 0-d array for a single label. Blank lines are
# skipped and surrounding whitespace is removed.
with open("labelmap.txt") as label_file:
    labels = [line.strip() for line in label_file if line.strip()]

# The NMS model needs exactly one label per confidence column.
if len(labels) != num_classes:
    raise ValueError("labelmap.txt has %d labels but num_classes is %d"
                     % (len(labels), num_classes))
nms.stringClassLabels.vector.extend(labels)

In [29]:
# Wrap the hand-built NMS spec in an MLModel and save it as its own file.
nms_model = coremltools.models.MLModel(nms_spec)
nms_model.save("NMS.mlmodel")

### Putting it together into a pipeline

In [31]:
# Pipeline inputs: the image plus the two optional NMS threshold overrides.
# The image size comes from input_height / input_width instead of repeating
# the literal 300, so it cannot drift from the size used for conversion.
input_features = [ ("image", datatypes.Array(3, input_height, input_width)),
                   ("iouThreshold", datatypes.Double()),
                   ("confidenceThreshold", datatypes.Double()) ]

# Pipeline outputs: the two outputs of the NMS model.
output_features = [ "confidence", "coordinates" ]

pipeline = Pipeline(input_features, output_features)

In [45]:
# Give both SSD outputs an explicit 3-dimensional shape; these are the same
# shapes the decoder declares for its "scores" and "boxes" inputs.
ssd_output = ssd_model._spec.description.output
ssd_output[0].type.multiArrayType.shape[:] = [num_classes + 1, num_anchors, 1]
ssd_output[1].type.multiArrayType.shape[:] = [4, num_anchors, 1]

In [46]:
# Stages are added in execution order: detector -> box decoder -> NMS.
for stage in (ssd_model, decoder_model, nms_model):
    pipeline.add_model(stage)

In [47]:
# Replace the pipeline's image input with the SSD model's input description,
# and its two outputs with the NMS model's output descriptions
# (copied via serialize/parse).
pipeline.spec.description.input[0].ParseFromString(
    ssd_model._spec.description.input[0].SerializeToString())
pipeline.spec.description.output[0].ParseFromString(
    nms_model._spec.description.output[0].SerializeToString())
pipeline.spec.description.output[1].ParseFromString(
    nms_model._spec.description.output[1].SerializeToString())

43

In [48]:
# Human-readable descriptions for the threshold inputs (indices 1 and 2) and
# the two outputs. \xd7 is the multiplication sign.
pipeline.spec.description.input[1].shortDescription = "(optional) IOU Threshold override"
pipeline.spec.description.input[2].shortDescription = "(optional) Confidence Threshold override"
pipeline.spec.description.output[0].shortDescription = u"Boxes \xd7 Class confidence"
pipeline.spec.description.output[1].shortDescription = u"Boxes \xd7 [x, y, width, height] (relative to image size)"

In [49]:
# Model-level metadata.
# NOTE(review): versionString names a COCO checkpoint while shortDescription
# says the model was trained on a spider dataset, and `license` holds a URL
# rather than a license name — confirm these values are intended.
pipeline.spec.description.metadata.versionString = "ssdlite_mobilenet_v2_coco_2018_05_09"
pipeline.spec.description.metadata.shortDescription = "MobileNetV2 + SSDLite, trained on our spider dataset"
pipeline.spec.description.metadata.author = "Converted to Core ML by Matthijs Hollemans"
pipeline.spec.description.metadata.license = "https://github.com/tensorflow/models/blob/master/research/object_detection"

In [50]:
# Extra metadata stored as strings: the comma-separated class labels and the
# default NMS thresholds.
user_defined_metadata = {
    "classes": ",".join(labels),
    "iou_threshold": str(default_iou_threshold),
    "confidence_threshold": str(default_confidence_threshold)
}
pipeline.spec.description.metadata.userDefined.update(user_defined_metadata)

In [51]:
pipeline.spec.specificationVersion = 3
final_model = coremltools.models.MLModel(pipeline.spec)
# NOTE(review): `coreml_model_path` is the same path the stand-alone SSD model
# was saved to earlier in this notebook, so this overwrites that file with the
# complete pipeline — confirm that is intended.
final_model.save(coreml_model_path)