Add NMS to CoreML model output, works with Vision #7263

Closed · wants to merge 4 commits
128 changes: 124 additions & 4 deletions export.py
@@ -186,7 +186,26 @@ def export_openvino(model, im, file, prefix=colorstr('OpenVINO:')):
        LOGGER.info(f'\n{prefix} export failure: {e}')


def export_coreml(model, im, file, prefix=colorstr('CoreML:')):
class CoreMLExportModel(torch.nn.Module):

    def __init__(self, base_model, img_size):
        super().__init__()
        self.base_model = base_model
        self.img_size = img_size

    def forward(self, x):
        x = self.base_model(x)[0]
        x = x.squeeze(0)
        # Convert box coords to normalized coordinates [0 ... 1]
        w = self.img_size[0]
        h = self.img_size[1]
        objectness = x[:, 4:5]
        class_probs = x[:, 5:] * objectness
        boxes = x[:, :4] * torch.tensor([1. / w, 1. / h, 1. / w, 1. / h])
        return class_probs, boxes
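For reference (not part of the diff), the tensor shapes this wrapper produces for a 640x640 input and an 80-class YOLOv5s head would be roughly as follows; the numbers are illustrative:

# base_model(x)[0] -> (1, 25200, 85)  # [cx, cy, w, h, objectness, 80 class scores]
# class_probs      -> (25200, 80)     # per-class score = class score * objectness
# boxes            -> (25200, 4)      # [cx, cy, w, h], normalized to [0, 1]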


def export_coreml(model, im, file, num_boxes, num_classes, labels, conf_thres, iou_thres, prefix=colorstr('CoreML:')):
    # YOLOv5 CoreML export
    try:
        check_requirements(('coremltools',))
@@ -195,8 +214,108 @@ def export_coreml(model, im, file, prefix=colorstr('CoreML:')):
        LOGGER.info(f'\n{prefix} starting export with coremltools {ct.__version__}...')
        f = file.with_suffix('.mlmodel')

        ts = torch.jit.trace(model, im, strict=False)  # TorchScript model
        ct_model = ct.convert(ts, inputs=[ct.ImageType('image', shape=im.shape, scale=1 / 255, bias=[0, 0, 0])])
        export_model = CoreMLExportModel(model, img_size=opt.imgsz)

        ts = torch.jit.trace(export_model, im, strict=False)  # TorchScript model
        orig_model = ct.convert(ts, inputs=[ct.ImageType('image', shape=im.shape, scale=1 / 255, bias=[0, 0, 0])])

        spec = orig_model.get_spec()
        old_box_output_name = spec.description.output[1].name
        old_scores_output_name = spec.description.output[0].name
        ct.utils.rename_feature(spec, old_scores_output_name, "raw_confidence")
        ct.utils.rename_feature(spec, old_box_output_name, "raw_coordinates")
        spec.description.output[0].type.multiArrayType.shape.extend([num_boxes, num_classes])
        spec.description.output[1].type.multiArrayType.shape.extend([num_boxes, 4])
        spec.description.output[0].type.multiArrayType.dataType = ct.proto.FeatureTypes_pb2.ArrayFeatureType.DOUBLE
        spec.description.output[1].type.multiArrayType.dataType = ct.proto.FeatureTypes_pb2.ArrayFeatureType.DOUBLE

        yolo_model = ct.models.MLModel(spec)

        # Build Non Maximum Suppression model
        nms_spec = ct.proto.Model_pb2.Model()
        nms_spec.specificationVersion = 3

        for i in range(2):
            decoder_output = spec.description.output[i].SerializeToString()

            nms_spec.description.input.add()
            nms_spec.description.input[i].ParseFromString(decoder_output)

            nms_spec.description.output.add()
            nms_spec.description.output[i].ParseFromString(decoder_output)

        nms_spec.description.output[0].name = "confidence"
        nms_spec.description.output[1].name = "coordinates"

        output_sizes = [num_classes, 4]
        for i in range(2):
            ma_type = nms_spec.description.output[i].type.multiArrayType
            ma_type.shapeRange.sizeRanges.add()
            ma_type.shapeRange.sizeRanges[0].lowerBound = 0
            ma_type.shapeRange.sizeRanges[0].upperBound = -1
            ma_type.shapeRange.sizeRanges.add()
            ma_type.shapeRange.sizeRanges[1].lowerBound = output_sizes[i]
            ma_type.shapeRange.sizeRanges[1].upperBound = output_sizes[i]
            del ma_type.shape[:]

        nms = nms_spec.nonMaximumSuppression
        nms.confidenceInputFeatureName = "raw_confidence"
        nms.coordinatesInputFeatureName = "raw_coordinates"
        nms.confidenceOutputFeatureName = "confidence"
        nms.coordinatesOutputFeatureName = "coordinates"
        nms.iouThresholdInputFeatureName = "iouThreshold"
        nms.confidenceThresholdInputFeatureName = "confidenceThreshold"

        nms.iouThreshold = iou_thres
        nms.confidenceThreshold = conf_thres
        nms.pickTop.perClass = False
        nms.stringClassLabels.vector.extend(labels)

@mshamash if you end up merging newer commits from the main branch into this branch, these labels could/should be changed to labels.values() since it's a dictionary now 👍

@hietalajulius don't forget that a dictionary is not ordered. I guess it's better to do [labels[k] for k in sorted(labels.keys())]
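A minimal sketch combining the two suggestions above, assuming labels may arrive either as a list or as an {index: name} dict; the isinstance guard is illustrative, not part of the diff:

if isinstance(labels, dict):  # newer YOLOv5 versions pass {class index: name}
    labels = [labels[k] for k in sorted(labels.keys())]  # preserve class-index order
nms.stringClassLabels.vector.extend(labels)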


        nms_model = ct.models.MLModel(nms_spec)

        # Assembling a pipeline model from the two models
        input_features = [("image", ct.models.datatypes.Array(3, 300, 300)),
                          ("iouThreshold", ct.models.datatypes.Double()),
                          ("confidenceThreshold", ct.models.datatypes.Double())]

        output_features = ["confidence", "coordinates"]

        pipeline = ct.models.pipeline.Pipeline(input_features, output_features)

        pipeline.add_model(yolo_model)
        pipeline.add_model(nms_model)

        # The "image" input should really be an image, not a multi-array
        pipeline.spec.description.input[0].ParseFromString(spec.description.input[0].SerializeToString())

        # Copy the declarations of the "confidence" and "coordinates" outputs
        # The Pipeline makes these strings by default
        pipeline.spec.description.output[0].ParseFromString(nms_spec.description.output[0].SerializeToString())
        pipeline.spec.description.output[1].ParseFromString(nms_spec.description.output[1].SerializeToString())

        # Add descriptions to the inputs and outputs
        pipeline.spec.description.input[1].shortDescription = "(optional) IOU Threshold override"
        pipeline.spec.description.input[2].shortDescription = "(optional) Confidence Threshold override"
        pipeline.spec.description.output[0].shortDescription = "Boxes × Class confidence"
        pipeline.spec.description.output[1].shortDescription = "Boxes [x, y, width, height] (normalized to [0...1])"

        # Add metadata to the model
        pipeline.spec.description.metadata.shortDescription = "YOLOv5 object detector"
        pipeline.spec.description.metadata.author = "Ultralytics"

        # Add the default threshold values and list of class labels
        user_defined_metadata = {
            "iou_threshold": str(iou_thres),
            "confidence_threshold": str(conf_thres),
            "classes": ", ".join(labels)}

@hietalajulius @mshamash Also relevant here.

        pipeline.spec.description.metadata.userDefined.update(user_defined_metadata)

        # Don't forget this or Core ML might attempt to run the model on an unsupported operating system version!
        pipeline.spec.specificationVersion = 3

        ct_model = ct.models.MLModel(pipeline.spec)

        f = str(file).replace('.pt', '.mlmodel')
        ct_model.save(f)

        LOGGER.info(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')
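A hedged verification sketch, not part of the diff: load the exported pipeline with coremltools and run a single prediction (coremltools prediction requires macOS). The file and image paths are assumptions; the input/output names match those defined above.

import coremltools as ct
from PIL import Image

mlmodel = ct.models.MLModel('yolov5s.mlmodel')  # exported pipeline (assumed path)
img = Image.open('bus.jpg').resize((640, 640))  # must match the exported input size

out = mlmodel.predict({
    'image': img,
    'iouThreshold': 0.45,          # optional override of the baked-in default
    'confidenceThreshold': 0.25})  # optional override of the baked-in default
print(out['confidence'].shape, out['coordinates'].shape)  # (N, num_classes), (N, 4)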
@@ -508,7 +627,8 @@ def run(
    if xml:  # OpenVINO
        f[3] = export_openvino(model, im, file)
    if coreml:
        _, f[4] = export_coreml(model, im, file)
        nb = shape[1]
        _, f[4] = export_coreml(model, im, file, nb, nc, names, conf_thres, iou_thres)
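For context, a rough sketch of where the new arguments come from in run(); the surrounding variables are assumptions about code outside this diff:

# shape = tuple(y[0].shape)  # model output shape, e.g. (1, 25200, 85) for a 640x640 input
# nb = shape[1]              # number of candidate boxes, passed as num_boxes
# nc, names                  # class count and labels, already available in run() from the loaded model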

    # TensorFlow Exports
    if any((saved_model, pb, tflite, edgetpu, tfjs)):