<a href="https://colab.research.google.com/github/ykitaguchi77/Colab_Scripts/blob/master/CoreML_convert%20from%20Pytorch%20models.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#**Conversion of Pytorch script to CoreML**

## Coremltools 5

## Coremltools 4

既存のモデルを変換する場合<br>
https://coremltools.readme.io/docs/pytorch-conversion

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
import torch.nn as nn

########################
# Convert Pytorch model     #
########################

import torch
import torchvision
!pip install coremltools
import coremltools as ct
import numpy as np


# Load a pre-trained version of MobileNetV2
base_model = torchvision.models.mobilenet_v2(pretrained=True)



class TorchClassificationModel(nn.Module):
    def __init__(self):
        super(TorchClassificationModel, self).__init__()
        self.layers = nn.Sequential(
            base_model,
            nn.Softmax(dim=1)
        )
    def forward(self, x):
        return self.layers(x)



# Set the model in evaluation mode
torch_model = TorchClassificationModel().eval()
# torch_model = base_model.eval()

# Trace with random data
example_input = torch.rand(1, 3, 224, 224) # after test, will get 'size mismatch' error message with size 256x256
traced_model = torch.jit.trace(torch_model, example_input)


# Download class labels (from a separate file)
import urllib
label_url = 'https://storage.googleapis.com/download.tensorflow.org/data/ImageNetLabels.txt'
class_labels = urllib.request.urlopen(label_url).read().decode("utf-8").splitlines()
class_labels = class_labels[1:] # remove the first class which is background
assert len(class_labels) == 1000


#Set the image scale and bias for input image preprocessing.
scale = 1.0 / (255.0 * 0.226)
red_bias = -0.485 / 0.226
green_bias = -0.456 / 0.226
blue_bias = -0.406 / 0.226

image_input = ct.ImageType(name="input_1",
                           shape=example_input.shape,
                           scale=scale,
                           bias=[red_bias, green_bias, blue_bias],)
                           #color_layout='BGR')

# image_output = ct.TensorType(dtype=np.float16)



# Convert to Core ML using the Unified Conversion API
mlmodel = ct.convert(
    traced_model,
    #convert_to="mlprogram", #これを入力するとmlpackageとして変換される
    inputs=[image_input], 
    classifier_config = ct.ClassifierConfig(class_labels), 
    # minimum_deployment_target=ct.target.macOS13
    compute_units=ct.ComputeUnit.CPU_ONLY,
)

# Save model
# mlmodel.save("/content/drive/MyDrive/temp_files/MobileNetV2.mlpackage")
mlmodel.save("MobileNetV2_pytorch.mlmodel")

# from google.colab import files
# files.download('/content/drive/MyDrive/temp_files/MobileNetV2.mlpackage')

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


Converting PyTorch Frontend ==> MIL Ops: 100%|█████████▉| 381/382 [00:00<00:00, 2680.19 ops/s]
Running MIL Common passes: 100%|██████████| 38/38 [00:00<00:00, 70.21 passes/s]
Running MIL Clean up passes: 100%|██████████| 11/11 [00:00<00:00, 114.15 passes/s]
Translating MIL ==> NeuralNetwork Ops: 100%|██████████| 496/496 [00:00<00:00, 1751.29 ops/s]


In [None]:
from PIL import Image
import requests
import torch
from torchvision import models, transforms

url = "https://upload.wikimedia.org/wikipedia/commons/c/ca/Pomeranian.JPG"

response = requests.get(url)
image = response.content
file_name = "pomeranian.jpg"
with open(file_name, "wb") as f:
    f.write(image)


preprocess = transforms.Compose([
    transforms.Resize(224),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]
    )
])


sample_img = Image.open("/content/pomeranian.jpg")
display(sample_img)

In [3]:
from torch.nn import functional as F

img_tensor = preprocess(sample_img) #[3,224,224]
input = img_tensor.unsqueeze(0) #[1,3,224,224]
output = base_model(input)

probs = F.softmax(output, dim=1)
probs, indices = probs.sort(dim=1, descending=True)

for probs, indices in zip(probs, indices):
    for k in range(3):
        print(f"Top-{k + 1} {class_labels[indices[k]]} {probs[k]:.2%}")



Top-1 Pomeranian 98.85%
Top-2 chow 0.71%
Top-3 keeshond 0.19%


In [1]:
#########################
# Convert TensorfFlow model  #
#########################

%tensorflow_version 2.x
import tensorflow as tf

keras_model = tf.keras.applications.MobileNetV2(
    weights="imagenet", 
    input_shape=(224, 224, 3,),
    classes=1000,
)

# Download class labels (from a separate file)
import urllib
label_url = 'https://storage.googleapis.com/download.tensorflow.org/data/ImageNetLabels.txt'
class_labels = urllib.request.urlopen(label_url).read().splitlines()
class_labels = class_labels[1:] # remove the first class which is background
assert len(class_labels) == 1000

# make sure entries of class_labels are strings
for i, label in enumerate(class_labels):
  if isinstance(label, bytes):
    class_labels[i] = label.decode("utf8")


!pip install -U coremltools

import coremltools as ct

# Define the input type as image, 
# set pre-processing parameters to normalize the image 
# to have its values in the interval [-1,1] 
# as expected by the mobilenet model
image_input = ct.ImageType(shape=(1, 224, 224, 3,),
                           bias=[-1,-1,-1], scale=1/127)

# set class labels
classifier_config = ct.ClassifierConfig(class_labels)

# Convert the model using the Unified Conversion API
model = ct.convert(
    keras_model, inputs=[image_input], classifier_config=classifier_config,
)

# Set feature descriptions (these show up as comments in XCode)
model.input_description["input_1"] = "Input image to be classified"
model.output_description["classLabel"] = "Most likely image category"

# Set model author name
model.author = '"Original Paper: Mark Sandler, Andrew Howard, Menglong Zhu, Andrey Zhmoginov, Liang-Chieh Chen'

# Set the license of the model
model.license = "Please see https://github.com/tensorflow/tensorflow for license information, and https://github.com/tensorflow/models/tree/master/research/slim/nets/mobilenet for the original source of the model."

# Set a short description for the Xcode UI
model.short_description = "Detects the dominant objects present in an image from a set of 1001 categories such as trees, animals, food, vehicles, person etc. The top-1 accuracy from the original publication is 74.7%."

# Set a version for the model
model.version = "2.0"


# Save model
model.save("MobileNetV2.mlmodel")
                  
# Load a saved model
loaded_model = ct.models.MLModel("MobileNetV2.mlmodel")

Colab only includes TensorFlow 2.x; %tensorflow_version has no effect.
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224.h5
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting coremltools
  Downloading coremltools-6.0-cp37-none-manylinux1_x86_64.whl (1.5 MB)
[K     |████████████████████████████████| 1.5 MB 4.9 MB/s 
Installing collected packages: coremltools
Successfully installed coremltools-6.0


Running TensorFlow Graph Passes: 100%|██████████| 6/6 [00:00<00:00,  9.87 passes/s]
Converting TF Frontend ==> MIL Ops: 100%|██████████| 426/426 [00:00<00:00, 1183.23 ops/s]
Running MIL Common passes: 100%|██████████| 38/38 [00:01<00:00, 30.05 passes/s]
Running MIL Clean up passes: 100%|██████████| 11/11 [00:00<00:00, 99.35 passes/s]
Translating MIL ==> NeuralNetwork Ops: 100%|██████████| 487/487 [00:00<00:00, 1526.54 ops/s]


#**新しく学習を行う場合（MNISTの例）**<br>




参考サイト：<br>
https://qiita.com/shu223/items/6ddfbedb4fdfb2059a11<br>
https://chemicalfactory.hatenablog.com/entry/2020/01/26/230114

MNISTの学習

In [None]:
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision.transforms as transforms
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")



batch_size = 10

trainset = torchvision.datasets.MNIST(root='./data', train=True,
                                        download=True, transform=transforms.Compose([transforms.Grayscale(), transforms.Resize(28), transforms.ToTensor(), transforms.Normalize((0.5,),(0.5,))]))
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                            shuffle=True, num_workers=0)

testset = torchvision.datasets.MNIST(root='./data', train=False, 
                                        download=True, transform=transforms.Compose([transforms.Grayscale(), transforms.Resize(28), transforms.ToTensor(), transforms.Normalize((0.5,),(0.5,))]))
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                            shuffle=False, num_workers=0)

class MNIST_Conv_MN(nn.Module):
    def __init__(self):
        super(MNIST_Conv_MN, self).__init__()
        self.conv1 = nn.Conv2d(1, 8, 3) 
        self.pooling = nn.MaxPool2d(2, 2) 
        self.fc1 = nn.Linear(13 * 13 * 8, 10)

    def forward(self, x):
        x = self.conv1(x)
        x = F.relu(x)
        x = self.pooling(x)
        x = x.view(-1, 13 * 13 * 8)
        x = self.fc1(x)
        return x
    
model=MNIST_Conv_MN().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters())

num_epochs = 10

for epoch in range(num_epochs):
    
    train_loss = 0
    train_acc = 0
    val_loss = 0
    val_acc = 0
    
    model.train()
    for i, (inputs, labels) in enumerate(trainloader):
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        
        train_loss += loss.item()
        train_acc += (outputs.max(1)[1] == labels).sum().item()
        loss.backward()
        optimizer.step()
    avg_train_loss = train_loss / len(trainloader.dataset)
    avg_train_acc = train_acc / len(trainloader.dataset)

    model.eval()
    with torch.no_grad():
        for inputs, labels in testloader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
            val_acc += (outputs.max(1)[1] == labels).sum().item()
        avg_val_loss = val_loss / len(testloader.dataset)
        avg_val_acc = val_acc / len(testloader.dataset)
        
        print("Epoch [{}/{}], Loss: {loss:.4f}, val_loss: {val_loss:.4f}, val_acc: {val_acc:.4f}"\
              .format(epoch+1, num_epochs, i+1, loss=avg_train_loss, val_loss=avg_val_loss, val_acc=avg_val_acc))
        
torch.save(model.state_dict(), "MNIST.pth")

Epoch [1/10], Loss: 0.0190, val_loss: 0.0091, val_acc: 0.9717
Epoch [2/10], Loss: 0.0084, val_loss: 0.0070, val_acc: 0.9781
Epoch [3/10], Loss: 0.0065, val_loss: 0.0069, val_acc: 0.9788
Epoch [4/10], Loss: 0.0054, val_loss: 0.0067, val_acc: 0.9799
Epoch [5/10], Loss: 0.0048, val_loss: 0.0066, val_acc: 0.9803
Epoch [6/10], Loss: 0.0042, val_loss: 0.0063, val_acc: 0.9807
Epoch [7/10], Loss: 0.0037, val_loss: 0.0068, val_acc: 0.9795
Epoch [8/10], Loss: 0.0034, val_loss: 0.0069, val_acc: 0.9801
Epoch [9/10], Loss: 0.0031, val_loss: 0.0063, val_acc: 0.9821
Epoch [10/10], Loss: 0.0027, val_loss: 0.0066, val_acc: 0.9819


MNISTをCoreMLに変換

In [None]:
#import coreML ver4.0
!pip install -U coremltools
import coremltools as ct


class MNIST_Conv_MN(nn.Module):
    def __init__(self):
        super(MNIST_Conv_MN, self).__init__()
        self.conv1 = nn.Conv2d(1, 8, 3) 
        self.pooling = nn.MaxPool2d(2, 2) 
        self.fc1 = nn.Linear(13 * 13 * 8, 10)

    def forward(self, x):
        x = self.conv1(x)
        x = F.relu(x)
        x = self.pooling(x)
        x = x.view(-1, 13 * 13 * 8)
        x = self.fc1(x)
        return x

model = MNIST_Conv_MN()
model.load_state_dict(torch.load('MNIST.pth'))
model.eval()

scripted_model = torch.jit.script(model)

#サイズの合ったランダムな入力を行う
example_input = torch.rand(1,1,28,28)
traced = torch.jit.trace(model, example_input)

#class_labelの設定
class_labels = list(range(10))

#traced.save("model.pt")
mlmodel = ct.convert(
    traced, 
    inputs=[ct.TensorType(name="input_1", shape=example_input.shape)],
    classifier_config = ct.ClassifierConfig(class_labels) 
)
mlmodel.save('MNIST.mlmodel')

Requirement already up-to-date: coremltools in /usr/local/lib/python3.7/dist-packages (4.1)


Converting Frontend ==> MIL Ops:   0%|          | 0/26 [00:00<?, ? ops/s]
Running MIL optimization passes: 100%|██████████| 18/18 [00:00<00:00, 1078.23 passes/s]
Translating MIL ==> MLModel Ops: 100%|██████████| 19/19 [00:00<00:00, 2773.72 ops/s]


In [None]:
from torchsummary import summary
summary(model, (1, 28, 28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 8, 26, 26]              80
         MaxPool2d-2            [-1, 8, 13, 13]               0
            Linear-3                   [-1, 10]          13,530
Total params: 13,610
Trainable params: 13,610
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.05
Params size (MB): 0.05
Estimated Total Size (MB): 0.11
----------------------------------------------------------------


In [None]:
from PIL import Image
img = Image.open("/content/pytorch-list-of-transforms_01.jpg")
display(img)
