# Convert NN model with Multiple Output from PyTorch to TensorFlow

## Import libraries

In [1]:
import numpy as np

import os
import time
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import onnx
from onnx_tf.backend import prepare
import tensorflow as tf


For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
If you depend on functionality not listed there, please file an issue.





## Generate simulated data
`y1` is a continuous output, while `y2` is a binary output

In [2]:
# Functions to generate y1 and y2 from X_train and X_test
def generate_y1(array):
    return 2 * np.sum(array) + np.random.randn()

def generate_y2(array):
    if np.sum(array) >= 0:
        return 1
    else:
        return 0

train_size = 8000
test_size = 2000

input_size = 20
hidden_sizes = [50, 50]

np.random.seed(0)
X_train = np.random.randn(train_size, input_size).astype(np.float32)
X_test = np.random.randn(test_size, input_size).astype(np.float32)
y1_train = np.apply_along_axis(func1d=generate_y1, axis=1, arr=X_train)
y1_test = np.apply_along_axis(func1d=generate_y1, axis=1, arr=X_test)
y2_train = np.apply_along_axis(func1d=generate_y2, axis=1, arr=X_train)
y2_test = np.apply_along_axis(func1d=generate_y2, axis=1, arr=X_test)
print('Shape of X_train:', X_train.shape)
print('Shape of X_train:', X_test.shape)
print('Shape of y1_train:', y1_train.shape)
print('Shape of y1_test:', y1_test.shape)
print('Shape of y2_train:', y2_train.shape)
print('Shape of y2_test:', y2_test.shape)

Shape of X_train: (8000, 20)
Shape of X_train: (2000, 20)
Shape of y1_train: (8000,)
Shape of y1_test: (2000,)
Shape of y2_train: (8000,)
Shape of y2_test: (2000,)


Check for class imbalance for y2

In [3]:
print('Percentage of 1\'s in y2_train: {:.2f} %'.format(np.sum(y2_train == 1) / len(y2_train) * 100))
print('Percentage of 0\'s in y2_train: {:.2f} %'.format(np.sum(y2_train == 0) / len(y2_train) * 100))
print()
print('Percentage of 1\'s in y2_test: {:.2f} %'.format(np.sum(y2_test == 1) / len(y2_test) * 100))
print('Percentage of 0\'s in y2_test: {:.2f} %'.format(np.sum(y2_test == 0) / len(y2_test) * 100))

Percentage of 1's in y2_train: 51.06 %
Percentage of 0's in y2_train: 48.94 %

Percentage of 1's in y2_test: 48.65 %
Percentage of 0's in y2_test: 51.35 %


Define `Dataset` subclass to facilitate batch training

In [4]:
class MultiOutputDataset(Dataset):
    def __init__(self, X, y1, y2):
        self.X = X
        self.y1 = y1
        self.y2 = y2
        
    def __len__(self):
        return len(self.X)
    
    def __getitem__(self, idx):
        return self.X[idx], self.y1[idx], self.y2[idx]

Create DataLoaders for training and test set, for batch training and evaluation

In [5]:
train_loader = DataLoader(dataset=MultiOutputDataset(X_train, y1_train, y2_train), batch_size=8, shuffle=True)
test_loader = DataLoader(dataset=MultiOutputDataset(X_test, y1_test, y2_test), batch_size=8, shuffle=False)

## Train and test model in PyTorch

Build model`

In [6]:
class MultipleOutputModel(nn.Module):
    def __init__(self, input_size, hidden_sizes):
        super(MultipleOutputModel, self).__init__()
        self.input_size = input_size
        self.fcs = []  # List of fully connected layers
        in_size = input_size
        
        for i, next_size in enumerate(hidden_sizes):
            fc = nn.Linear(in_features=in_size, out_features=next_size)
            in_size = next_size
            self.__setattr__('fc{}'.format(i), fc)  # # set name for each fullly connected layer
            self.fcs.append(fc)
            
        self.last_fc = nn.Linear(in_features=in_size, out_features=1)
        self.sigmoid = nn.Sigmoid()
        
    def forward(self, x):
        for i, fc in enumerate(self.fcs):
            x = fc(x)
            x = nn.ReLU()(x)
        out1 = self.last_fc(x)
        x2 = self.last_fc(x)
        out2 = self.sigmoid(nn.ReLU()(x2))
        return out1, out2

Set device to be used

In [7]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Device used:', device)
model_pytorch = MultipleOutputModel(input_size=input_size, hidden_sizes=hidden_sizes)
model_pytorch = model_pytorch.to(device)

Device used: cpu


In [8]:
print(model_pytorch)

MultipleOutputModel(
  (fc0): Linear(in_features=20, out_features=50, bias=True)
  (fc1): Linear(in_features=50, out_features=50, bias=True)
  (last_fc): Linear(in_features=50, out_features=1, bias=True)
  (sigmoid): Sigmoid()
)


Set loss and optimizer

In [9]:
# Set mean squared error loss for y1 and binary cross entropy loss for y2
criterion1 = nn.MSELoss()
criterion2 = nn.BCELoss()
optimizer = optim.Adam(model_pytorch.parameters(), lr=1e-4)

Train model

In [10]:
num_epochs = 20
time_start = time.time()

for epoch in range(num_epochs):
    model_pytorch.train()
    
    train_loss_total = 0
    
    for data, target1, target2 in train_loader:
        data, target1, target2 = data.to(device), target1.float().to(device), target2.float().to(device)
        optimizer.zero_grad()
        output1, output2 = model_pytorch(data)
        train_loss_1 = criterion1(output1.squeeze(), target1)
#         print('target2:', target2)
#         print('output2:', output2)
        train_loss_2 = criterion2(output2.squeeze(), target2)
        train_loss = torch.add(train_loss_1, train_loss_2)
        train_loss.backward()
        optimizer.step()
        train_loss_total += train_loss.item() * data.size(0)
        
    print('Epoch {} completed. Train loss is {:.3f}'.format(epoch + 1, train_loss_total / train_size))
print('Time taken to completed {} epochs: {:.2f} minutes'.format(num_epochs, (time.time() - time_start) / 60))

Epoch 1 completed. Train loss is 71.719
Epoch 2 completed. Train loss is 16.996
Epoch 3 completed. Train loss is 2.556
Epoch 4 completed. Train loss is 1.981
Epoch 5 completed. Train loss is 1.791
Epoch 6 completed. Train loss is 1.710
Epoch 7 completed. Train loss is 1.663
Epoch 8 completed. Train loss is 1.631
Epoch 9 completed. Train loss is 1.599
Epoch 10 completed. Train loss is 1.574
Epoch 11 completed. Train loss is 1.554
Epoch 12 completed. Train loss is 1.535
Epoch 13 completed. Train loss is 1.515
Epoch 14 completed. Train loss is 1.499
Epoch 15 completed. Train loss is 1.482
Epoch 16 completed. Train loss is 1.469
Epoch 17 completed. Train loss is 1.460
Epoch 18 completed. Train loss is 1.445
Epoch 19 completed. Train loss is 1.439
Epoch 20 completed. Train loss is 1.427
Time taken to completed 20 epochs: 1.04 minutes


In [11]:
_, output2 = model_pytorch(torch.tensor(X_test))
min(output2)

tensor([0.5000], grad_fn=<SelectBackward>)

Evaluate model

In [12]:
model_pytorch.eval()

test_loss_total = 0
total_num_corrects = 0
threshold = 0.9
time_start = time.time()

for data, target1, target2 in test_loader:
    data, target1, target2 = data.to(device), target1.float().to(device), target2.float().to(device)
    output1, output2 = model_pytorch(data)
    test_loss_1 = criterion1(output1.squeeze(), target1)
    test_loss_2 = criterion2(output2.squeeze(), target2)
    test_loss = torch.add(test_loss_1, test_loss_2)
    test_loss.backward()
    optimizer.step()
    test_loss_total += test_loss.item() * data.size(0)
    
    pred = (output2 >= threshold).view_as(target2)  # to make pred have same shape as target
    num_correct = torch.sum(pred == target2.byte()).item()
    total_num_corrects += num_correct

print('Evaluation completed. Test loss is {:.3f}'.format(test_loss_total / test_size))
print('Test accuracy is {:.3f}'.format(total_num_corrects / test_size))
print('Time taken to complete evaluation: {:.2f} minutes'.format((time.time() - time_start) / 60))

Evaluation completed. Test loss is 6.879
Test accuracy is 0.897
Time taken to complete evaluation: 0.01 minutes


## Convert Model to ONNX Format

Save model weights in PyTorch format

In [13]:
if not os.path.exists('./models/'):
    os.mkdir('./models/')

torch.save(model_pytorch.state_dict(), './models/model_multi_outputs.pt')

Load model from `.pt` file and export to ONNX format

In [14]:
model_pytorch = MultipleOutputModel(input_size=input_size, hidden_sizes=hidden_sizes)
model_pytorch.load_state_dict(torch.load('./models/model_multi_outputs.pt'))
# Single pass of dummy variable required
dummy_input = torch.from_numpy(X_test[0].reshape(1, -1)).float().to(device)
dummy_output_1, dummy_output_2 = model_pytorch(dummy_input)
print('dummy_output_1:', dummy_output_1)
print('dummy_output_2:', dummy_output_2)

# Export to ONNX format
torch.onnx.export(model_pytorch, dummy_input, './models/model_multi_outputs.onnx', input_names=['input'],
                  output_names=['output1', 'output2'])

dummy_output_1: tensor([[11.5031]], grad_fn=<AddmmBackward>)
dummy_output_2: tensor([[1.0000]], grad_fn=<SigmoidBackward>)


## Convert Model to TensorFlow Format

Load ONNX model and convert to TensorFlow format

In [15]:
model_onnx = onnx.load('./models/model_multi_outputs.onnx')

tf_rep = prepare(model_onnx)

Instructions for updating:
Use keras.layers.flatten instead.


  handler.ONNX_OP, handler.DOMAIN or "ai.onnx"))
  handler.ONNX_OP, handler.DOMAIN or "ai.onnx"))
  handler.ONNX_OP, handler.DOMAIN or "ai.onnx"))
  handler.ONNX_OP, handler.DOMAIN or "ai.onnx"))
  handler.ONNX_OP, handler.DOMAIN, version))


Print out tensors and placeholders in model (helpful during inference in TensorFlow)

In [16]:
print(tf_rep.tensor_dict)

{'fc0.bias': <tf.Tensor 'Const:0' shape=(50,) dtype=float32>, 'fc0.weight': <tf.Tensor 'Const_1:0' shape=(50, 20) dtype=float32>, 'fc1.bias': <tf.Tensor 'Const_2:0' shape=(50,) dtype=float32>, 'fc1.weight': <tf.Tensor 'Const_3:0' shape=(50, 50) dtype=float32>, 'last_fc.bias': <tf.Tensor 'Const_4:0' shape=(1,) dtype=float32>, 'last_fc.weight': <tf.Tensor 'Const_5:0' shape=(1, 50) dtype=float32>, 'input': <tf.Tensor 'input:0' shape=(1, 20) dtype=float32>, '7': <tf.Tensor 'add:0' shape=(1, 50) dtype=float32>, '8': <tf.Tensor 'Relu:0' shape=(1, 50) dtype=float32>, '9': <tf.Tensor 'add_1:0' shape=(1, 50) dtype=float32>, '10': <tf.Tensor 'Relu_1:0' shape=(1, 50) dtype=float32>, 'output1': <tf.Tensor 'add_2:0' shape=(1, 1) dtype=float32>, '12': <tf.Tensor 'add_3:0' shape=(1, 1) dtype=float32>, '13': <tf.Tensor 'Relu_2:0' shape=(1, 1) dtype=float32>, 'output2': <tf.Tensor 'Sigmoid:0' shape=(1, 1) dtype=float32>}


Export model as `.pb` file

In [17]:
tf_rep.export_graph('./models/model_multi_outputs.pb')

## Do Inference in TensorFlow

Define function to load `.pb` file

In [18]:
def load_pb(path_to_pb):
    with tf.gfile.GFile(path_to_pb, 'rb') as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
    with tf.Graph().as_default() as graph:
        tf.import_graph_def(graph_def, name='')
        return graph

Load graph, initialize session and do inference using the same dummy input above

In [19]:
tf_graph = load_pb('./models/model_multi_outputs.pb')
sess = tf.Session(graph=tf_graph)

output_1_tensor = tf_graph.get_tensor_by_name('add_2:0')
output_2_tensor = tf_graph.get_tensor_by_name('Sigmoid:0')
input_tensor = tf_graph.get_tensor_by_name('input:0')

output1 = sess.run(output_1_tensor, feed_dict={input_tensor: dummy_input})
output2 = sess.run(output_2_tensor, feed_dict={input_tensor: dummy_input})
print('output1:', output1)
print('output2:', output2)

output1: [[11.503143]]
output2: [[0.99998987]]
