# Deploy video models to TVM runtime

In [1]:
import gluoncv as gcv
import mxnet as mx
import tvm
# Show the installed GluonCV version next to the minimum this notebook states it requires.
gcv_version = gcv.__version__
print('Requires gluoncv>=0.8.0 alpha:', gcv_version)

Requires gluoncv>=0.8.0 alpha: 0.8.0


### Let's grab a pretrained model from the GluonCV model zoo

In [2]:
# The model name is reused below as the prefix of every file written to disk.
model_name = 'resnet18_v1b_kinetics400'
# Fetch the network with pretrained weights onto the first GPU.
net = gcv.model_zoo.get_model(
    model_name,
    nclass=400,
    pretrained=True,
    ctx=mx.gpu(0),
)

### In case you missed the concept: we support injecting a preprocessing pipeline into the network itself
With an embedded preprocessing block, it is much easier to consume the model, because you do not need to rewrite the preprocessing code in other languages such as C++ or Java.

In [3]:
from mxnet.gluon import HybridBlock
class TvmPreprocess(HybridBlock):
    """Normalization block intended to be embedded in front of an exported network.

    Stores a three-element mean and a three-element scale as constant
    parameters, each reshaped to ``(1, 3, 1, 1)``, and computes
    ``(x - mean) / scale`` with broadcasting.

    NOTE(review): the constants look like per-channel RGB statistics for
    inputs in the 0-255 range with an NCHW layout -- confirm against the caller.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Shared target shape so both constants broadcast the same way against x.
        channel_shape = (1, 3, 1, 1)
        with self.name_scope():
            channel_mean = mx.nd.array([123.675, 116.28, 103.53]).reshape(channel_shape)
            channel_scale = mx.nd.array([58.395, 57.12, 57.375]).reshape(channel_shape)
            self.init_mean = self.params.get_constant('init_mean', channel_mean)
            self.init_scale = self.params.get_constant('init_scale', channel_scale)

    # pylint: disable=arguments-differ
    def hybrid_forward(self, F, x, init_mean, init_scale):
        """Return ``(x - init_mean) / init_scale`` using the broadcasting ops of ``F``."""
        centered = F.broadcast_minus(x, init_mean)
        return F.broadcast_div(centered, init_scale)

### Exporting to TVM is similar to generating a static MXNet network representation

In [4]:
# Export the network with the preprocessing block attached, targeting CUDA.
# use_autotvm=False here; the "Cannot find config ... fallback configuration"
# messages printed by this cell come from TVM using untuned schedules.
gcv.utils.export_tvm(
    model_name,
    net,
    data_shape=(1, 3, 224, 224),
    preprocess=TvmPreprocess(),
    target='cuda',
    ctx=mx.gpu(0),
    opt_level=3,
    use_autotvm=False,
)

Cannot find config for target=cuda, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 64, 56, 56), 'float32'), ('TENSOR', (64, 64, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
Cannot find config for target=cuda, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 128, 28, 28), 'float32'), ('TENSOR', (128, 128, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
Cannot find config for target=cuda, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 256, 14, 14), 'float32'), ('TENSOR', (256, 256, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.
Cannot find config for target=cuda, workload=('conv2d_nchw.cuda', ('TENSOR', (1, 512, 7, 7), 'float32'), ('TENSOR', (512, 512, 3, 3), 'float32'), (1, 1), (1, 1, 1, 1), (1, 1), 'float32'). A fal

Meanwhile, we can save the synset (the list of class names) to disk so that predicted category indices can be mapped back to their original names.

In [5]:
# Write one class name per line, in the order given by net.classes.
synset_path = '{}_synset.txt'.format(model_name)
with open(synset_path, 'wt') as synset_file:
    synset_file.writelines(label + '\n' for label in net.classes)

In [6]:
# List the files whose names start with the model name
# (IPython substitutes the Python variable `model_name` for $model_name before running the shell command).
!ls -lh $model_name*

-rw-rw-r-- 1 xavier xavier  73M Jun 10 20:25 resnet18_v1b_kinetics400_deploy_0000.params
-rw-rw-r-- 1 xavier xavier  23K Jun 10 20:25 resnet18_v1b_kinetics400_deploy_graph.json
-rwxrwxr-x 1 xavier xavier 477K Jun 10 20:25 resnet18_v1b_kinetics400_deploy_lib.so
-rw-rw-r-- 1 xavier xavier 5.9K Jun 10 20:25 resnet18_v1b_kinetics400_synset.txt
