# Convolutional Networks with minpy+mxnet

In this notebook, we show how to implement a convolutional neural network (CNN) with MinPy and MXNet. Your job is to design the forward model and train its parameters. Note that the convolution layers are implemented efficiently using MXNet symbols. You should reach more than 70% accuracy on the validation dataset.
 

In [1]:
"""import dependencies"""
import sys
import argparse

import minpy
import minpy.numpy as np
import mxnet as mx
from minpy.nn.io import NDArrayIter
# Can also use MXNet IO here
# from mxnet.io import NDArrayIter
from minpy.core import Function
from minpy.nn import layers
from minpy.nn.model import ModelBase
from minpy.nn.solver import Solver
from data_utils import get_CIFAR10_data

# Please uncomment following if you have GPU-enabled MXNet installed.
#from minpy.context import set_context, gpu
#set_context(gpu(0)) # set the global context as gpu(0)

In [3]:
# Per-sample input layout handed to the convolution symbol: (channels, height, width).
input_size = (3, 32, 32)
# Number of scalar values in one flattened image.
flattened_input_size = 3 * 32 * 32
# Run `bash get_datasets.sh` first if you do not have the CIFAR-10 dataset.
data_dir = './cifar-10-batches-py'
data = get_CIFAR10_data(data_dir)
# CIFAR-10 has ten classes (labels 0..9). The classifier's output width must
# cover every label, otherwise the softmax loss would index past the logits.
num_classes = 10

In [11]:
# TODO: adjust the following parameters to obtain the best performance
batch_size = 128        # samples per mini-batch
hidden_size = 1024      # width of the fully connected hidden layer
reg = 0.001             # L2 regularization strength
num_filter = 128        # output channels of each convolution layer
ks = (5, 5)             # convolution kernel size
num_epo = 10            # number of training epochs
learning_rate = 2e-4    # optimizer step size
# END TODO


class ConvolutionNet(ModelBase):
    """CNN classifier: an MXNet-symbol convolution stack plus a MinPy affine head.

    The feature extractor is two rounds of convolution -> ReLU -> 2x2 max
    pooling followed by a flatten, wrapped in a MinPy ``Function``. The
    classifier on top is affine -> ReLU -> affine, built from ``minpy.nn.layers``.

    Reads the module-level settings ``ks``, ``num_filter``, ``batch_size``,
    ``input_size``, ``hidden_size``, ``num_classes`` and ``reg``.
    """

    def __init__(self):
        """Build the convolution symbol and register every trainable parameter."""
        super(ConvolutionNet, self).__init__()
        # TODO: Define symbols using multiple layers of convolution and max pooling to extract better features
        # from input image.
        net = mx.sym.Variable(name='X')

        # Stage 1: convolution -> ReLU -> 2x2 max pooling with stride 2.
        net = mx.sym.Convolution(
                data=net, name='conv1', kernel=ks, num_filter=num_filter)
        net = mx.sym.Activation(
                data=net, act_type='relu')
        net = mx.sym.Pooling(
                data=net, name='pool1', pool_type='max', kernel=(2, 2),
                stride=(2, 2))

        # Stage 2: same structure as stage 1.
        net = mx.sym.Convolution(
                data=net, name='conv2', kernel=ks, num_filter=num_filter)
        net = mx.sym.Activation(
                data=net, act_type='relu')
        net = mx.sym.Pooling(
                data=net, name='pool2', pool_type='max', kernel=(2, 2),
                stride=(2, 2))
        # END TODO
        net = mx.sym.Flatten(data=net)

        # Create CNN function and add parameters to the model.
        # The input shape is declared with the module-level batch_size.
        self.conv = Function(
                net, input_shapes={'X': (batch_size,) + input_size},
                name='conv')
        self.add_params(self.conv.get_params())
        # Define ndarray parameters used for classification part.
        # Axis 1 of the flattened conv output is the per-sample feature count.
        output_shape = self.conv.get_one_output_shape()
        conv_out_size = output_shape[1]
        # TODO: add parameters of full connected layers, based on your forward model
        # NOTE(review): gamma1/beta1 are registered but never read in forward();
        # presumably intended for a batch-norm layer -- wire them in or remove them.
        self.add_param(name='w1', shape=(conv_out_size, hidden_size)) \
            .add_param(name='b1', shape=(hidden_size,)) \
            .add_param(name='gamma1', shape=(hidden_size,)) \
            .add_param(name='beta1', shape=(hidden_size,)) \
            .add_param(name='w2', shape=(hidden_size, num_classes)) \
            .add_param(name='b2', shape=(num_classes,))
        # END TODO

    def forward(self, X, mode):
        """Compute class scores for a batch.

        Args:
            X: input batch fed to the convolution function as ``X``.
            mode: accepted for interface compatibility; not used by this model.

        Returns:
            Output of the final affine layer (one score per class).
        """
        # TODO: build your forward model
        out = self.conv(X=X, **self.params)
        out = layers.affine(out, self.params['w1'], self.params['b1'])
        out = layers.relu(out)
        out = layers.affine(out, self.params['w2'], self.params['b2'])
        # END TODO
        return out

    def loss(self, predict, y):
        """Return the softmax loss plus an L2 penalty over all registered parameters.

        Args:
            predict: class scores as returned by ``forward``.
            y: ground-truth labels for the batch.

        Returns:
            ``softmax_loss(predict, y) + 0.5 * reg * sum(p ** 2 for every parameter p)``.
        """
        # Start from zero so the penalty is exactly the sum of squared entries.
        loss_reg = 0.0
        # items() works on both Python 2 and Python 3 dictionaries.
        for name, weight in self.params.items():
            loss_reg += np.sum(weight**2)
        return layers.softmax_loss(predict, y) + 0.5*reg*loss_reg


      

In [12]:
    # Instantiate the network defined above.
    model = ConvolutionNet()

    # Wrap the training and test splits in mini-batch iterators; only the
    # training iterator shuffles its samples.
    train_dataiter = NDArrayIter(
        data=data['X_train'], label=data['y_train'],
        batch_size=batch_size, shuffle=True)
    test_dataiter = NDArrayIter(
        data=data['X_test'], label=data['y_test'],
        batch_size=batch_size, shuffle=False)

    # Configure the solver that trains the CNN. Parameters are drawn with the
    # 'gaussian' init rule and updated with Adam.
    solver = Solver(
        model,
        train_dataiter,
        test_dataiter,
        num_epochs=num_epo,
        init_rule='gaussian',
        init_config={'stdvar': 0.001},
        # You may also try other optimization rules, e.g. 'sgd_momentum' or 'rmsprop'.
        update_rule='adam',
        optim_config={'learning_rate': learning_rate},
        verbose=True,
        print_every=20)

    # Initialize model parameters, then train.
    solver.init()
    solver.train()

(Iteration 1 / 1148) loss: 2.304477
(Iteration 21 / 1148) loss: 2.077509
(Iteration 41 / 1148) loss: 1.997448
(Iteration 61 / 1148) loss: 1.969889
(Iteration 81 / 1148) loss: 1.814104
(Iteration 101 / 1148) loss: 1.749671
(Iteration 121 / 1148) loss: 1.600048
(Iteration 141 / 1148) loss: 1.509827
(Iteration 161 / 1148) loss: 1.567641
(Iteration 181 / 1148) loss: 1.470913
(Iteration 201 / 1148) loss: 1.488777
(Iteration 221 / 1148) loss: 1.490559
(Iteration 241 / 1148) loss: 1.354200
(Iteration 261 / 1148) loss: 1.508165
(Iteration 281 / 1148) loss: 1.242928
(Iteration 301 / 1148) loss: 1.434858
(Iteration 321 / 1148) loss: 1.430913
(Iteration 341 / 1148) loss: 1.233354
(Iteration 361 / 1148) loss: 1.316367
(Iteration 381 / 1148) loss: 1.198666
(Epoch 1 / 3) train acc: 0.535156; val_acc: 0.533203
(Iteration 401 / 1148) loss: 1.279022
(Iteration 421 / 1148) loss: 1.055716
(Iteration 441 / 1148) loss: 1.405368
(Iteration 461 / 1148) loss: 1.139587
(Iteration 481 / 1148) loss: 1.517576
(It