# Convolutional Networks with minpy+mxnet

In this notebook, we show how to implement a CNN with MinPy and MXNet. Your job is to design the forward model and train its parameters. Note that the convolution layers are implemented efficiently using MXNet symbols. You should reach more than 70% accuracy on the validation dataset.
 

In [11]:
'''This is for cs231 assignment2, a convolutional neural network using Minpy and Mxnet'''

import sys
import argparse

import minpy
import minpy.numpy as np
import mxnet as mx
from minpy.nn.io import NDArrayIter
# Can also use MXNet IO here
# from mxnet.io import NDArrayIter
from minpy.core import Function
from minpy.nn import layers
from minpy.nn.model import ModelBase
from minpy.nn.solver import Solver
from cs231n.data_utils import get_CIFAR10_data

# Please uncomment following if you have GPU-enabled MXNet installed.
#from minpy.context import set_context, gpu
#set_context(gpu(0)) # set the global context as gpu(0)

# --- Data / input geometry ---
# Mini-batch size; used by both data iterators and the fixed conv input shape.
batch_size = 128
# Per-sample input shape (presumably channels, height, width for CIFAR-10).
input_size = (3, 32, 32)
flattened_input_size = input_size[0] * input_size[1] * input_size[2]

# --- Network architecture ---
nfilter = 16      # num_filter for each convolution layer
ks = (3, 3)       # convolution kernel size
hidden_size = 64  # width of the hidden fully connected layer
num_classes = 10  # width of the output layer

# --- Optimization ---
reg = 1e-3              # coefficient of the squared-weight penalty in the loss
nepo = 5                # number of training epochs given to the solver
learning_rate = 0.0002  # learning rate given to the solver's update rule


class ConvolutionNet(ModelBase):
    """CNN combining an MXNet symbolic convolution stack with MinPy layers.

    The convolutional part (two conv -> batch-norm -> ReLU -> 2x2 max-pool
    stages, then flatten) is built from MXNet symbols and wrapped in a MinPy
    ``Function``. The classifier head (affine -> batch-norm -> ReLU -> affine)
    is written with ``minpy.nn.layers``.

    Reads the module-level settings ``ks``, ``nfilter``, ``batch_size``,
    ``input_size``, ``hidden_size``, ``num_classes`` and ``reg``.
    """

    def __init__(self):
        """Build the symbolic convolution stack and register all parameters."""
        super(ConvolutionNet, self).__init__()
        # Define your cnn below.
        net = mx.sym.Variable(name='X')

        # Stage 1: conv -> batch-norm -> ReLU -> 2x2 max-pool (stride 2).
        net = mx.sym.Convolution(
                data=net, name='conv1', kernel=ks, num_filter=nfilter)
        net = mx.sym.BatchNorm(data=net, name='bn1')
        net = mx.sym.Activation(
                data=net, act_type='relu')
        net = mx.sym.Pooling(
                data=net, name='pool1', pool_type='max', kernel=(2, 2),
                stride=(2, 2))

        # Stage 2: same layout as stage 1.
        net = mx.sym.Convolution(
                data=net, name='conv2', kernel=ks, num_filter=nfilter)
        net = mx.sym.BatchNorm(data=net, name='bn2')
        net = mx.sym.Activation(
                data=net, act_type='relu')
        net = mx.sym.Pooling(
                data=net, name='pool2', pool_type='max', kernel=(2, 2),
                stride=(2, 2))
        net = mx.sym.Flatten(data=net)

        # Create forward function and add parameters to this model.
        # The input shape is fixed here, including the batch dimension.
        self.conv = Function(
                net, input_shapes={'X': (batch_size,) + input_size},
                name='conv')
        self.add_params(self.conv.get_params())

        # Define ndarray parameters used for the fully connected and bn layers.
        # The second entry of the conv output shape is the flattened feature
        # size that feeds the first affine layer.
        output_shape = self.conv.get_one_output_shape()
        conv_out_size = output_shape[1]
        (self.add_param(name='w1', shape=(conv_out_size, hidden_size))
             .add_param(name='b1', shape=(hidden_size,))
             .add_param(name='w2', shape=(hidden_size, num_classes))
             .add_param(name='b2', shape=(num_classes,))
             .add_aux_param(name='running_mean', value=None)
             .add_aux_param(name='running_var', value=None)
             .add_param(name='gamma1', shape=(hidden_size,),
                        init_rule='constant', init_config={'value': 1.0})
             .add_param(name='beta1', shape=(hidden_size,),
                        init_rule='constant'))

    def forward(self, X, mode):
        """Compute class scores for a batch.

        Parameters
        ----------
        X : input batch fed to the symbolic convolution function as ``X``.
        mode : supplied by the caller; not used inside this method.

        Returns
        -------
        The output of the final affine layer (one score per class).
        """
        out = self.conv(X=X, **self.params)
        out = layers.affine(out, self.params['w1'], self.params['b1'])

        # Add a BN layer between the fully connected layers. The running
        # statistics returned by the layer are written back to aux_params.
        # NOTE(review): `mode` is not forwarded to layers.batchnorm, so the
        # layer presumably runs with its default mode during evaluation as
        # well - confirm which mode strings layers.batchnorm accepts before
        # wiring it through.
        bn_out = layers.batchnorm(
            out, self.params['gamma1'], self.params['beta1'],
            running_mean=self.aux_params['running_mean'],
            running_var=self.aux_params['running_var'])
        out, self.aux_params['running_mean'], self.aux_params['running_var'] = bn_out

        out = layers.relu(out)
        out = layers.affine(out, self.params['w2'], self.params['b2'])
        return out

    def loss(self, predict, y):
        """Softmax loss plus a squared-magnitude penalty on the parameters.

        The penalty is ``0.5 * reg * sum(p ** 2)`` taken over every entry of
        ``self.params`` (not only the affine weights).

        Parameters
        ----------
        predict : scores returned by ``forward``.
        y : labels passed through to ``layers.softmax_loss``.
        """
        loss_reg = 0
        # .values() works on both Python 2 and Python 3 (iteritems is
        # Python 2 only) and the parameter names are not needed here.
        for weight in self.params.values():
            loss_reg += np.sum(weight**2)
        return layers.softmax_loss(predict, y) + loss_reg*reg*0.5

# Create model.
model = ConvolutionNet()

# Create data iterators for the training, validation and test sets.
data = get_CIFAR10_data()
train_dataiter = NDArrayIter(data=data['X_train'],
                             label=data['y_train'],
                             batch_size=batch_size,
                             shuffle=True)
# Validation split: this is what the solver evaluates on during training, so
# the held-out test split does not influence model selection.
# NOTE(review): assumes get_CIFAR10_data() returns 'X_val' / 'y_val' as in the
# standard cs231n loader - confirm.
val_dataiter = NDArrayIter(data=data['X_val'],
                           label=data['y_val'],
                           batch_size=batch_size,
                           shuffle=False)
# Test split: kept available for a final evaluation after training.
test_dataiter = NDArrayIter(data=data['X_test'],
                            label=data['y_test'],
                            batch_size=batch_size,
                            shuffle=False)

# Create solver.
solver = Solver(model,
                train_dataiter,
                val_dataiter,
                num_epochs=nepo,
                init_rule='gaussian',
                init_config={
                    'stdvar': 0.001
                },
                update_rule='adam',
                optim_config={
                    'learning_rate': learning_rate
                },
                verbose=True,
                print_every=40)
# Initialize model parameters.
solver.init()
# Train!
solver.train()

(Iteration 1 / 1914) loss: 2.334938
(Iteration 41 / 1914) loss: 2.195728
(Iteration 81 / 1914) loss: 2.061162
(Iteration 121 / 1914) loss: 1.992989
(Iteration 161 / 1914) loss: 1.899949


KeyboardInterrupt: 