# Import Libraries

In [1]:
import numpy as np
import mxnet as mx
from mxnet import autograd
from mxnet import gluon
from mxnet import nd

mx.random.seed(1)

In [2]:
ctx = mx.cpu()

# Define a function to encode the integer to its binary representation

In [3]:
def binary_encode(i, num_digits):
    return np.array([i >> d & 1 for d in range(num_digits)])

# Define a function to label the data and map the labels back to categorical strings

In [4]:
def fizz_buzz_encode(i):
    if   i % 15 == 0: 
        return 0
    elif i % 5  == 0: 
        return 1
    elif i % 3  == 0: 
        return 2
    else:             
        return 3
    
def fizz_buzz(i, prediction):
    if prediction == 0:
        return "fizzbuzz"
    elif prediction == 1:
        return "buzz"
    elif prediction == 2:
        return "fizz"
    else:
        return str(i)

# Create the Numpy NdArray for training, validation and test data

In [5]:
MAX_NUMBER = 20000
NUM_DIGITS = np.log2(MAX_NUMBER).astype(np.int)+1
trainX = np.array([binary_encode(i, NUM_DIGITS) for i in range(101, np.int(MAX_NUMBER/2))])
trainY = np.array([fizz_buzz_encode(i)          for i in range(101, np.int(MAX_NUMBER/2))])
valX = np.array([binary_encode(i, NUM_DIGITS) for i in range(np.int(MAX_NUMBER/2), MAX_NUMBER)])
valY = np.array([fizz_buzz_encode(i)          for i in range(np.int(MAX_NUMBER/2), MAX_NUMBER)])
testX = np.array([binary_encode(i, NUM_DIGITS) for i in range(1, 101)])
testY = np.array([fizz_buzz_encode(i)          for i in range(1, 101)])

# Create mxnet NDarrayiter for training, validation and test data

In [6]:
batch_size = 100
num_inputs = NUM_DIGITS
num_outputs = 4
train_data = mx.io.NDArrayIter(trainX, trainY,
                               batch_size, shuffle=True)
val_data = mx.io.NDArrayIter(valX, valY,
                               batch_size, shuffle=True)
test_data = mx.io.NDArrayIter(testX, testY,
                              batch_size, shuffle=False)

# Lets define the function to calculate accuracy of a model

In [7]:
def evaluate_accuracy(data_iterator, net):
    acc = mx.metric.Accuracy()
    data_iterator.reset()
    for i, batch in enumerate(data_iterator):
        data = batch.data[0].as_in_context(ctx)
        label = batch.label[0].as_in_context(ctx)
        output = net(data)
        predictions = nd.argmax(output, axis=1)
        acc.update(preds=predictions, labels=label)
    return predictions,acc.get()[1]

# <span style="color:blue">Logistic Regression from Scratch

# Define the bias and weight matrix 

In [8]:
weight_scale = .01

W = nd.random_normal(shape=(num_inputs, num_outputs))
b = nd.random_normal(shape=num_outputs)

params = [W, b]

# Allocate space for each parameter's gradients.

In [9]:
for param in params:
    param.attach_grad()

# We shall pass our $yhat\_linear$ and compute the softmax and its log all at once inside the $softmax\_cross\_entropy$ loss function simultaneously

In [10]:
def softmax_cross_entropy(yhat_linear, y):
    return - nd.nansum(y * nd.log_softmax(yhat_linear), axis=0, exclude=True)

# Define the model

In [11]:
def net(X):
    y_linear = nd.dot(X, W) + b
    return y_linear

# Define the Optimizer

In [12]:
def SGD(params, lr):
    for param in params:
        param[:] = param - lr * param.grad

# Lets execute the training loops

In [13]:
epochs = 100
learning_rate = .01
smoothing_constant = .01

for e in range(epochs):
    train_data.reset()
    for i, batch in enumerate(train_data):
        data = batch.data[0].as_in_context(ctx)#.reshape((-1, num_inputs))
        label = batch.label[0].as_in_context(ctx)
        label_one_hot = nd.one_hot(label, 4)
        with autograd.record():
            output = net(data)
            loss = softmax_cross_entropy(output, label_one_hot)
        loss.backward()
        SGD(params, learning_rate)
        
        ##########################
        #  Keep a moving average of the losses
        ##########################
        curr_loss = nd.mean(loss).asscalar()
        moving_loss = (curr_loss if ((i == 0) and (e == 0)) 
                       else (1 - smoothing_constant) * moving_loss + (smoothing_constant) * curr_loss)

    _,test_accuracy = evaluate_accuracy(val_data, net)
    _,train_accuracy = evaluate_accuracy(train_data, net)
    print("Epoch %s. Loss: %s, Train_acc %s, Val_acc %s" %
          (e, moving_loss, train_accuracy, test_accuracy))

Epoch 0. Loss: 2.3163840174, Train_acc 0.533434343434, Val_acc 0.5334
Epoch 1. Loss: 1.59599629381, Train_acc 0.533434343434, Val_acc 0.5334
Epoch 2. Loss: 1.32759693647, Train_acc 0.533434343434, Val_acc 0.5334
Epoch 3. Loss: 1.22796101584, Train_acc 0.533434343434, Val_acc 0.5334
Epoch 4. Loss: 1.1910187941, Train_acc 0.533434343434, Val_acc 0.5334
Epoch 5. Loss: 1.17732768185, Train_acc 0.533434343434, Val_acc 0.5334
Epoch 6. Loss: 1.17225337721, Train_acc 0.533434343434, Val_acc 0.5334
Epoch 7. Loss: 1.17037177288, Train_acc 0.533434343434, Val_acc 0.5334
Epoch 8. Loss: 1.16967331976, Train_acc 0.533434343434, Val_acc 0.5334
Epoch 9. Loss: 1.1694135974, Train_acc 0.533434343434, Val_acc 0.5334
Epoch 10. Loss: 1.16931672309, Train_acc 0.533434343434, Val_acc 0.5334
Epoch 11. Loss: 1.16928037754, Train_acc 0.533434343434, Val_acc 0.5334
Epoch 12. Loss: 1.16926663325, Train_acc 0.533434343434, Val_acc 0.5334
Epoch 13. Loss: 1.16926133941, Train_acc 0.533434343434, Val_acc 0.5334
Epoch

# Lets see what the model predicts

In [14]:
predictions,test_accuracy = evaluate_accuracy(test_data, net)
output = np.vectorize(fizz_buzz)(np.arange(1, 101), predictions.asnumpy().astype(np.int))
print(output)
print("Test Accuracy : ",test_accuracy)

['1' '2' '3' '4' '5' '6' '7' '8' '9' '10' '11' '12' '13' '14' '15' '16'
 '17' '18' '19' '20' '21' '22' '23' '24' '25' '26' '27' '28' '29' '30' '31'
 '32' '33' '34' '35' '36' '37' '38' '39' '40' '41' '42' '43' '44' '45' '46'
 '47' '48' '49' '50' '51' '52' '53' '54' '55' '56' '57' '58' '59' '60' '61'
 '62' '63' '64' '65' '66' '67' '68' '69' '70' '71' '72' '73' '74' '75' '76'
 '77' '78' '79' '80' '81' '82' '83' '84' '85' '86' '87' '88' '89' '90' '91'
 '92' '93' '94' '95' '96' '97' '98' '99' '100']
Test Accuracy :  0.53


# <span style="color:blue">MultiLayer Perceptron using Gluon

# Lets reset the Training, Validation and the Test data

In [15]:
train_data.reset()
val_data.reset()
test_data.reset()

# Define the Gluon Sequestial Model

In [16]:
num_hidden = 256
net = gluon.nn.Sequential()
with net.name_scope():
    net.add(gluon.nn.Dense(num_inputs, activation="relu"))
    net.add(gluon.nn.Dense(num_hidden, activation="relu"))
    net.add(gluon.nn.Dense(num_hidden, activation="relu"))
    net.add(gluon.nn.Dense(num_outputs))

# Initialize Parameter

In [17]:
net.collect_params().initialize(mx.init.Xavier(magnitude=2.24), ctx=ctx)

# Softmax Cross Entropy Loss

In [18]:
loss = gluon.loss.SoftmaxCrossEntropyLoss()

# Stochastic Gradient Descent Optimizer

In [19]:
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': .01,'momentum':0.9})



# Lets Train the MLP model 

In [20]:
epochs = 100
moving_loss = 0.

for e in range(epochs):
    train_data.reset()
    for i, batch in enumerate(train_data):
        data = batch.data[0].as_in_context(ctx)
        label = batch.label[0].as_in_context(ctx)
        with autograd.record():
            output = net(data)
            cross_entropy = loss(output, label)
            cross_entropy.backward()
        trainer.step(data.shape[0])
        if i == 0:
            moving_loss = nd.mean(cross_entropy).asscalar()
        else:
            moving_loss = .99 * moving_loss + .01 * nd.mean(cross_entropy).asscalar()

    _,val_accuracy = evaluate_accuracy(val_data, net)
    _,train_accuracy = evaluate_accuracy(train_data, net)
    print("Epoch %s. Loss: %s, Train_acc %s, Val_acc %s" %
          (e, moving_loss, train_accuracy, val_accuracy))

Epoch 0. Loss: 1.25437421571, Train_acc 0.533434343434, Val_acc 0.5334
Epoch 1. Loss: 1.15721442045, Train_acc 0.533434343434, Val_acc 0.5334
Epoch 2. Loss: 1.15406116567, Train_acc 0.533434343434, Val_acc 0.5334
Epoch 3. Loss: 1.15232871223, Train_acc 0.533434343434, Val_acc 0.5334
Epoch 4. Loss: 1.15116667802, Train_acc 0.533434343434, Val_acc 0.5334
Epoch 5. Loss: 1.15035152163, Train_acc 0.533434343434, Val_acc 0.5334
Epoch 6. Loss: 1.14974839881, Train_acc 0.533434343434, Val_acc 0.5334
Epoch 7. Loss: 1.14925333898, Train_acc 0.533434343434, Val_acc 0.5334
Epoch 8. Loss: 1.1488747834, Train_acc 0.533434343434, Val_acc 0.5334
Epoch 9. Loss: 1.14854265592, Train_acc 0.533434343434, Val_acc 0.5334
Epoch 10. Loss: 1.1482677152, Train_acc 0.533434343434, Val_acc 0.5334
Epoch 11. Loss: 1.14803200135, Train_acc 0.533434343434, Val_acc 0.5334
Epoch 12. Loss: 1.14777964218, Train_acc 0.533434343434, Val_acc 0.5334
Epoch 13. Loss: 1.14758265618, Train_acc 0.533434343434, Val_acc 0.5334
Epoc

# Lets see what the model predicts

In [21]:
predictions,test_accuracy = evaluate_accuracy(test_data, net)
output = np.vectorize(fizz_buzz)(np.arange(1, 101), predictions.asnumpy().astype(np.int))
print(output)
print("Test Accuracy : ",test_accuracy)

['1' '2' 'fizz' '4' '5' 'fizz' '7' '8' 'fizz' '10' '11' 'fizz' '13' '14'
 'fizz' '16' '17' 'fizz' '19' '20' 'fizz' '22' '23' 'fizz' '25' '26' 'fizz'
 '28' '29' 'fizz' '31' '32' 'fizz' '34' '35' 'fizz' '37' '38' 'fizz' '40'
 '41' 'fizz' '43' '44' 'fizz' '46' '47' 'fizz' '49' '50' 'fizz' '52' '53'
 'fizz' '55' '56' 'fizz' '58' '59' '60' '61' '62' 'fizz' '64' '65' 'fizz'
 '67' '68' 'fizz' '70' '71' 'fizz' '73' '74' 'fizz' '76' '77' 'fizz' '79'
 '80' 'fizz' '82' '83' 'fizz' '85' '86' 'fizz' '88' '89' 'fizz' '91' '92'
 'fizz' '94' '95' 'fizz' '97' '98' 'fizz' '100']
Test Accuracy :  0.8


# <span style="color:blue">CNN using mxnet symbol

# Lets reshape the data (x_dim,y_dim) &rarr; (x_dim,#of channels = 1,y_dim)

In [22]:
trainX= trainX.reshape(trainX.shape[0],1,trainX.shape[1])
valX= valX.reshape(valX.shape[0],1,valX.shape[1])
testX= testX.reshape(testX.shape[0],1,testX.shape[1])

# Prepare the NDArrayIters corresponding to Training, Testing and Validation data

In [23]:
train_data = mx.io.NDArrayIter(trainX, trainY,
                               batch_size, shuffle=True)
val_data = mx.io.NDArrayIter(valX, valY,
                               batch_size, shuffle=True)
test_data = mx.io.NDArrayIter(testX, testY,
                              batch_size, shuffle=False)

# Define the CNN Model

In [24]:
data = mx.sym.var('data')
# first conv layer
conv1 = mx.sym.Convolution(data=data, kernel=(2,), num_filter=20)
tanh1 = mx.sym.Activation(data=conv1, act_type="relu")
pool1 = mx.sym.Pooling(data=tanh1, pool_type="max", kernel=(2,), stride=(2,))
# second conv layer
conv2 = mx.sym.Convolution(data=pool1, kernel=(2,), num_filter=50)
tanh2 = mx.sym.Activation(data=conv2, act_type="relu")
pool2 = mx.sym.Pooling(data=tanh2, pool_type="max", kernel=(2,), stride=(2,))
# first fullc layer
flatten = mx.sym.flatten(data=pool2)
fc1 = mx.symbol.FullyConnected(data=flatten, num_hidden=500)
tanh3 = mx.sym.Activation(data=fc1, act_type="relu")
# second fullc
fc2 = mx.sym.FullyConnected(data=tanh3, num_hidden=num_outputs)
# softmax loss
lenet = mx.sym.SoftmaxOutput(data=fc2, name='softmax')
cnn_model = mx.mod.Module(symbol=lenet, context=mx.cpu())



# Train the CNN Model

In [25]:
cnn_model.fit(train_data,
                eval_data=val_data,
                optimizer='sgd',
                optimizer_params={'learning_rate':0.01,'momentum':0.9},
                eval_metric='acc',
                num_epoch=100)

# Lets see what the model predicts

In [26]:
acc = mx.metric.Accuracy()
cnn_model.score(test_data, acc)
probabilities = cnn_model.predict(test_data)
predictions = nd.argmax(probabilities, axis=1)
output = np.vectorize(fizz_buzz)(np.arange(1, 101), predictions.asnumpy().astype(np.int))
print(output)
print("Test Accuracy : ",acc.get_name_value()[0][1])

['1' '2' '3' '4' '5' '6' '7' '8' '9' '10' '11' '12' '13' '14' '15' '16'
 '17' '18' '19' '20' '21' '22' '23' '24' '25' '26' '27' '28' '29' '30' '31'
 '32' '33' '34' '35' '36' '37' '38' '39' '40' '41' '42' '43' '44' '45' '46'
 '47' '48' '49' '50' '51' '52' '53' '54' '55' '56' '57' '58' '59' '60' '61'
 '62' '63' '64' '65' '66' '67' '68' '69' '70' '71' '72' '73' '74' '75' '76'
 '77' '78' '79' '80' '81' '82' '83' '84' '85' '86' '87' '88' '89' '90' '91'
 '92' '93' '94' '95' '96' '97' '98' '99' '100']
Test Accuracy :  0.53
