In [10]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.autograd import Variable
import matplotlib.pyplot as plt

from keras.utils import np_utils #funny

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
#import other necessary modules
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

%matplotlib inline

In [3]:
# Load the raw CSV files into NumPy arrays.
# Both files are comma-separated and their first (header) row is skipped.
data, test = (
    np.loadtxt(csv_path, delimiter=',', skiprows=1)
    for csv_path in ('data/train.csv', 'data/test.csv')
)

In [79]:
# Build the feature and label arrays.
# Column 0 of `data` holds the label; the remaining columns are the pixels.
# Images are laid out as (examples, channels, 28, 28) with a single channel,
# so no separate channel-ordering setting is needed for PyTorch.
# astype(float) returns fresh float arrays rather than views of data/test.
X_train = np.reshape(data[:, 1:], (-1, 1, 28, 28)).astype(float)
Y_train = np.reshape(data[:, 0], (-1, 1))
X_test = np.reshape(test, (-1, 1, 28, 28)).astype(float)

In [80]:
# Hold out 2.5% of the labelled examples as a dev set.
# The split is stratified on the labels and uses a fixed random_state so it
# is repeatable.
# NOTE: X_train / Y_train are rebound to the reduced training portion, so
# re-running this cell splits the already-split data again.
X_train, X_dev, Y_train, Y_dev = train_test_split(
    X_train, Y_train,
    test_size=0.025, random_state=1024, stratify=Y_train
)

In [86]:
# Normalization: divide every pixel array by 255 in place.
# NOTE: this cell is not idempotent -- running it a second time divides the
# pixels by 255 again and re-encodes labels that are already one-hot.
np.true_divide(X_train, 255., out=X_train)
np.true_divide(X_dev, 255., out=X_dev)
np.true_divide(X_test, 255., out=X_test)

# One-hot encode the labels over 10 classes.
Y_train, Y_dev = (
    np_utils.to_categorical(label_column, 10)
    for label_column in (Y_train, Y_dev)
)

In [82]:
class CNNModel(nn.Module):
    """Small convolutional classifier for single-channel 28x28 images.

    Layer stack (shapes are per example, for a (1, 28, 28) input):
        conv1 5x5, pad 2  -> (6, 28, 28)  -> ReLU -> max-pool 2 -> (6, 14, 14)
        conv2 5x5, pad 0  -> (16, 10, 10) -> ReLU -> max-pool 2 -> (16, 5, 5)
        conv3 5x5, pad 0  -> (64, 1, 1)   -> ReLU
        conv4 1x1         -> (120, 1, 1)  -> ReLU
        flatten           -> (120,)
        fc1               -> (64,)        -> ReLU
        fc2               -> (10,)

    The network returns the raw fc2 outputs; no softmax is applied.
    """

    def __init__(self):
        super(CNNModel, self).__init__()
        # conv 1                      # (1,28,28) -> (6,28,28): 28 + 2*2 - 5 + 1
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5, stride=1, padding=2)
        self.relu1 = nn.ReLU()
        # max 1                       # (6,28,28) -> (6,14,14)
        self.maxpool1 = nn.MaxPool2d(kernel_size=2)

        # conv 2                      # (6,14,14) -> (16,10,10)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5, stride=1, padding=0)
        self.relu2 = nn.ReLU()
        # max 2                       # (16,10,10) -> (16,5,5)
        self.maxpool2 = nn.MaxPool2d(kernel_size=2)

        # conv 3                      # (16,5,5) -> (64,1,1)
        self.conv3 = nn.Conv2d(in_channels=16, out_channels=64, kernel_size=5, stride=1, padding=0)
        self.relu3 = nn.ReLU()

        # conv 4 (1x1 convolution)    # (64,1,1) -> (120,1,1)
        self.conv4 = nn.Conv2d(in_channels=64, out_channels=120, kernel_size=1, stride=1, padding=0)
        self.relu4 = nn.ReLU()

        # fully connected layers      # 120 -> 64 -> 10
        self.fc1 = nn.Linear(120, 64)
        self.relu5 = nn.ReLU()
        self.fc2 = nn.Linear(64, 10)

    def forward(self, x):
        """Run a batch through the network.

        Arguments:
        x -- float tensor of shape (batch, 1, 28, 28)

        Returns:
        out -- tensor of shape (batch, 10) holding the raw fc2 outputs
        """
        # conv 1
        x = self.maxpool1(self.relu1(self.conv1(x)))
        # conv 2
        x = self.maxpool2(self.relu2(self.conv2(x)))
        # conv 3
        x = self.relu3(self.conv3(x))
        # conv 4
        x = self.relu4(self.conv4(x))

        # flatten everything except the batch dimension
        x = x.view(x.size(0), -1)

        # relu5 must sit between the two linear layers; without it fc1 and fc2
        # compose into a single affine map and the hidden layer adds nothing.
        x = self.relu5(self.fc1(x))
        out = self.fc2(x)

        return out

In [83]:
def random_mini_batches(X, Y, mini_batch_size = 64, seed = 0):
    """
    Shuffle (X, Y) with a common permutation and cut them into mini-batches.

    Arguments:
    X -- array of examples indexed along axis 0, e.g. images of shape (m, C, H, W);
         any number of trailing dimensions is accepted
    Y -- array of labels aligned with X along axis 0, e.g. shape (m, n_y) or (m,)
    mini_batch_size -- number of examples per mini-batch, positive integer
    seed -- passed to np.random.seed so the shuffle is reproducible; note that
            this reseeds NumPy's global random generator

    Returns:
    mini_batches -- list of (mini_batch_X, mini_batch_Y) tuples in shuffled order;
                    the last tuple holds fewer than mini_batch_size examples when
                    m is not a multiple of mini_batch_size

    Raises:
    ValueError -- if mini_batch_size is smaller than 1
    """
    if mini_batch_size < 1:
        raise ValueError("mini_batch_size must be a positive integer")

    m = X.shape[0]                  # number of examples
    np.random.seed(seed)

    # Step 1: shuffle X and Y with the same permutation. Indexing only the
    # leading axis keeps this independent of how many trailing axes they have.
    permutation = np.random.permutation(m)
    shuffled_X = X[permutation]
    shuffled_Y = Y[permutation]

    # Step 2: walk the shuffled data in strides of mini_batch_size. Slicing
    # clips at m, so the final (possibly shorter) batch needs no special case.
    mini_batches = []
    for start in range(0, m, mini_batch_size):
        stop = start + mini_batch_size
        mini_batches.append((shuffled_X[start:stop], shuffled_Y[start:stop]))

    return mini_batches

In [87]:
# Step size handed to the optimizer.
learning_rate = 0.01

# Network to train.
model = CNNModel()

# Mean-squared-error criterion.
loss_func = nn.MSELoss()

# Adam over all of the model's parameters.
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [89]:
# Train the model with mini-batch gradient descent.
iters = 0          # number of optimizer steps taken so far
num_epochs = 1
seeds = 1          # base shuffle seed; epoch k shuffles with seeds + k
for epoch in range(num_epochs):
    # Use a different seed every epoch so the data is reshuffled each time.
    # (Resetting the seed to the same value inside the loop would replay the
    # exact same batches in every epoch.) Epoch 0 still uses seed 1.
    mini_batches = random_mini_batches(X_train, Y_train, mini_batch_size = 64, seed = seeds + epoch)
    for (minibatch_X, minibatch_Y) in mini_batches:
        # convert np arrays to float tensors
        x = torch.FloatTensor(minibatch_X)
        y = torch.FloatTensor(minibatch_Y)
        # wrap tensors in Variable, as the notebook's imports expect
        # NOTE(review): on PyTorch >= 0.4 this wrapper is presumably a no-op -- confirm
        images = Variable(x)
        labels = Variable(y)

        # clear gradients w.r.t parameters
        optimizer.zero_grad()

        # forward pass
        outputs = model(images)

        # calculate loss
        loss = loss_func(outputs, labels)

        # get gradient w.r.t parameters
        loss.backward()

        # update parameters
        optimizer.step()

        # count this optimizer step
        iters += 1
        
