In [1]:
import numpy as np
import csv
import matplotlib.pyplot as plt
import time 
import os, sys  
import string   
import math
import copy

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable



In [2]:
def display_num_param(net):
    """Print how many scalar parameters ``net`` holds.

    The count is the sum of ``numel()`` over every tensor yielded by
    ``net.parameters()``.  It is printed both as a raw number and in
    millions (two decimals).  The function returns ``None``.
    """
    total = sum(p.numel() for p in net.parameters())
    millions = total / 1e6
    template = 'There are {} ({:.2f} million) parameters in this neural network'
    print(template.format(total, millions))


def get_error( scores , labels ):
    """Return the word-level error rate of a batch of per-letter scores.

    A word only counts as correct when *every* letter in it is predicted
    correctly.

    Args:
        scores: tensor laid out as (seq_len, num_words, num_classes); the
            first two axes are swapped internally before the argmax.
        labels: integer tensor of true letter ids, either (seq_len,) for a
            single word or (num_words, seq_len) for a batch.

    Returns:
        A 0-dim float tensor: the fraction of words containing at least one
        wrongly predicted letter (call ``.item()`` for a Python float).
    """
    # (seq_len, num_words, classes) -> (num_words, seq_len, classes)
    scores = torch.transpose(scores, 0, 1)

    bs = scores.size(0)

    # Most likely class for every letter of every word: (num_words, seq_len).
    predicted_labels = scores.argmax(dim=2)

    # Judge each word on its own row.  The earlier version looked only at
    # word 0 but still divided by bs, which under-counted matches for any
    # batch larger than one.  For bs == 1 the result is unchanged.
    word_is_correct = torch.eq(predicted_labels, labels).all(dim=1)

    num_matches = word_is_correct.sum()

    return 1 - num_matches.float()/bs


def csv2images(fileStr):
    """Read a comma-separated file of integers into a list of rows.

    The first line is treated as a header and skipped.  Every remaining
    non-empty line becomes one ``list`` of ``int``.  (Elsewhere in this
    notebook column 0 of each row is used as the label and the remaining
    columns as pixel values.)

    Args:
        fileStr: path of the CSV file to read.

    Returns:
        list[list[int]]: one inner list per data row; ``[]`` if the file
        holds nothing but a header (or nothing at all).

    Raises:
        ValueError: if a field cannot be parsed as an integer.
    """
    # `with` closes the handle; the earlier version leaked it.  newline=''
    # is what the csv module asks for when it is handed a file object.
    with open(fileStr, newline='') as csv_file:
        reader = csv.reader(csv_file)   # default ',' delimiter: no manual join/split
        next(reader, None)              # skip the header, tolerate an empty file
        # Blank lines come back as [] from csv.reader; skip them rather than
        # crashing on int('').
        return [[int(value) for value in row] for row in reader if row]

In [3]:
# Raw image tables: one list of ints per CSV row, header line skipped
# (see csv2images).  Later cells read row[0] as the label and row[1:] as pixels.
train_images = csv2images('sign_mnist_train.csv')
test_images = csv2images('sign_mnist_test.csv')

# Pre-built word datasets.  Later cells slice these objects and iterate
# entry[1] as row indices into the image tables above; what entry[0] holds
# is not visible here -- presumably the word itself, TODO confirm.
# NOTE(review): torch.load unpickles its input -- only load trusted files.
print("loading data...")
trainWordstrainImages = torch.load('trainWordstrainImages.pt')
testWordstestImages = torch.load('testWordstestImages.pt')
print("done")

loading data...
done


In [4]:
# The 26 lower-case ASCII letters, in alphabetical order.
alphas = [letter for letter in string.ascii_lowercase]

# Containers filled by get_data(): one image tensor and one label tensor per
# word.  The train pair is what the CLSTM is trained on; the test pair is
# used for evaluation.
trainWordstrainImages_data, trainWordstrainImages_labels = [], []
testWordstestImages_data, testWordstestImages_labels = [], []
    
def _append_word_tensors(words, images, data_out, labels_out):
    """Turn each word's image-row indices into tensors and append them.

    Args:
        words: iterable of entries whose element ``[1]`` is a sequence of row
            indices into ``images``.
        images: list of rows; ``row[0]`` is the label, ``row[1:]`` the pixels.
        data_out: list that receives one float tensor of shape
            (num_letters, 28, 28), scaled by 1/255, per word.
        labels_out: list that receives one integer label tensor per word.
    """
    for wim in words:
        row_ids = wim[1]

        # Fetch the pixel rows in sequence, reshape to 28 x 28 images and
        # rescale the raw values by 1/255 (float conversion happens first).
        word_imgs = torch.tensor([images[i][1:] for i in row_ids])
        word_imgs = word_imgs.reshape(word_imgs.shape[0], 28, 28).float()
        word_imgs /= 255

        img_labels = [images[i][0] for i in row_ids]

        data_out.append(word_imgs)
        labels_out.append(torch.tensor(img_labels))


def get_data(n_train=40000, n_test=4000):
    """Fill the module-level train/test word lists with image and label tensors.

    Reads ``trainWordstrainImages`` / ``testWordstestImages`` (word entries)
    together with ``train_images`` / ``test_images`` (raw rows) and appends to
    ``trainWordstrainImages_data``, ``trainWordstrainImages_labels``,
    ``testWordstestImages_data`` and ``testWordstestImages_labels``.

    The function *appends*: calling it twice without re-creating those lists
    duplicates their contents.

    Args:
        n_train: use at most this many leading training words.
        n_test: use at most this many leading test words.
    """
    _append_word_tensors(trainWordstrainImages[:n_train], train_images,
                         trainWordstrainImages_data, trainWordstrainImages_labels)
    _append_word_tensors(testWordstestImages[:n_test], test_images,
                         testWordstestImages_data, testWordstestImages_labels)


# Populate the four *_data / *_labels lists defined at the top of this cell.
# get_data appends to them, so they must be empty when this line runs.
get_data()

In [5]:
# Sanity check: number of words in each split ...
print(len(trainWordstrainImages_data), len(testWordstestImages_data))

# ... and the tensor shapes of one arbitrarily chosen word per split
# (image stack first, label vector second).
idx = 33
print(trainWordstrainImages_data[idx].shape, trainWordstrainImages_labels[idx].shape)
print(testWordstestImages_data[idx].shape, testWordstestImages_labels[idx].shape)

40000 4000
torch.Size([8, 28, 28]) torch.Size([8])
torch.Size([11, 28, 28]) torch.Size([11])


In [6]:
class CLSTM(nn.Module):
    """CNN feature extractor followed by an LSTM that scores every image of a sequence.

    Each 1 x 28 x 28 image goes through three conv/ReLU/max-pool stages and
    one linear layer, giving a 128-d feature vector.  The vectors of one word
    are then fed, in order, to a (optionally bidirectional) LSTM with batch
    size 1, and a final linear layer maps every LSTM output to class scores.

    Args:
        h_dim: hidden size of the LSTM.
        classes: number of output classes per image.
        bi_dir: use a bidirectional LSTM when true.
        to_pad: pad the last max-pool by one pixel (truthy) or not (default).
            This changes the flattened feature size from 576 to 1024.
    """

    def __init__(self, h_dim, classes, bi_dir=False, to_pad=0):

        super(CLSTM, self).__init__()

        self.hidden_dim = h_dim

        # CL1:   28 x 28  -->    64 x 28 x 28
        self.conv1 = nn.Conv2d(1, 64, kernel_size=3, padding=1)

        # MP1: 64 x 28 x 28 -->    64 x 14 x 14
        self.pool1 = nn.MaxPool2d(2, 2)

        # CL2:   64 x 14 x 14  -->    64 x 14 x 14
        self.conv2 = nn.Conv2d(64, 64, kernel_size=3, padding=1)

        # MP2: 64 x 14 x 14  -->    64 x 7 x 7
        self.pool2 = nn.MaxPool2d(2, 2)

        # CL3:   64 x 7 x 7  -->    64 x 7 x 7
        self.conv3 = nn.Conv2d(64, 64, kernel_size=3, padding=1)

        # Formerly hard-coded to 0, which made the padded branch unreachable.
        # Normalised to 0/1 because a 2x2 pool accepts at most 1 pixel of padding.
        self.to_pad = int(bool(to_pad))

        # MP3: 64 x 7 x 7  -->    64 x 4 x 4 or 64 x 3 x 3, depending on padding
        self.pool3 = nn.MaxPool2d(2, 2, padding=self.to_pad)

        # LL1:   64 x 4 x 4 = 1024 -->  128   (padded)
        #        64 x 3 x 3 = 576  -->  128   (unpadded)
        flat_dim = 1024 if self.to_pad else 576
        self.linear1 = nn.Linear(flat_dim, 128)

        # Dropout after the CNN features and after the LSTM outputs.
        self.dropout1 = nn.Dropout(0.7)
        self.dropout2 = nn.Dropout(0.7)

        # LSTM over the per-image feature vectors
        self.lstm_in_dim = 128
        self.lstm = nn.LSTM(self.lstm_in_dim, self.hidden_dim, bidirectional=bi_dir)

        # linear: LSTM output (doubled width when bidirectional) --> class scores
        self.hidden2label1 = nn.Linear(self.hidden_dim*(1+int(bi_dir)), classes)

    def forward(self, x, h_init, c_init):
        """Score every image of one word.

        Args:
            x: images of one word, shape (seq_len, 1, 28, 28).
            h_init: initial LSTM hidden state, shape
                (num_directions, 1, hidden_dim).
            c_init: initial LSTM cell state, same shape as ``h_init``.

        Returns:
            (scores, h_final, c_final) where ``scores`` has shape
            (seq_len, 1, classes) and holds raw, un-normalised scores.
        """
        x = self.pool1(F.relu(self.conv1(x)))
        x = self.pool2(F.relu(self.conv2(x)))

        # MP3: 64 x 7 x 7  -->    64 x 4 x 4 or 64 x 3 x 3, depending on padding
        x = self.pool3(F.relu(self.conv3(x)))

        # Flatten to whatever width linear1 was built for (576 or 1024).
        x = x.view(-1, self.linear1.in_features)
        x = F.relu(self.linear1(x))

        # Dropout.  Its output is already non-negative, so the extra ReLU that
        # used to follow it was a no-op and has been removed.
        cnn_x = self.dropout1(x)

        # LSTM: the images become the time axis, with a batch dimension of 1.
        g_seq = cnn_x.unsqueeze(dim=1)
        lstm_out, (h_final, c_final) = self.lstm(g_seq, (h_init, c_init))

        # Dropout
        lstm_out = self.dropout2(lstm_out)

        # linear; no activation here, the scores are returned raw
        scores = self.hidden2label1(lstm_out)

        return scores, h_final, c_final

In [10]:
# Run on the GPU when one is available, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Model hyper-parameters (also read by the evaluation and training cells).
classes = 26
hidden_dim_of_lstm1 = 256
bi_dir = True
num_lstm_layers = 1

clstm = CLSTM(hidden_dim_of_lstm1, classes, bi_dir)
clstm = clstm.to(device)

# display_num_param prints its own report and returns None; wrapping it in
# print() used to emit a stray "None" line.
display_num_param(clstm)

# Initial learning rate (decayed by the training cell) and the loss function.
my_lr = 0.04
criterion = nn.CrossEntropyLoss()

There are 952218 (0.95 million) parameters in this neural network
None


In [11]:
def evaluation(eval_net, typ=None, max_words=2000):
    """Report mean loss and word-level error of ``eval_net`` on the test words.

    Uses the module-level ``testWordstestImages_data`` /
    ``testWordstestImages_labels`` lists together with ``device``, ``bi_dir``,
    ``num_lstm_layers``, ``hidden_dim_of_lstm1``, ``classes``, ``criterion``
    and ``get_error``.  The caller is responsible for putting ``eval_net`` in
    evaluation mode.  The summary is printed; nothing is returned.

    Args:
        eval_net: callable ``(images, h, c) -> (scores, h, c)``.
        typ: unused; kept so existing calls keep working.
        max_words: evaluate at most this many leading test words.

    Raises:
        ValueError: if there is no test word to evaluate.
    """
    test_data = testWordstestImages_data
    test_labels = testWordstestImages_labels

    # Never index past the end of the test set (the count used to be forced
    # to 2000 regardless of how many words were actually loaded).
    num_words = min(max_words, len(test_data))
    if num_words <= 0:
        raise ValueError('evaluation needs at least one test word')

    running_loss = 0
    running_error = 0
    num_batches = 0

    # If bi_dir is True the first dimension is 2 (one state per direction).
    state_shape = ((1+int(bi_dir))*num_lstm_layers, 1, hidden_dim_of_lstm1)

    # No gradients are needed for evaluation; skipping them saves memory and time.
    with torch.no_grad():
        for wrd in range(num_words):

            # (letters, 28, 28) -> (letters, 1, 28, 28): add the channel axis
            input_tensor = test_data[wrd].unsqueeze(dim=1).to(device)
            target_tensor = test_labels[wrd].to(device)

            # fresh zero states for every word
            h = torch.zeros(state_shape, device=device)
            c = torch.zeros(state_shape, device=device)

            out, h, c = eval_net(input_tensor, h, c)

            running_error += get_error(out.detach(), target_tensor).item()

            # drop the batch axis so the loss sees (letters, classes)
            out = out.view(len(input_tensor), classes)
            running_loss += criterion(out, target_tensor).item()

            num_batches += 1

    total_loss = running_loss/num_batches
    total_error = running_error/num_batches

    print('\n::::::::::::::::::::::::EVAL::::::::::::::::::::::::\n ', '\t exp(loss)=', math.exp(total_loss),'\t (loss)=' , (total_loss),
                                                                      '\t error=', total_error*100 ,'percent')
    print("::::::::::::::::::::::::::::::::::::::::::::::::::::")
    

In [None]:
# Train on at most the first 20000 words, and never more than were loaded
# (the count used to be overwritten with 20000 unconditionally).
num_words = min(20000, len(trainWordstrainImages_data))
assert num_words > 0, 'no training words loaded'
print('Num of train data: ', num_words, '\n')

# If bi_dir is True the first dimension is 2 (one state per direction).
state_shape = ((1+int(bi_dir))*num_lstm_layers, 1, hidden_dim_of_lstm1)

start=time.time()
for epoch in range(1000):

    # Divide the learning rate by 1.2 on every 10th epoch.
    # NOTE: my_lr is a notebook-level variable, so re-running this cell
    # continues from the already-decayed value.
    if (epoch+1) %10==0:
        my_lr = my_lr / 1.2

    # Plain SGD keeps no per-parameter state, so rebuilding the optimizer each
    # epoch is simply how the new learning rate is applied.
    optimizer = torch.optim.SGD(clstm.parameters(), lr=my_lr)

    # per-epoch accumulators
    running_loss = 0
    running_error = 0
    num_batches = 0

    # One word per step, always in the same order (no shuffling).
    for wrd in range(num_words):

        # (letters, 28, 28) -> (letters, 1, 28, 28): add the channel axis
        input_tensor = trainWordstrainImages_data[wrd].unsqueeze(dim=1).to(device)
        target_tensor = trainWordstrainImages_labels[wrd].to(device)

        # fresh zero states for every word
        h = torch.zeros(state_shape, device=device)
        c = torch.zeros(state_shape, device=device)

        # set the gradient values to zero
        optimizer.zero_grad()

        out, h, c = clstm(input_tensor, h, c)

        running_error += get_error( out.detach() , target_tensor).item()

        # drop the batch axis so the loss sees (letters, classes)
        out = out.view(len(input_tensor), classes)

        loss = criterion(out, target_tensor)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        num_batches += 1

    total_loss = running_loss/num_batches
    total_error = running_error/num_batches
    elapsed = time.time()-start      # cumulative since training started

    print('TRAIN::: ', 'epoch=',epoch, '\t time=', elapsed,'\t lr=', my_lr, '\t exp(loss)=', math.exp(total_loss),'\t (loss)=' , (total_loss),
                                                                      '\t error=', total_error*100 ,'percent')

    # Every 10th epoch: evaluate in eval mode (dropout off), checkpoint, then
    # switch back to training mode.
    if (epoch+1)%10==0:
        eval_net = clstm.eval()
        evaluation(eval_net)
        # NOTE(review): this pickles the whole module (while in eval mode).
        # Saving clstm.state_dict() is the more portable option, but it would
        # change the checkpoint format, so it is left as is.
        torch.save(clstm, 'CLSTM.pth')
        clstm = clstm.train()

    print('\n')

Num of train data:  20000 

TRAIN:::  epoch= 0 	 time= 121.95036435127258 	 lr= 0.04 	 exp(loss)= 1.9661237427904572 	 (loss)= 0.6760639611432598 	 error= 58.24 percent


TRAIN:::  epoch= 1 	 time= 243.31008505821228 	 lr= 0.04 	 exp(loss)= 1.3678195645864681 	 (loss)= 0.31321791326903986 	 error= 33.055 percent


TRAIN:::  epoch= 2 	 time= 365.16282296180725 	 lr= 0.04 	 exp(loss)= 1.094389691086841 	 (loss)= 0.09019684814774188 	 error= 15.620000000000001 percent


TRAIN:::  epoch= 3 	 time= 486.6857125759125 	 lr= 0.04 	 exp(loss)= 1.0428794451324424 	 (loss)= 0.04198558461379817 	 error= 9.17 percent


TRAIN:::  epoch= 4 	 time= 607.9333639144897 	 lr= 0.04 	 exp(loss)= 1.0228241251896713 	 (loss)= 0.022567551553735434 	 error= 5.36 percent


TRAIN:::  epoch= 5 	 time= 729.4032871723175 	 lr= 0.04 	 exp(loss)= 1.016567503790954 	 (loss)= 0.016431759939343923 	 error= 3.8350000000000004 percent


TRAIN:::  epoch= 6 	 time= 851.0047750473022 	 lr= 0.04 	 exp(loss)= 1.0113245997830922

  "type " + obj.__name__ + ". It won't be checked "




TRAIN:::  epoch= 10 	 time= 1341.3444292545319 	 lr= 0.03333333333333333 	 exp(loss)= 1.0042982114420624 	 (loss)= 0.004289000515503158 	 error= 1.055 percent


TRAIN:::  epoch= 11 	 time= 1462.8457388877869 	 lr= 0.03333333333333333 	 exp(loss)= 1.0034169935099084 	 (loss)= 0.0034111688523583367 	 error= 0.89 percent


TRAIN:::  epoch= 12 	 time= 1583.9690253734589 	 lr= 0.03333333333333333 	 exp(loss)= 1.0028422092722744 	 (loss)= 0.00283817783249117 	 error= 0.75 percent


TRAIN:::  epoch= 13 	 time= 1704.8097331523895 	 lr= 0.03333333333333333 	 exp(loss)= 1.0027489541558205 	 (loss)= 0.0027451826914858396 	 error= 0.715 percent


TRAIN:::  epoch= 14 	 time= 1825.9969408512115 	 lr= 0.03333333333333333 	 exp(loss)= 1.002403970651429 	 (loss)= 0.0024010857365579868 	 error= 0.5700000000000001 percent


TRAIN:::  epoch= 15 	 time= 1947.2077431678772 	 lr= 0.03333333333333333 	 exp(loss)= 1.0021473389168387 	 (loss)= 0.0021450366798265734 	 error= 0.585 percent


TRAIN:::  epoch= 16