In [0]:
#Importing required libraries
%matplotlib inline

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sn
from tabulate import tabulate
import time
import matplotlib.pyplot as plt
from torch.autograd import Variable

# True when a CUDA device is usable; later cells use it to move models and batches to the GPU.
cuda = torch.cuda.is_available()

# Seed the default generator, and additionally the CUDA generator when a GPU is present.
torch.manual_seed(42)
if cuda:
    torch.cuda.manual_seed(42)

In [24]:
#Download CIFAR-10 and wrap the train/test splits in batched, shuffled loaders
batch_size = 100

# Both splits use the same preprocessing. Normalize is given mean 0 and std 1.
_cifar_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0,), (1,)),
])

train_loader = torch.utils.data.DataLoader(
    datasets.CIFAR10('data', train=True, download=True, transform=_cifar_transform),
    batch_size=batch_size,
    shuffle=True,
)

test_loader = torch.utils.data.DataLoader(
    datasets.CIFAR10('data', train=False, transform=_cifar_transform),
    batch_size=batch_size,
    shuffle=True,
)

# Class labels in index order; used as row/column names of the confusion matrices.
class_name = [
    'airplane', 'automobile', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
]

# Result tables: the first row is the header, each experiment appends one row.
array = [['Model', 'Final Accuracy', 'Convergence time(sec)']]

depth_array = [['Number of Layers', 'Final Accuracy', 'Convergence time(sec)']]

Files already downloaded and verified


In [0]:
#Fully connected network with dropout regularization: ReLU activations, 3 linear layers
class Net(nn.Module):
    """MLP over flattened 3x32x32 inputs that returns log-probabilities for 10 classes."""

    def __init__(self):
        super().__init__()
        # 3072 -> 2000 -> 200 -> 10, with Dropout(0.2) after each hidden layer.
        self.fc1 = nn.Linear(3*32*32, 2000)
        self.fc1_drop = nn.Dropout(0.2)
        self.fc2 = nn.Linear(2000, 200)
        self.fc2_drop = nn.Dropout(0.2)
        self.fc3 = nn.Linear(200, 10)

    def forward(self, x):
        """Flatten `x` to rows of 3072 values and return log-softmax class scores."""
        hidden = x.view(-1, 3*32*32)
        hidden_stages = (
            (self.fc1, self.fc1_drop),
            (self.fc2, self.fc2_drop),
        )
        for linear, dropout in hidden_stages:
            hidden = dropout(F.relu(linear(hidden)))
        return F.log_softmax(self.fc3(hidden), dim=1)
    

# Build the dropout network; train()/test() read it through the global name `model`.
model = Net()
model = model.cuda() if cuda else model
    
    
#Training loop for the global `model` (one pass over train_loader)
def train(epoch, log_interval=1000):
    """Train the global `model` for one epoch with NLL loss and the global `optimizer`.

    Args:
        epoch: Epoch number, used only in the progress message.
        log_interval: A progress line is printed for every batch whose index
            is a multiple of this value.
    """
    model.train()
    num_batches = len(train_loader)
    for step, (inputs, labels) in enumerate(train_loader):
        if cuda:
            inputs = inputs.cuda()
            labels = labels.cuda()
        optimizer.zero_grad()
        batch_loss = F.nll_loss(model(inputs), labels)
        batch_loss.backward()
        optimizer.step()
        if step % log_interval != 0:
            continue
        percent_done = 100. * float(step) / num_batches
        print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
            epoch, step * len(inputs), len(train_loader.dataset),
            percent_done, batch_loss.item()))
            
#Evaluation loop for the global `model` on test_loader
def test(loss_vector, accuracy_vector):
    """Evaluate the global `model` on `test_loader` and record the results.

    Every evaluated sample is also tallied into the global `confusion_matrix`
    (rows = predicted class, columns = true class).

    Args:
        loss_vector: List that receives the mean per-batch NLL loss (float).
        accuracy_vector: List that receives the accuracy in percent (float).
    """
    model.eval()
    test_loss, correct = 0.0, 0
    # Evaluation needs no gradients; no_grad avoids building the autograd graph.
    with torch.no_grad():
        for data, target in test_loader:
            if cuda:
                data, target = data.cuda(), target.cuda()
            output = model(data)
            test_loss += F.nll_loss(output, target).item()
            pred = output.argmax(dim=1)
            correct += int(pred.eq(target).sum().item())
            # Count the whole batch (whatever its size); add.at accumulates
            # correctly when the same (pred, target) pair occurs repeatedly.
            np.add.at(confusion_matrix,
                      (pred.cpu().numpy(), target.cpu().numpy()), 1)
    test_loss /= len(test_loader)
    loss_vector.append(test_loss)

    accuracy = 100. * correct / float(len(test_loader.dataset))
    accuracy_vector.append(accuracy)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset), float(accuracy)))

In [0]:
#SGD optimizer with Dropout regularization
arr=['SGD with Dropout']

# 10x10 integer count matrix that test() fills in place
# (rows = predicted class, columns = true class).
confusion_matrix = np.zeros((10, 10), dtype=int)

optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)
epochs = 10

lossv, accv = [], []
start=time.time()
for epoch in range(1, epochs + 1):
    train(epoch)
    # Keep only the final epoch's counts so the plot matches the reported final accuracy.
    confusion_matrix.fill(0)
    test(lossv, accv)
end=time.time()
df_cm = pd.DataFrame(confusion_matrix, index=class_name, columns=class_name)

#plot confusion matrix for this network
plt.figure(figsize = (20,10))
# fmt='d' prints the integer counts in full instead of the default '.2g' (e.g. 1e+02).
x=sn.heatmap(df_cm, annot=True, fmt='d')

arr.append(round(float(accv[-1]),2))
arr.append(round(float(end-start),2))
array.append(arr)

In [0]:
#Adam optimizer with Dropout regularization
arr=['Adam with Dropout']

# Start from freshly initialised weights: train()/test() use the global `model`,
# which would otherwise still hold the weights trained by the previous optimizer.
model = Net()
if cuda:
    model.cuda()

# 10x10 integer count matrix that test() fills in place
# (rows = predicted class, columns = true class).
confusion_matrix = np.zeros((10, 10), dtype=int)
optimizer = optim.Adam(model.parameters(), lr=0.01)
epochs = 10

lossv, accv = [], []

start=time.time()
for epoch in range(1, epochs + 1):
    train(epoch)
    # Keep only the final epoch's counts so the plot matches the reported final accuracy.
    confusion_matrix.fill(0)
    test(lossv, accv)
end=time.time()

df_cm = pd.DataFrame(confusion_matrix, index=class_name, columns=class_name)
#plot confusion matrix for this network
plt.figure(figsize = (20,10))
# fmt='d' prints the integer counts in full instead of the default '.2g'.
x=sn.heatmap(df_cm, annot=True, fmt='d')

arr.append(round(float(accv[-1]),2))
arr.append(round(float(end-start),2))
array.append(arr)

In [0]:
#Adagrad optimizer with Dropout regularization
arr=['Adagrad with Dropout']

# Start from freshly initialised weights: train()/test() use the global `model`,
# which would otherwise still hold the weights trained by the previous optimizer.
model = Net()
if cuda:
    model.cuda()

# 10x10 integer count matrix that test() fills in place
# (rows = predicted class, columns = true class).
confusion_matrix = np.zeros((10, 10), dtype=int)
optimizer = optim.Adagrad(model.parameters(), lr=0.01, lr_decay=0.001, weight_decay=0.01, initial_accumulator_value=0)
epochs = 10

lossv, accv = [], []

start=time.time()
for epoch in range(1, epochs + 1):
    train(epoch)
    # Keep only the final epoch's counts so the plot matches the reported final accuracy.
    confusion_matrix.fill(0)
    test(lossv, accv)
end=time.time()

df_cm = pd.DataFrame(confusion_matrix, index=class_name, columns=class_name)
#plot confusion matrix for this network
plt.figure(figsize = (20,20))
# fmt='d' prints the integer counts in full instead of the default '.2g'.
x=sn.heatmap(df_cm, annot=True, fmt='d')

arr.append(round(float(accv[-1]),2))
arr.append(round(float(end-start),2))
array.append(arr)

In [0]:
#Adadelta optimizer with Dropout regularization
arr=['Adadelta with Dropout']

# Start from freshly initialised weights: train()/test() use the global `model`,
# which would otherwise still hold the weights trained by the previous optimizer.
model = Net()
if cuda:
    model.cuda()

# 10x10 integer count matrix that test() fills in place
# (rows = predicted class, columns = true class).
confusion_matrix = np.zeros((10, 10), dtype=int)
optimizer = optim.Adadelta(model.parameters(), lr=1.0, rho=0.9, eps=1e-06, weight_decay=0.1)
epochs = 10

lossv, accv = [], []
start=time.time()
for epoch in range(1, epochs + 1):
    train(epoch)
    # Keep only the final epoch's counts so the plot matches the reported final accuracy.
    confusion_matrix.fill(0)
    test(lossv, accv)
end=time.time()
df_cm = pd.DataFrame(confusion_matrix, index=class_name, columns=class_name)

#plot confusion matrix for this network
plt.figure(figsize = (20,10))
# fmt='d' prints the integer counts in full instead of the default '.2g'.
x=sn.heatmap(df_cm, annot=True, fmt='d')

arr.append(round(float(accv[-1]),2))
arr.append(round(float(end-start),2))
array.append(arr)

In [0]:
#RMSprop optimizer with Dropout regularization
arr=['RMSprop with dropout']

# Start from freshly initialised weights: train()/test() use the global `model`,
# which would otherwise still hold the weights trained by the previous optimizer.
model = Net()
if cuda:
    model.cuda()

# 10x10 integer count matrix that test() fills in place
# (rows = predicted class, columns = true class).
confusion_matrix = np.zeros((10, 10), dtype=int)
optimizer = optim.RMSprop(model.parameters(), lr=0.01, alpha=0.99, eps=1e-08, weight_decay=0.1, momentum=0.9, centered=True)
epochs = 10

lossv, accv = [], []

start=time.time()
for epoch in range(1, epochs + 1):
    train(epoch)
    # Keep only the final epoch's counts so the plot matches the reported final accuracy.
    confusion_matrix.fill(0)
    test(lossv, accv)
end=time.time()

df_cm = pd.DataFrame(confusion_matrix, index=class_name, columns=class_name)
#plot confusion matrix for this network
plt.figure(figsize = (20,10))
# fmt='d' prints the integer counts in full instead of the default '.2g'.
x=sn.heatmap(df_cm, annot=True, fmt='d')

arr.append(round(float(accv[-1]),2))
arr.append(round(float(end-start),2))
array.append(arr)

In [0]:
#Network for the L2 (weight-decay) experiments: tanh and sigmoid hidden activations, 3 linear layers
class Net_L2(nn.Module):
    """MLP over flattened 3x32x32 inputs that returns log-probabilities for 10 classes."""

    def __init__(self):
        super(Net_L2, self).__init__()
        # 3072 -> 100 -> 50 -> 10
        self.fc1 = nn.Linear(3*32*32,100)
        self.fc2 = nn.Linear(100, 50)
        self.fc3 = nn.Linear(50, 10)

    def forward(self, x):
        """Flatten `x` to rows of 3072 values and return log-softmax class scores."""
        x = x.view(-1, 3*32*32)
        x = torch.tanh(self.fc1(x))
        x = torch.sigmoid(self.fc2(x))
        # train()/test() feed this output to F.nll_loss, which expects
        # log-probabilities; a ReLU output here makes that loss meaningless.
        return F.log_softmax(self.fc3(x), dim=1)

# train()/test() operate on the global `model`, while the optimizer cells below
# read `model_l2.parameters()`; bind both names to the same network.
model = model_l2 = Net_L2()
if cuda:
    model.cuda()

In [0]:
#SGD optimizer with L2 regularization
arr=['SGD with L2 Regularization']

# Fresh network for this run. train()/test() use the global `model`;
# `model_l2` is kept as a second name for the same object.
model = model_l2 = Net_L2()
if cuda:
    model.cuda()

# 10x10 integer count matrix that test() fills in place
# (rows = predicted class, columns = true class).
confusion_matrix = np.zeros((10, 10), dtype=int)

#L2 regularizer given to SGD through weight_decay
optimizer = optim.SGD(model_l2.parameters(), lr=0.001, momentum=0.5,weight_decay=0.09)
epochs = 10

lossv, accv = [], []
start=time.time()
for epoch in range(1, epochs + 1):
    train(epoch)
    # Keep only the final epoch's counts so the plot matches the reported final accuracy.
    confusion_matrix.fill(0)
    test(lossv, accv)
end=time.time()
df_cm = pd.DataFrame(confusion_matrix, index=class_name, columns=class_name)

#plot confusion matrix for this network
plt.figure(figsize = (20,10))
# fmt='d' prints the integer counts in full instead of the default '.2g'.
x=sn.heatmap(df_cm, annot=True, fmt='d')

arr.append(round(float(accv[-1]),2))
arr.append(round(float(end-start),2))
array.append(arr)

In [0]:
#Adadelta optimizer with L2 regularization
arr=['Adadelta with L2 Regularization']

# Fresh network for this run. train()/test() use the global `model`;
# `model_l2` is kept as a second name for the same object.
model = model_l2 = Net_L2()
if cuda:
    model.cuda()

# 10x10 integer count matrix that test() fills in place
# (rows = predicted class, columns = true class).
confusion_matrix = np.zeros((10, 10), dtype=int)

#L2 regularizer given to Adadelta through weight_decay
optimizer  = optim.Adadelta(model_l2.parameters(), lr=0.001, weight_decay=0.09)
epochs = 10

lossv, accv = [], []
start=time.time()
for epoch in range(1, epochs + 1):
    train(epoch)
    # Keep only the final epoch's counts so the plot matches the reported final accuracy.
    confusion_matrix.fill(0)
    test(lossv, accv)
end=time.time()
df_cm = pd.DataFrame(confusion_matrix, index=class_name, columns=class_name)

#plot confusion matrix for this network
plt.figure(figsize = (20,10))
# fmt='d' prints the integer counts in full instead of the default '.2g'.
x=sn.heatmap(df_cm, annot=True, fmt='d')

arr.append(round(float(accv[-1]),2))
arr.append(round(float(end-start),2))
array.append(arr)

In [0]:
#Adam optimizer with L2 regularization
arr=['Adam with L2 Regularization']

# Fresh network for this run. train()/test() use the global `model`;
# `model_l2` is kept as a second name for the same object.
model = model_l2 = Net_L2()
if cuda:
    model.cuda()

# 10x10 integer count matrix that test() fills in place
# (rows = predicted class, columns = true class).
confusion_matrix = np.zeros((10, 10), dtype=int)

#L2 regularizer given to Adam through weight_decay
optimizer = optim.Adam(model_l2.parameters(), lr=0.001, eps=1e-08, weight_decay=0.09, amsgrad=False)
epochs = 10

lossv, accv = [], []
start=time.time()
for epoch in range(1, epochs + 1):
    train(epoch)
    # Keep only the final epoch's counts so the plot matches the reported final accuracy.
    confusion_matrix.fill(0)
    test(lossv, accv)
end=time.time()
df_cm = pd.DataFrame(confusion_matrix, index=class_name, columns=class_name)

#plot confusion matrix for this network
plt.figure(figsize = (20,10))
# fmt='d' prints the integer counts in full instead of the default '.2g'.
x=sn.heatmap(df_cm, annot=True, fmt='d')

arr.append(round(float(accv[-1]),2))
arr.append(round(float(end-start),2))
array.append(arr)

In [0]:
#Adagrad optimizer with L2 regularization
arr=['Adagrad with L2 Regularization']

# Fresh network for this run. train()/test() use the global `model`;
# `model_l2` is kept as a second name for the same object.
model = model_l2 = Net_L2()
if cuda:
    model.cuda()

# 10x10 integer count matrix that test() fills in place
# (rows = predicted class, columns = true class).
confusion_matrix = np.zeros((10, 10), dtype=int)

#L2 regularizer given to Adagrad through weight_decay
optimizer = optim.Adagrad(model_l2.parameters(), lr=0.01, lr_decay=0.001, weight_decay=0.09, initial_accumulator_value=0)
epochs = 10

lossv, accv = [], []
start=time.time()
for epoch in range(1, epochs + 1):
    train(epoch)
    # Keep only the final epoch's counts so the plot matches the reported final accuracy.
    confusion_matrix.fill(0)
    test(lossv, accv)
end=time.time()
df_cm = pd.DataFrame(confusion_matrix, index=class_name, columns=class_name)

#plot confusion matrix for this network
plt.figure(figsize = (20,10))
# fmt='d' prints the integer counts in full instead of the default '.2g'.
x=sn.heatmap(df_cm, annot=True, fmt='d')

arr.append(round(float(accv[-1]),2))
arr.append(round(float(end-start),2))
array.append(arr)

In [0]:
#RMSProp optimizer with L2 regularization
arr=['RMSProp with L2 Regularization']

# Fresh network for this run. train()/test() use the global `model`;
# `model_l2` is kept as a second name for the same object.
model = model_l2 = Net_L2()
if cuda:
    model.cuda()

# 10x10 integer count matrix that test() fills in place
# (rows = predicted class, columns = true class).
confusion_matrix = np.zeros((10, 10), dtype=int)

#L2 regularizer given to RMSprop through weight_decay
optimizer = optim.RMSprop(model_l2.parameters(), lr=0.01, alpha=0.99, eps=1e-08, weight_decay=0.09, momentum=0, centered=False)
epochs = 10

lossv, accv = [], []
start=time.time()
for epoch in range(1, epochs + 1):
    train(epoch)
    # Keep only the final epoch's counts so the plot matches the reported final accuracy.
    confusion_matrix.fill(0)
    test(lossv, accv)
end=time.time()
df_cm = pd.DataFrame(confusion_matrix, index=class_name, columns=class_name)

#plot confusion matrix for this network
plt.figure(figsize = (20,10))
# fmt='d' prints the integer counts in full instead of the default '.2g'.
x=sn.heatmap(df_cm, annot=True, fmt='d')

arr.append(round(float(accv[-1]),2))
arr.append(round(float(end-start),2))
array.append(arr)

In [0]:
#Network for the L1 experiments: relu and tanh hidden activations, 3 linear layers
class Net_L1(nn.Module):
    """MLP over flattened 3x32x32 inputs that returns log-probabilities for 10 classes."""

    def __init__(self):
        super(Net_L1, self).__init__()
        # 3072 -> 70 -> 50 -> 10
        self.fc1 = nn.Linear(3*32*32,70)
        self.fc2 = nn.Linear(70, 50)
        self.fc3 = nn.Linear(50, 10)

    def forward(self, x):
        """Flatten `x` to rows of 3072 values and return log-softmax class scores."""
        x = x.view(-1, 3*32*32)
        x = F.relu(self.fc1(x))
        x = torch.tanh(self.fc2(x))
        # The training/testing loops feed this output to F.nll_loss, which
        # expects log-probabilities; a sigmoid output makes that loss meaningless.
        return F.log_softmax(self.fc3(x), dim=1)
        
        

# Build the L1-experiment network (on the GPU when available) and show its layers.
model_l1 = Net_L1()
model_l1 = model_l1.cuda() if cuda else model_l1

print(model_l1)

#Training loop for model_l1 with an explicit L1 penalty on all parameters
def train_l1(epoch, log_interval=1000, l1_reg_lamda=0.5):
    """Train the global `model_l1` for one epoch with NLL loss plus an L1 penalty.

    Args:
        epoch: Epoch number, used only in the progress message.
        log_interval: A progress line is printed for every batch whose index
            is a multiple of this value.
        l1_reg_lamda: Weight of the L1 term (sum of absolute parameter values).
            NOTE(review): 0.5 is the original value; now that the penalty
            actually reaches the parameters it may be too strong - tune it.
    """
    model_l1.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        if cuda:
            data, target = data.cuda(), target.cuda()
        optimizer.zero_grad()
        output = model_l1(data)

        # Sum |w| over the parameters themselves. Re-wrapping them in Variable
        # would create detached copies, so the penalty's gradient would never
        # reach the model.
        l1_reg = sum(param.abs().sum() for param in model_l1.parameters())

        loss = F.nll_loss(output, target) + l1_reg_lamda * l1_reg
        loss.backward()
        optimizer.step()

        if batch_idx % log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * float(batch_idx) / len(train_loader), loss.item()))
            
            
def test_l1(loss_vector, accuracy_vector):
    """Evaluate the global `model_l1` on `test_loader` and record the results.

    Every evaluated sample is also tallied into the global `confusion_matrix`
    (rows = predicted class, columns = true class).

    Args:
        loss_vector: List that receives the mean per-batch NLL loss (float).
        accuracy_vector: List that receives the accuracy in percent (float).
    """
    model_l1.eval()
    test_loss, correct = 0.0, 0
    # Evaluation needs no gradients; no_grad avoids building the autograd graph.
    with torch.no_grad():
        for data, target in test_loader:
            if cuda:
                data, target = data.cuda(), target.cuda()
            output = model_l1(data)
            # Add each batch loss exactly once, as a plain float.
            test_loss += F.nll_loss(output, target).item()
            pred = output.argmax(dim=1)
            correct += int(pred.eq(target).sum().item())
            # Count the whole batch (whatever its size); add.at accumulates
            # correctly when the same (pred, target) pair occurs repeatedly.
            np.add.at(confusion_matrix,
                      (pred.cpu().numpy(), target.cpu().numpy()), 1)

    test_loss /= len(test_loader)
    loss_vector.append(test_loss)

    accuracy = 100. * correct / float(len(test_loader.dataset))
    accuracy_vector.append(accuracy)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset), float(accuracy)))

In [0]:
#SGD optimizer with L1 regularization
arr=['SGD with L1 Regularization']

# Fresh network for this run; train_l1()/test_l1() use the global `model_l1`.
model_l1 = Net_L1()
if cuda:
    model_l1.cuda()

# 10x10 integer count matrix that test_l1() fills in place
# (rows = predicted class, columns = true class).
confusion_matrix = np.zeros((10, 10), dtype=int)

#The L1 penalty is added to the loss inside train_l1(); SGD itself gets no weight_decay here
optimizer = optim.SGD(model_l1.parameters(), lr=0.001, momentum=0.5)
epochs = 10

lossv, accv = [], []
start=time.time()
for epoch in range(1, epochs + 1):
    # Use the L1-specific loops: train()/test() drive the unrelated global `model`.
    train_l1(epoch)
    # Keep only the final epoch's counts so the plot matches the reported final accuracy.
    confusion_matrix.fill(0)
    test_l1(lossv, accv)
end=time.time()
df_cm = pd.DataFrame(confusion_matrix, index=class_name, columns=class_name)

#plot confusion matrix for this network
plt.figure(figsize = (20,10))
# fmt='d' prints the integer counts in full instead of the default '.2g'.
x=sn.heatmap(df_cm, annot=True, fmt='d')

arr.append(round(float(accv[-1]),2))
arr.append(round(float(end-start),2))
array.append(arr)

In [0]:
#Adadelta optimizer with L1 regularization
arr=['adadelta with L1 Regularization']

# Fresh network for this run; train_l1()/test_l1() use the global `model_l1`.
model_l1 = Net_L1()
if cuda:
    model_l1.cuda()

# 10x10 integer count matrix that test_l1() fills in place
# (rows = predicted class, columns = true class).
confusion_matrix = np.zeros((10, 10), dtype=int)

#The L1 penalty is added to the loss inside train_l1().
#NOTE(review): weight_decay is an L2 penalty applied on top of it - confirm it is intended.
optimizer  = optim.Adadelta(model_l1.parameters(), lr=0.001, weight_decay=0.09)
epochs = 10

lossv, accv = [], []
start=time.time()
for epoch in range(1, epochs + 1):
    # Use the L1-specific loops: train()/test() drive the unrelated global `model`.
    train_l1(epoch)
    # Keep only the final epoch's counts so the plot matches the reported final accuracy.
    confusion_matrix.fill(0)
    test_l1(lossv, accv)
end=time.time()
df_cm = pd.DataFrame(confusion_matrix, index=class_name, columns=class_name)

#plot confusion matrix for this network
plt.figure(figsize = (20,10))
# fmt='d' prints the integer counts in full instead of the default '.2g'.
x=sn.heatmap(df_cm, annot=True, fmt='d')

arr.append(round(float(accv[-1]),2))
arr.append(round(float(end-start),2))
array.append(arr)

In [0]:
#Adam optimizer with L1 regularization
arr=['adam with L1 Regularization']

# Fresh network for this run; train_l1()/test_l1() use the global `model_l1`.
model_l1 = Net_L1()
if cuda:
    model_l1.cuda()

# 10x10 integer count matrix that test_l1() fills in place
# (rows = predicted class, columns = true class).
confusion_matrix = np.zeros((10, 10), dtype=int)

#The L1 penalty is added to the loss inside train_l1().
#NOTE(review): weight_decay is an L2 penalty applied on top of it - confirm it is intended.
optimizer = optim.Adam(model_l1.parameters(), lr=0.001, eps=1e-08, weight_decay=0.09, amsgrad=False)
epochs = 10

lossv, accv = [], []
start=time.time()
for epoch in range(1, epochs + 1):
    # Use the L1-specific loops: train()/test() drive the unrelated global `model`.
    train_l1(epoch)
    # Keep only the final epoch's counts so the plot matches the reported final accuracy.
    confusion_matrix.fill(0)
    test_l1(lossv, accv)
end=time.time()
df_cm = pd.DataFrame(confusion_matrix, index=class_name, columns=class_name)

#plot confusion matrix for this network
plt.figure(figsize = (20,10))
# fmt='d' prints the integer counts in full instead of the default '.2g'.
x=sn.heatmap(df_cm, annot=True, fmt='d')

arr.append(round(float(accv[-1]),2))
arr.append(round(float(end-start),2))
array.append(arr)

In [0]:
#Adagrad optimizer with L1 regularization
arr=['adagrad with L1 Regularization']

# Fresh network for this run; train_l1()/test_l1() use the global `model_l1`.
model_l1 = Net_L1()
if cuda:
    model_l1.cuda()

# 10x10 integer count matrix that test_l1() fills in place
# (rows = predicted class, columns = true class).
confusion_matrix = np.zeros((10, 10), dtype=int)

#The L1 penalty is added to the loss inside train_l1().
#NOTE(review): weight_decay is an L2 penalty applied on top of it - confirm it is intended.
optimizer = optim.Adagrad(model_l1.parameters(), lr=0.01, lr_decay=0.001, weight_decay=0.09, initial_accumulator_value=0)
epochs = 10

lossv, accv = [], []
start=time.time()
for epoch in range(1, epochs + 1):
    # Use the L1-specific loops: train()/test() drive the unrelated global `model`.
    train_l1(epoch)
    # Keep only the final epoch's counts so the plot matches the reported final accuracy.
    confusion_matrix.fill(0)
    test_l1(lossv, accv)
end=time.time()
df_cm = pd.DataFrame(confusion_matrix, index=class_name, columns=class_name)

#plot confusion matrix for this network
plt.figure(figsize = (20,10))
# fmt='d' prints the integer counts in full instead of the default '.2g'.
x=sn.heatmap(df_cm, annot=True, fmt='d')

arr.append(round(float(accv[-1]),2))
arr.append(round(float(end-start),2))
array.append(arr)

In [0]:
#RMSprop optimizer with L1 regularization
arr=['RMSprop with L1 Regularization']

# Fresh network for this run; train_l1()/test_l1() use the global `model_l1`.
model_l1 = Net_L1()
if cuda:
    model_l1.cuda()

# 10x10 integer count matrix that test_l1() fills in place
# (rows = predicted class, columns = true class).
confusion_matrix = np.zeros((10, 10), dtype=int)

#The L1 penalty is added to the loss inside train_l1().
#NOTE(review): weight_decay is an L2 penalty applied on top of it - confirm it is intended.
optimizer = optim.RMSprop(model_l1.parameters(), lr=0.01, alpha=0.99, eps=1e-08, weight_decay=0.09, momentum=0, centered=False)
epochs = 10

lossv, accv = [], []
start=time.time()
for epoch in range(1, epochs + 1):
    # Use the L1-specific loops: train()/test() drive the unrelated global `model`.
    train_l1(epoch)
    # Keep only the final epoch's counts so the plot matches the reported final accuracy.
    confusion_matrix.fill(0)
    test_l1(lossv, accv)
end=time.time()
df_cm = pd.DataFrame(confusion_matrix, index=class_name, columns=class_name)

#plot confusion matrix for this network
plt.figure(figsize = (20,10))
# fmt='d' prints the integer counts in full instead of the default '.2g'.
x=sn.heatmap(df_cm, annot=True, fmt='d')

arr.append(round(float(accv[-1]),2))
arr.append(round(float(end-start),2))
array.append(arr)

In [0]:
# Optimizer comparison table (final accuracy and wall-clock time per run);
# the first row of `array` supplies the column headers.
comparison_table = tabulate(array, headers="firstrow")
print(comparison_table)

In [0]:
#Network used to exercise the hand-written hinge loss: sigmoid activations with dropout
class Net_Hinge(nn.Module):
    """MLP over flattened 3x32x32 inputs that returns log-softmax scores for 10 classes."""

    def __init__(self):
        super().__init__()
        # 3072 -> 2000 -> 200 -> 10, with Dropout(0.2) after each hidden layer.
        self.fc1 = nn.Linear(3*32*32, 2000)
        self.fc1_drop = nn.Dropout(0.2)
        self.fc2 = nn.Linear(2000, 200)
        self.fc2_drop = nn.Dropout(0.2)
        self.fc3 = nn.Linear(200, 10)

    def forward(self, x):
        """Flatten `x` to rows of 3072 values and return log-softmax class scores."""
        hidden = x.view(-1, 3*32*32)
        hidden_stages = (
            (self.fc1, self.fc1_drop),
            (self.fc2, self.fc2_drop),
        )
        for linear, dropout in hidden_stages:
            hidden = dropout(torch.sigmoid(linear(hidden)))
        return F.log_softmax(self.fc3(hidden), dim=1)

# Build the hinge-loss network; train_hinge()/test_hinge() read it through the global `model`.
model = Net_Hinge()
model = model.cuda() if cuda else model
    
def hingeloss(input,target):
    """Multi-class hinge (max-margin) loss, summed over the batch.

    For each sample with true class y and scores s the contribution is
    max(0, 1 + max_{j != y} s_j - s_y).

    Args:
        input: 2-D tensor of class scores, one row per sample.
        target: 1-D integer tensor holding the true class index of each row.

    Returns:
        0-dim tensor with the summed loss; it stays attached to the autograd
        graph so callers can call .backward() and .item() on it.
    """
    true_index = target.view(-1, 1)
    true_scores = input.gather(1, true_index)
    margins = input - true_scores + 1
    # Exclude the true class: its own margin is always exactly 1, which would
    # keep every sample's loss >= 1 even when it is classified with a wide margin.
    margins = margins.scatter(1, true_index, float('-inf'))
    worst_violation = margins.max(dim=1)[0]
    return worst_violation.clamp(min=0).sum()

def train_hinge(epoch, log_interval=100):
    """Train the global `model` for one epoch using `hingeloss` and the global `optimizer`.

    Args:
        epoch: Epoch number, used only in the progress message.
        log_interval: A progress line is printed for every batch whose index
            is a multiple of this value.
    """
    model.train()
    num_batches = len(train_loader)
    for step, (inputs, labels) in enumerate(train_loader):
        if cuda:
            inputs = inputs.cuda()
            labels = labels.cuda()
        optimizer.zero_grad()
        batch_loss = hingeloss(model(inputs), labels)
        batch_loss.backward()
        optimizer.step()
        if step % log_interval != 0:
            continue
        percent_done = 100. * float(step) / num_batches
        print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
            epoch, step * len(inputs), len(train_loader.dataset),
            percent_done, batch_loss.item()))

def test_hinge(loss_vector, accuracy_vector):
    """Evaluate the global `model` on `test_loader` using `hingeloss`.

    Every evaluated sample is also tallied into the global `confusion_matrix`
    (rows = predicted class, columns = true class).

    Args:
        loss_vector: List that receives the mean per-batch hinge loss (float).
        accuracy_vector: List that receives the accuracy in percent (float).
    """
    model.eval()
    test_loss, correct = 0.0, 0
    # Evaluation needs no gradients; no_grad avoids building the autograd graph.
    with torch.no_grad():
        for data, target in test_loader:
            if cuda:
                data, target = data.cuda(), target.cuda()
            output = model(data)
            test_loss += hingeloss(output, target).item()
            pred = output.argmax(dim=1)
            correct += int(pred.eq(target).sum().item())
            # Count the whole batch (whatever its size); add.at accumulates
            # correctly when the same (pred, target) pair occurs repeatedly.
            np.add.at(confusion_matrix,
                      (pred.cpu().numpy(), target.cpu().numpy()), 1)
    test_loss /= len(test_loader)
    loss_vector.append(test_loss)

    accuracy = 100. * correct / float(len(test_loader.dataset))
    accuracy_vector.append(accuracy)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset), float(accuracy)))

    
#Running network with defined hinge loss function
# Distinct label: 'SGD with Dropout' is already used by the NLL-loss run in `array`.
arr=['SGD with Dropout (hinge loss)']

# 10x10 integer count matrix that test_hinge() fills in place
# (rows = predicted class, columns = true class).
confusion_matrix = np.zeros((10, 10), dtype=int)

optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)
epochs = 10

lossv, accv = [], []
start=time.time()
for epoch in range(1, epochs + 1):
    train_hinge(epoch)
    # Keep only the final epoch's counts so the plot matches the reported final accuracy.
    confusion_matrix.fill(0)
    test_hinge(lossv, accv)
end=time.time()
df_cm = pd.DataFrame(confusion_matrix, index=class_name, columns=class_name)

plt.figure(figsize = (20,10))
# fmt='d' prints the integer counts in full instead of the default '.2g'.
x=sn.heatmap(df_cm, annot=True, fmt='d')

arr.append(round(float(accv[-1]),2))
arr.append(round(float(end-start),2))
array.append(arr)

In [0]:
#Depth search: network with 1 hidden layer
class Net_depth_1(nn.Module):
    """MLP (3072 -> 2000 -> 10) with ReLU and dropout, returning log-probabilities."""

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(3*32*32, 2000)
        self.fc1_drop = nn.Dropout(0.2)
        self.fc2 = nn.Linear(2000, 10)

    def forward(self, x):
        """Flatten `x` to rows of 3072 values and return log-softmax class scores."""
        flat = x.view(-1, 3*32*32)
        hidden = self.fc1_drop(F.relu(self.fc1(flat)))
        return F.log_softmax(self.fc2(hidden), dim=1)
    

model = Net_depth_1()
if cuda:
    model.cuda()

print(model)


#SGD optimizer with Dropout regularization
arr=['1 Layer']

# 10x10 integer count matrix that test() fills in place
# (rows = predicted class, columns = true class).
confusion_matrix = np.zeros((10, 10), dtype=int)

optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)
epochs = 10

lossv, accv = [], []
start=time.time()
for epoch in range(1, epochs + 1):
    train(epoch)
    # Keep only the final epoch's counts so the plot matches the reported final accuracy.
    confusion_matrix.fill(0)
    test(lossv, accv)
end=time.time()
df_cm = pd.DataFrame(confusion_matrix, index=class_name, columns=class_name)

plt.figure(figsize = (20,10))
# The counts are already integers; fmt='d' prints them in full, so no
# per-column conversion of the frame is needed.
x=sn.heatmap(df_cm, annot=True, fmt='d')

arr.append(round(float(accv[-1]),2))
arr.append(round(float(end-start),2))
depth_array.append(arr)

In [0]:
#Depth search: network with 2 hidden layers
class Net_depth_2(nn.Module):
    """MLP (3072 -> 2000 -> 200 -> 10) with ReLU and dropout, returning log-probabilities."""

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(3*32*32, 2000)
        self.fc1_drop = nn.Dropout(0.2)
        self.fc2 = nn.Linear(2000, 200)
        self.fc2_drop = nn.Dropout(0.2)
        self.fc3 = nn.Linear(200, 10)

    def forward(self, x):
        """Flatten `x` to rows of 3072 values and return log-softmax class scores."""
        hidden = x.view(-1, 3*32*32)
        hidden_stages = (
            (self.fc1, self.fc1_drop),
            (self.fc2, self.fc2_drop),
        )
        for linear, dropout in hidden_stages:
            hidden = dropout(F.relu(linear(hidden)))
        return F.log_softmax(self.fc3(hidden), dim=1)
    

model = Net_depth_2()
if cuda:
    model.cuda()

print(model)


#SGD optimizer with Dropout regularization
arr=['2 Layers']

# 10x10 integer count matrix that test() fills in place
# (rows = predicted class, columns = true class).
confusion_matrix = np.zeros((10, 10), dtype=int)

optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)
epochs = 10

lossv, accv = [], []
start=time.time()
for epoch in range(1, epochs + 1):
    train(epoch)
    # Keep only the final epoch's counts so the plot matches the reported final accuracy.
    confusion_matrix.fill(0)
    test(lossv, accv)
end=time.time()
df_cm = pd.DataFrame(confusion_matrix, index=class_name, columns=class_name)

plt.figure(figsize = (20,10))
# The counts are already integers; fmt='d' prints them in full, so no
# per-column conversion of the frame is needed.
x=sn.heatmap(df_cm, annot=True, fmt='d')

arr.append(round(float(accv[-1]),2))
arr.append(round(float(end-start),2))
depth_array.append(arr)


In [0]:
#Depth search: network with 3 hidden layers
class Net_depth_3(nn.Module):
    """MLP (3072 -> 2000 -> 1000 -> 200 -> 10) with ReLU and dropout, returning log-probabilities."""

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(3*32*32, 2000)
        self.fc1_drop = nn.Dropout(0.2)
        self.fc2 = nn.Linear(2000, 1000)
        self.fc2_drop = nn.Dropout(0.2)
        self.fc3 = nn.Linear(1000, 200)
        self.fc3_drop = nn.Dropout(0.2)
        self.fc4 = nn.Linear(200, 10)

    def forward(self, x):
        """Flatten `x` to rows of 3072 values and return log-softmax class scores."""
        hidden = x.view(-1, 3*32*32)
        hidden_stages = (
            (self.fc1, self.fc1_drop),
            (self.fc2, self.fc2_drop),
            (self.fc3, self.fc3_drop),
        )
        for linear, dropout in hidden_stages:
            hidden = dropout(F.relu(linear(hidden)))
        return F.log_softmax(self.fc4(hidden), dim=1)
    

model = Net_depth_3()
if cuda:
    model.cuda()


print(model)


#SGD optimizer with Dropout regularization
arr=['3 Layers']

# 10x10 integer count matrix that test() fills in place
# (rows = predicted class, columns = true class).
confusion_matrix = np.zeros((10, 10), dtype=int)

optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)
epochs = 10

lossv, accv = [], []
start=time.time()
for epoch in range(1, epochs + 1):
    train(epoch)
    # Keep only the final epoch's counts so the plot matches the reported final accuracy.
    confusion_matrix.fill(0)
    test(lossv, accv)
end=time.time()
df_cm = pd.DataFrame(confusion_matrix, index=class_name, columns=class_name)

plt.figure(figsize = (20,10))
# The counts are already integers; fmt='d' prints them in full, so no
# per-column conversion of the frame is needed.
x=sn.heatmap(df_cm, annot=True, fmt='d')

arr.append(round(float(accv[-1]),2))
arr.append(round(float(end-start),2))
depth_array.append(arr)

In [0]:
#Depth search: network with 4 hidden layers
class Net_depth_4(nn.Module):
    """MLP (3072 -> 2000 -> 1000 -> 200 -> 100 -> 10) with ReLU and dropout, returning log-probabilities."""

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(3*32*32, 2000)
        self.fc1_drop = nn.Dropout(0.2)
        self.fc2 = nn.Linear(2000, 1000)
        self.fc2_drop = nn.Dropout(0.2)
        self.fc3 = nn.Linear(1000, 200)
        self.fc3_drop = nn.Dropout(0.2)
        self.fc4 = nn.Linear(200, 100)
        self.fc4_drop = nn.Dropout(0.2)
        self.fc5 = nn.Linear(100, 10)

    def forward(self, x):
        """Flatten `x` to rows of 3072 values and return log-softmax class scores."""
        hidden = x.view(-1, 3*32*32)
        hidden_stages = (
            (self.fc1, self.fc1_drop),
            (self.fc2, self.fc2_drop),
            (self.fc3, self.fc3_drop),
            (self.fc4, self.fc4_drop),
        )
        for linear, dropout in hidden_stages:
            hidden = dropout(F.relu(linear(hidden)))
        return F.log_softmax(self.fc5(hidden), dim=1)
    

model = Net_depth_4()
if cuda:
    model.cuda()


print(model)

#SGD optimizer with Dropout regularization
arr=['4 Layers']

# 10x10 integer count matrix that test() fills in place
# (rows = predicted class, columns = true class).
confusion_matrix = np.zeros((10, 10), dtype=int)

optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)
epochs = 10

lossv, accv = [], []
start=time.time()
for epoch in range(1, epochs + 1):
    train(epoch)
    # Keep only the final epoch's counts so the plot matches the reported final accuracy.
    confusion_matrix.fill(0)
    test(lossv, accv)
end=time.time()
df_cm = pd.DataFrame(confusion_matrix, index=class_name, columns=class_name)

plt.figure(figsize = (20,10))
# The counts are already integers; fmt='d' prints them in full, so no
# per-column conversion of the frame is needed.
x=sn.heatmap(df_cm, annot=True, fmt='d')

arr.append(round(float(accv[-1]),2))
arr.append(round(float(end-start),2))
depth_array.append(arr)

In [0]:
#Depth search: network with 5 hidden layers
class Net_depth_5(nn.Module):
    """MLP (3072 -> 2000 -> 1000 -> 200 -> 100 -> 50 -> 10) with ReLU and dropout, returning log-probabilities."""

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(3*32*32, 2000)
        self.fc1_drop = nn.Dropout(0.2)
        self.fc2 = nn.Linear(2000, 1000)
        self.fc2_drop = nn.Dropout(0.2)
        self.fc3 = nn.Linear(1000, 200)
        self.fc3_drop = nn.Dropout(0.2)
        self.fc4 = nn.Linear(200, 100)
        self.fc4_drop = nn.Dropout(0.2)
        self.fc5 = nn.Linear(100, 50)
        self.fc5_drop = nn.Dropout(0.2)
        self.fc6 = nn.Linear(50, 10)

    def forward(self, x):
        """Flatten `x` to rows of 3072 values and return log-softmax class scores."""
        hidden = x.view(-1, 3*32*32)
        hidden_stages = (
            (self.fc1, self.fc1_drop),
            (self.fc2, self.fc2_drop),
            (self.fc3, self.fc3_drop),
            (self.fc4, self.fc4_drop),
            (self.fc5, self.fc5_drop),
        )
        for linear, dropout in hidden_stages:
            hidden = dropout(F.relu(linear(hidden)))
        return F.log_softmax(self.fc6(hidden), dim=1)
    

model = Net_depth_5()
if cuda:
    model.cuda()


print(model)

#SGD optimizer with Dropout regularization
arr=['5 Layers']

# 10x10 integer count matrix that test() fills in place
# (rows = predicted class, columns = true class).
confusion_matrix = np.zeros((10, 10), dtype=int)

optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)
epochs = 10

lossv, accv = [], []
start=time.time()
for epoch in range(1, epochs + 1):
    train(epoch)
    # Keep only the final epoch's counts so the plot matches the reported final accuracy.
    confusion_matrix.fill(0)
    test(lossv, accv)
end=time.time()
df_cm = pd.DataFrame(confusion_matrix, index=class_name, columns=class_name)

plt.figure(figsize = (20,10))
# The counts are already integers; fmt='d' prints them in full, so no
# per-column conversion of the frame is needed.
x=sn.heatmap(df_cm, annot=True, fmt='d')

arr.append(round(float(accv[-1]),2))
arr.append(round(float(end-start),2))
depth_array.append(arr)

In [0]:
# Depth comparison table; the first row of `depth_array` supplies the column headers.
depth_table = tabulate(depth_array, headers="firstrow")
print(depth_table)