In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from random import randint
import time
import utils
import os
import pandas as pd

## Load Data

In [2]:
# Dataset root and its train/test image sub-folders.
dataset_folder = '../dataset_v1.1/'
train_folder = dataset_folder + 'train/'
test_folder = dataset_folder + 'test/'

# Number of entries in each split folder. os.listdir returns every directory
# entry, so any stray non-image file is included in these counts.
train_size, test_size = (
    len(os.listdir(folder)) for folder in (train_folder, test_folder)
)
train_size, test_size

(980, 251)

In [3]:
# Read the label table (filename / breed columns) and preview its first rows.
label_df = pd.read_csv(os.path.join(dataset_folder, 'label.csv'))
label_df.head()

Unnamed: 0,filename,breed,breed fac
0,affenpinscher_1.jpg,affenpinscher,0
1,affenpinscher_10.jpg,affenpinscher,0
2,affenpinscher_100.jpg,affenpinscher,0
3,affenpinscher_101.jpg,affenpinscher,0
4,affenpinscher_102.jpg,affenpinscher,0


In [4]:
# Load the serialized training tensor and report its dimensions.
train_data = torch.load(os.path.join(dataset_folder, 'train.pt'))
print(train_data.shape)

torch.Size([980, 3, 128, 128])


In [5]:
# Load the serialized test tensor and report its dimensions.
test_data = torch.load(os.path.join(dataset_folder, 'test.pt'))
print(test_data.shape)

torch.Size([250, 3, 128, 128])


In [6]:
# Load the labels that are indexed alongside train_data during training.
train_label = torch.load(os.path.join(dataset_folder, 'train_label.pt'))

In [7]:
# Load the labels that are sliced alongside test_data during evaluation.
test_label = torch.load(os.path.join(dataset_folder, 'test_label.pt'))

## 3 Layers MLP

In [8]:
class three_layer_net(nn.Module):
    """Bias-free MLP: three ReLU hidden layers followed by a linear output layer.

    Args:
        input_size: number of features of each (flattened) input sample.
        hidden_size1: width of the first hidden layer.
        hidden_size2: width of the second hidden layer.
        hidden_size3: width of the third hidden layer.
        output_size: number of scores produced per sample.
    """

    def __init__(self, input_size, hidden_size1, hidden_size2, hidden_size3,output_size):
        super().__init__()

        # All four linear maps are created without a bias term.
        self.layer1 = nn.Linear(in_features=input_size, out_features=hidden_size1, bias=False)
        self.layer2 = nn.Linear(in_features=hidden_size1, out_features=hidden_size2, bias=False)
        self.layer3 = nn.Linear(in_features=hidden_size2, out_features=hidden_size3, bias=False)
        self.layer4 = nn.Linear(in_features=hidden_size3, out_features=output_size, bias=False)

    def forward(self, x):
        """Map a batch ``x`` to its raw output scores (no softmax is applied)."""
        activations = x

        # Each hidden layer is a linear map followed by a ReLU.
        for hidden_layer in (self.layer1, self.layer2, self.layer3):
            activations = torch.relu(hidden_layer(activations))

        # The output layer is purely linear.
        return self.layer4(activations)

### Build the net with the following sizes:
* input size = 128*128*3 = 16384*3 = 49152
* hidden size 1 = 500
* hidden size 2 = 500
* hidden size 3 = 500
* output size = 12
### How many parameters in total? (the one layer net had 30,000 parameters)

In [9]:
# Build the MLP: 49152 input features, three hidden layers of 500 units, 12 output scores.
net = three_layer_net(49152, 500, 500, 500, 12)
print(net)

# Training setup: minibatch size, cross-entropy objective and plain SGD.
bs = 10
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.01)

three_layer_net(
  (layer1): Linear(in_features=49152, out_features=500, bias=False)
  (layer2): Linear(in_features=500, out_features=500, bias=False)
  (layer3): Linear(in_features=500, out_features=500, bias=False)
  (layer4): Linear(in_features=500, out_features=12, bias=False)
)


In [10]:
def eval_on_test_set():
    """Print the classification error rate of ``net`` on the whole test set.

    Reads the notebook-level globals ``net``, ``test_data``, ``test_label`` and
    ``bs``. The test tensor is walked in minibatches of at most ``bs`` samples;
    each minibatch is flattened, scored by the network and compared against its
    labels with ``utils.get_error``. Nothing is returned: the result is printed.
    """
    # Take the size from the data instead of hard-coding it.
    num_samples = test_data.size(0)
    running_error = 0.0

    # Inference only: do not record operations for autograd.
    with torch.no_grad():
        for first in range(0, num_samples, bs):

            # extract the minibatch (the last one may hold fewer than bs samples)
            minibatch_data = test_data[first:first + bs]
            minibatch_label = test_label[first:first + bs]
            batch_size = minibatch_data.size(0)

            # flatten every sample into one row, whatever the batch size is
            inputs = minibatch_data.view(batch_size, -1)

            # feed it to the network
            scores = net(inputs)

            # NOTE(review): assumes utils.get_error returns the mean error rate
            # of the batch as a one-element tensor -- confirm against utils.
            error = utils.get_error(scores, minibatch_label)

            # weight by the batch size so a short final batch is not over-counted
            running_error += error.item() * batch_size

    # error rate over the full test set
    total_error = running_error / num_samples

    print( 'error rate on test set =', total_error*100 ,'percent')

In [11]:
# Train `net` with minibatch SGD, printing train and test statistics after every epoch.
start=time.time()

num_epochs = 10
# Take the training-set size from the data instead of hard-coding it.
num_train = train_data.size(0)

for epoch in range(num_epochs):

    running_loss = 0.0
    running_error = 0.0

    # visit the training samples in a fresh random order each epoch
    shuffled_indices = torch.randperm(num_train)

    for count in range(0, num_train, bs):

        # Set the gradients to zeros
        optimizer.zero_grad()

        # create a minibatch (the last one may hold fewer than bs samples)
        indices = shuffled_indices[count:count + bs]
        minibatch_data = train_data[indices]
        minibatch_label = train_label[indices]
        batch_size = minibatch_data.size(0)

        # flatten every sample into one row, whatever the batch size is
        inputs = minibatch_data.view(batch_size, -1)

        # forward the minibatch through the net
        # (no requires_grad_ on the inputs: only the gradients of the net's
        # parameters are used by the optimizer)
        scores = net(inputs)

        # average loss of the data points in the minibatch
        loss = criterion(scores, minibatch_label)

        # backward pass, then one step of stochastic gradient descent
        loss.backward()
        optimizer.step()

        # START COMPUTING STATS
        # Both stats are per-batch means, so weight them by the batch size to
        # get correct per-sample averages even with a short final batch.
        running_loss += loss.item() * batch_size

        # NOTE(review): assumes utils.get_error returns the mean error rate of
        # the batch as a one-element tensor -- confirm against utils.
        error = utils.get_error(scores.detach(), minibatch_label)
        running_error += error.item() * batch_size

    # compute stats for the full training set
    total_loss = running_loss / num_train
    total_error = running_error / num_train
    elapsed = time.time() - start

    print('epoch=',epoch, '\t time=', elapsed, '\t loss=', total_loss , '\t error=', total_error*100 ,'percent')
    eval_on_test_set()
    print(' ')

epoch= 0 	 time= 8.765172004699707 	 loss= 2.469693407720449 	 error= 87.14285632785486 percent
error rate on test set = 83.20000004768372 percent
 
epoch= 1 	 time= 18.08050775527954 	 loss= 2.3839030679391353 	 error= 83.16326457626965 percent
error rate on test set = 86.0 percent
 
epoch= 2 	 time= 27.267404079437256 	 loss= 2.3066590318874436 	 error= 83.0612241005411 percent
error rate on test set = 82.39999961853027 percent
 
epoch= 3 	 time= 36.39789795875549 	 loss= 2.2639428729913673 	 error= 78.46938766995255 percent
error rate on test set = 81.60000014305115 percent
 
epoch= 4 	 time= 45.72426986694336 	 loss= 2.224763716970171 	 error= 77.04081608324634 percent
error rate on test set = 78.3999993801117 percent
 
epoch= 5 	 time= 54.96675086021423 	 loss= 2.1781095582611707 	 error= 75.81632617785006 percent
error rate on test set = 78.79999995231628 percent
 
epoch= 6 	 time= 64.37316298484802 	 loss= 2.133484975415833 	 error= 73.87755111772188 percent
error rate on test s