# Dog Breed Classifier

## Dataset
Initially, we collected more than 3,000 dog images consist of more than 30 breeds. 10 breeds are selected to be containded in the final dataset.  The train folder contains 1,179 images of dogs. Each image in the folder has the breed label and a numeric id as part of the filename. The test folder contains 100 images.
For each image in the train and test set, the distribution of each dog breeds are equal

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from random import randint
import time
import utils
import os
import pandas as pd
import math
#import seaborn as sns

In [2]:
device= torch.device("cuda")
print(device)

cuda


### Load Dataset from pt file

In [3]:
dataset_folder=''
train_data=torch.load(dataset_folder+'train_data_final.pt')
print(train_data.size())

test_data=torch.load(dataset_folder+'test_data_final.pt')
print(test_data.size())

train_label=torch.load(dataset_folder+'train_label_final.pt')
print(train_label.size())

test_label=torch.load(dataset_folder+'test_label_final.pt')
print(test_label.size())

torch.Size([979, 3, 64, 64])
torch.Size([100, 3, 64, 64])
torch.Size([979])
torch.Size([100])


## Explore the dataset

train_label_list=train_label.tolist()
#test_label_list
df_train_label = pd.DataFrame(test_label_list)
column_name = ["label"]
df_train_label.columns = ["label"]
df

def plot_image_list_count(df_train_label):
    seaborn.countplot(label)
    plt.title('Cats and Dogs')
    
plot_image_list_count(df_train_label)

## 3 Layers MLP

In [4]:
class three_layer_net(nn.Module):

    def __init__(self, input_size, hidden_size1, hidden_size2, hidden_size3,output_size):
        super(three_layer_net , self).__init__()

        self.layer1 = nn.Linear(  input_size   , hidden_size1  , bias=False  )
        self.layer2 = nn.Linear(  hidden_size1 , hidden_size2  , bias=False  )
        self.layer3 = nn.Linear(  hidden_size2 , hidden_size3  , bias=False  )
        self.layer4 = nn.Linear(  hidden_size3 , output_size   , bias=False  )        
        
    def forward(self, x):
        
        y       = self.layer1(x)
        y_hat   = torch.relu(y)
        z       = self.layer2(y_hat)
        z_hat   = torch.relu(z)
        a       = self.layer3(z_hat)
        a_hat   = torch.relu(a)
        scores  = self.layer4(a_hat)
        
        return scores

In [5]:
net=three_layer_net(12288,500,500,500,11)
print(net)
criterion = nn.CrossEntropyLoss()
optimizer=torch.optim.SGD( net.parameters() , lr=0.01 )
bs= 10

three_layer_net(
  (layer1): Linear(in_features=12288, out_features=500, bias=False)
  (layer2): Linear(in_features=500, out_features=500, bias=False)
  (layer3): Linear(in_features=500, out_features=500, bias=False)
  (layer4): Linear(in_features=500, out_features=11, bias=False)
)


In [6]:
def eval_on_test_set_mlp():

    running_error=0
    num_batches=0


    for i in range(0,100,bs):

        # extract the minibatch
        minibatch_data =  test_data[i:i+bs]
        minibatch_label= test_label[i:i+bs]
        # send them to the gpu
        #minibatch_data=minibatch_data.to(device)
        #minibatch_label=minibatch_label.to(device)

        # reshape the minibatch
        inputs = minibatch_data.view(bs,12288)

        # feed it to the network
        scores=net( inputs ) 

        # compute the error made on this batch
        error = utils.get_error( scores , minibatch_label)

        # add it to the running error
        running_error += error.item()

        num_batches+=1


    # compute error rate on the full test set
    total_error = running_error/num_batches

    print( 'error rate on test set =', total_error*100 ,'percent')

In [7]:
start=time.time()

for epoch in range(10):
    
    running_loss=0
    running_error=0
    num_batches=0
    
    shuffled_indices=torch.randperm(970)
 
    for count in range(0,970,bs):
    
        # Set the gradients to zeros
        optimizer.zero_grad()
        
        # create a minibatch       
        indices=shuffled_indices[count:count+bs]
        minibatch_data =  train_data[indices]
        minibatch_label=  train_label[indices]
        
        # send them to the gpu
        #minibatch_data=minibatch_data.to(device)
        #minibatch_label=minibatch_label.to(device)
        
        # reshape the minibatch
        inputs = minibatch_data.view(bs,12288)

        # tell Pytorch to start tracking all operations that will be done on "inputs"
        inputs.requires_grad_()

        # forward the minibatch through the net 
        scores=net( inputs ) 

        # Compute the average of the losses of the data points in the minibatch
        loss =  criterion( scores , minibatch_label) 
        
        # backward pass to compute dL/dU, dL/dV and dL/dW   
        loss.backward()

        # do one step of stochastic gradient descent: U=U-lr(dL/dU), V=V-lr(dL/dU), ...
        optimizer.step()
        

        # START COMPUTING STATS
        
        # add the loss of this batch to the running loss
        running_loss += loss.detach().item()
        
        # compute the error made on this batch and add it to the running error       
        error = utils.get_error( scores.detach() , minibatch_label)
        running_error += error.item()
        
        num_batches+=1        
    
    
    # compute stats for the full training set
    total_loss = running_loss/num_batches
    total_error = running_error/num_batches
    elapsed = time.time()-start
    
#if epoch%2 == 0:
    print('epoch=',epoch, '\t time=', elapsed, '\t loss=', total_loss , '\t error=', total_error*100 ,'percent')
    eval_on_test_set_mlp() 
    print(' ')

epoch= 0 	 time= 0.5229840278625488 	 loss= 2.3628940238166103 	 error= 85.2577318235771 percent
error rate on test set = 88.99999856948853 percent
 
epoch= 1 	 time= 0.9450070858001709 	 loss= 2.2780536745012423 	 error= 83.19587590768165 percent
error rate on test set = 80.0 percent
 
epoch= 2 	 time= 1.3466553688049316 	 loss= 2.1638254222181654 	 error= 80.8247417518773 percent
error rate on test set = 80.0 percent
 
epoch= 3 	 time= 1.7558119297027588 	 loss= 2.0949826326566874 	 error= 79.07216469037164 percent
error rate on test set = 73.99999976158142 percent
 
epoch= 4 	 time= 2.1461446285247803 	 loss= 2.052941962615731 	 error= 77.83505117770324 percent
error rate on test set = 69.99999940395355 percent
 
epoch= 5 	 time= 2.5286316871643066 	 loss= 2.0159183691457376 	 error= 75.25773183586672 percent
error rate on test set = 70.99999964237213 percent
 
epoch= 6 	 time= 2.935267925262451 	 loss= 1.9890743838143103 	 error= 74.43298955553585 percent
error rate on test set = 7

## Create a CNN for Dog Breed Classification

In [8]:
class CNN(nn.Module):

    def __init__(self):

        super(CNN, self).__init__()

        # block 1:         3 x 64 x 64 --> 64 x 32 x 32        
        self.conv1a = nn.Conv2d(3,   64,  kernel_size=8, padding=4 )
        # self.conv1b = nn.Conv2d(64,  64,  kernel_size=3, padding=1 )
        self.pool1  = nn.MaxPool2d(2,2)

        # block 2:         64 x 32 x 32 --> 128 x 16 x 16
        self.conv2a = nn.Conv2d(64,  128, kernel_size=4, padding=2 )
        # self.conv2b = nn.Conv2d(128, 128, kernel_size=3, padding=1 )
        self.pool2  = nn.MaxPool2d(2,2)

        # block 3:         128 x 16 x 16 --> 256 x 8 x 8        
        self.conv3a = nn.Conv2d(128, 256, kernel_size=3, padding=1 )
        # self.conv3b = nn.Conv2d(256, 256, kernel_size=3, padding=1 )
        self.pool3  = nn.MaxPool2d(2,2)
        
        #block 4:          256 x 8 x 8 --> 512 x 4 x 4
        self.conv4a = nn.Conv2d(256, 512, kernel_size=3, padding=1 )
        self.pool4  = nn.MaxPool2d(2,2)

        #block 5:          512 x 4 x 4 --> 512 x 2 x 2
        self.conv5a = nn.Conv2d(512, 512, kernel_size=3, padding=1 )
        self.pool5  = nn.MaxPool2d(2,2)

        # linear layers:   512 x 2 x 2 --> 32768 --> 4096 --> 4096 --> 10
        self.linear1 = nn.Linear(2048, 8192)
        self.linear2 = nn.Linear(8192,8192)
        self.linear3 = nn.Linear(8192, 10)


    def forward(self, x):

        # block 1:         3 x 32 x 32 --> 64 x 16 x 16
        x = self.conv1a(x)
        x = torch.relu(x)
        x = self.pool1(x)

        # block 2:         64 x 16 x 16 --> 128 x 8 x 8
        x = self.conv2a(x)
        x = torch.relu(x)

        x = self.pool2(x)

        # block 3:         128 x 8 x 8 --> 256 x 4 x 4
        x = self.conv3a(x)
        x = torch.relu(x)
        x = self.pool3(x)

        #block 4:          256 x 4 x 4 --> 512 x 2 x 2
        x = self.conv4a(x)
        x = torch.relu(x)
        x = self.pool4(x)

        #block 5:          256 x 4 x 4 --> 256 x 2 x 2
        x = self.conv5a(x)
        x = torch.relu(x)
        x = self.pool5(x)

        # linear layers:   512 x 2 x 2 --> 2048 --> 4096 --> 4096 --> 10
        x = x.view(-1, 2048)
        x = self.linear1(x)
        x = torch.relu(x)
        x = self.linear2(x)
        x = torch.relu(x)
        x = self.linear3(x) 
        
        return x

In [9]:
mean= train_data.mean()
print(mean)
std= train_data.std()
print(std)

tensor(0.4272)
tensor(0.2518)


In [10]:
net=CNN()

print(net)
utils.display_num_param(net)

CNN(
  (conv1a): Conv2d(3, 64, kernel_size=(8, 8), stride=(1, 1), padding=(4, 4))
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2a): Conv2d(64, 128, kernel_size=(4, 4), stride=(1, 1), padding=(2, 2))
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv3a): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv4a): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv5a): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (linear1): Linear(in_features=2048, out_features=8192, bias=True)
  (linear2): Linear(in_features=8192, out_features=8192, bias=True)
  (linear3): Linear(in_features=8

In [11]:
net = net.to(device)
mean = mean.to(device)
std = std.to(device)

In [12]:
criterion = nn.CrossEntropyLoss()
my_lr=0.01 
bs= 16
num_epoch=10

In [13]:
def eval_on_test_set_cnn():

    running_error=0
    num_batches=0

    for i in range(0,100,bs):

        minibatch_data =  test_data[i:i+bs]
        minibatch_label= test_label[i:i+bs]

        minibatch_data=minibatch_data.to(device)
        minibatch_label=minibatch_label.to(device)
        
        inputs = (minibatch_data - mean)/std
        scores=net( inputs ) 
        error = utils.get_error( scores , minibatch_label)
        running_error += error.item()

        num_batches+=1

    total_error = running_error/num_batches
    print( 'error rate on test set =', total_error*100 ,'percent')

In [14]:
start=time.time()

for epoch in range(1,num_epoch):
    
    # divide the learning rate by 2 at epoch 10, 14 and 18
    #if epoch % 25 == 0: 
        #my_lr = my_lr/2
    
    # create a new optimizer at the beginning of each epoch: give the current learning rate.   
    optimizer=torch.optim.SGD( net.parameters() , lr=my_lr )
    #optimizer = torch.optim.Adam(net.parameters(), lr=my_lr)
    # set the running quatities to zero at the beginning of the epoch
    running_loss=0
    running_error=0
    num_batches=0
    
    # set the order in which to visit the image from the training set
    shuffled_indices=torch.randperm(979)
 
    for count in range(0,979,bs):
    
        # Set the gradients to zeros
        optimizer.zero_grad()
        
        # create a minibatch       
        indices=shuffled_indices[count:count+bs]
        minibatch_data =  train_data[indices]
        minibatch_label=  train_label[indices]
        
        # send them to the gpu
        minibatch_data=minibatch_data.to(device)
        minibatch_label=minibatch_label.to(device)  

        inputs = (minibatch_data - mean)/std
        inputs.requires_grad_()
        scores=net( inputs )
        loss =  criterion( scores , minibatch_label)  
        loss.backward()

        optimizer.step() 

        # START COMPUTING STATS       
        # add the loss of this batch to the running loss
        running_loss += loss.detach().item()      
        error = utils.get_error( scores.detach() , minibatch_label)
        running_error += error.item()
        
        num_batches+=1        
    
    # compute stats for the full training set
    total_loss = running_loss/num_batches
    total_error = running_error/num_batches
    elapsed = (time.time()-start)/60
    
    if epoch%10 == 0:
        print('Epoch=',epoch, '\t time=', elapsed,'min','\t lr=', my_lr  ,'\t loss=', total_loss , '\t error=', total_error*100 ,'percent')
        eval_on_test_set_cnn() 
        print('-----------------------------')

## RNN Dog Breed Classificaiton on Gray Scale Images

In [15]:
# Hyper-parameters for RNN
seq_length = 64
input_size = 64
hidden_size = 256
num_layers = 2
num_classes = 10
bs = 50
learning_rate = 0.01
num_epochs=150

In [16]:
train_data=torch.load(dataset_folder+'train_data_gray.pt')
print(train_data.size())
test_data=torch.load(dataset_folder+'test_data_gray.pt')
print(test_data.size())


train_label=torch.load(dataset_folder+'train_label_gray.pt')
print(train_label.size())
test_label=torch.load(dataset_folder+'test_label_gray.pt')
print(test_label.size())
train_dataset = [{'data': train_data[i], 'label': train_label[i]} for i in range(len(train_data)) ]
test_dataset = [{'data': test_data[i], 'label': test_label[i]} for i in range(len(test_data)) ]

# Data loader
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=bs,
                                           shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=bs,
                                          shuffle=False)

torch.Size([979, 64, 64])
torch.Size([100, 64, 64])
torch.Size([979])
torch.Size([100])


In [17]:
# Recurrent neural network (many-to-one)
class RNN(nn.Module):
    
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(RNN, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.layer1 = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.layer2 = nn.Linear(hidden_size, num_classes)

    
    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device) 
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(device)
        

        out, _ = self.layer1(x, (h0, c0))  # out: tensor of shape (batch_size, seq_length, hidden_size)
        
        out = self.layer2(out[:, -1, :])
        return out

In [18]:
def eval_on_test_set_rnn(net):
    running_loss = 0
    num_batches = 0
    num_prediction = 0
    num_correct_prediction = 0
    
    net.eval()
    for batch_id, batch in enumerate(test_loader):
        
        batch_data_num = batch["data"].shape[0]
        num_prediction += batch_data_num
        
        minibatch_data =  batch["data"].to(device)
        minibatch_label = batch["label"].to(device)
                                  
        output  = net( minibatch_data)
        predicted_label = torch.argmax(output, dim=1)
        num_correct_prediction += torch.sum(minibatch_label == predicted_label).item()
                
        loss = criterion(  output ,  minibatch_label )    

        running_loss += loss.item()
        num_batches += 1        
    
    total_loss = running_loss/num_batches 
    accuracy = num_correct_prediction / num_prediction
    print(f'Test: exp(loss) = {math.exp(total_loss):.4f}\tTest accuracy = {(accuracy*100):.4f}')

In [19]:
hidden_size = 256
num_layers = 2
net=RNN(input_size, hidden_size, num_layers, num_classes).to(device)

In [20]:
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate)

In [21]:
start=time.time()
for epoch in range(num_epochs):
    
    # divide the learning rate by 3 except after the first epoch
#     if epoch >= 2:
#         learning_rate = learning_rate / 3
    
    # create a new optimizer at the beginning of each epoch: give the current learning rate.   
    optimizer=torch.optim.SGD( net.parameters() , lr=learning_rate )

    # set the running quatities to zero at the beginning of the epoch
    running_loss=0
    num_batches=0    
    net.train()
    num_prediction = 0
    num_correct_prediction = 0
    for batch_id, batch in enumerate(train_loader):
        
        # Set the gradients to zeros
        optimizer.zero_grad()
        batch_data_num = batch["data"].shape[0]
        num_prediction += batch_data_num
        
        # create a minibatch
        # send them to the gpu
        minibatch_data =  batch["data"].to(device)
        minibatch_label = batch["label"].to(device)
        
        # forward the minibatch through the net        
        output  = net( minibatch_data)
        predicted_label = torch.argmax(output, dim = 1)
        num_correct_prediction += torch.sum(minibatch_label == predicted_label).item()
        
        # Compute the average of the losses of the data points in this huge batch
        loss = criterion(  output ,  minibatch_label )
        loss.backward()

        # do one step of stochastic gradient descent: R=R-lr(dL/dR), V=V-lr(dL/dV), ...
        utils.normalize_gradient(net)
        optimizer.step()
        
        # update the running loss  
        running_loss += loss.item()
        num_batches += 1
        
    accuracy = num_correct_prediction / num_prediction
    # compute stats for the full training set
    total_loss = running_loss / num_batches
    elapsed = time.time() - start
    
    if epoch%10 == 0:
        print(f'Epoch {epoch} / {num_epochs}\ttime = {elapsed:.3f}\tlr = {learning_rate}\nTrain: exp(loss) = {math.exp(total_loss):.4f}\tTrain accuracy = {(accuracy*100):.4f}')
        eval_on_test_set_rnn(net)
        print(f'-----------------------------------')

Epoch 0 / 150	time = 0.312	lr = 0.01
Train: exp(loss) = 10.0121	Train accuracy = 8.2737
Test: exp(loss) = 9.9882	Test accuracy = 10.0000
-----------------------------------
Epoch 10 / 150	time = 3.600	lr = 0.01
Train: exp(loss) = 9.9144	Train accuracy = 11.9510
Test: exp(loss) = 9.9650	Test accuracy = 11.0000
-----------------------------------
Epoch 20 / 150	time = 6.848	lr = 0.01
Train: exp(loss) = 9.8420	Train accuracy = 13.5853
Test: exp(loss) = 9.8909	Test accuracy = 14.0000
-----------------------------------
Epoch 30 / 150	time = 10.405	lr = 0.01
Train: exp(loss) = 9.7014	Train accuracy = 17.4668
Test: exp(loss) = 9.6654	Test accuracy = 19.0000
-----------------------------------
Epoch 40 / 150	time = 14.128	lr = 0.01
Train: exp(loss) = 9.0744	Train accuracy = 20.2247
Test: exp(loss) = 8.7461	Test accuracy = 23.0000
-----------------------------------
Epoch 50 / 150	time = 17.491	lr = 0.01
Train: exp(loss) = 8.2742	Train accuracy = 21.7569
Test: exp(loss) = 7.9707	Test accuracy 