In [8]:
import os
import glob
import torch
import pandas as pd
import numpy as np 
import xml.etree.ElementTree as ET
from scipy.io import loadmat
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from tqdm.notebook import tqdm
from sklearn import metrics

In [10]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(torch.cuda.is_available())
print(torch.cuda.get_device_name(0))

def load_dataset():

    # Get train list
    f = loadmat("lists/train_list.mat")
    train_images = [x[0][0] for x in f['file_list']]
    train_labels = [x[0] for x in f['labels']]

    # Get file list
    f = loadmat("lists/test_list.mat")
    test_images = [x[0][0] for x in f['file_list']]
    test_labels = [x[0] for x in f['labels']]

    # Gather data
    train_data = []
    test_data = []

    # Record category ids
    categories = {}

    for i in range(len(train_images) + len(test_images)):

        # Determine if train or test
        image = train_images[i] if i < len(train_images) else test_images[i - len(train_images)]
        label = train_labels[i] if i < len(train_images) else test_labels[i - len(train_images)]
        label_name = os.path.split(image)[0]
        # Label -1 to make it 0-indexed
        categories[label_name] = label-1
        annotation_path = os.path.join("Annotation", image.replace(".jpg", ""))

        # Read XML
        tree = ET.parse(annotation_path)
        root = tree.getroot()

        width = int(root.find("size").find("width").text)
        height = int(root.find("size").find("height").text)

        bndbox = root.find("object").find("bndbox")
        xmin = int(bndbox.find("xmin").text)
        ymin = int(bndbox.find("ymin").text)
        xmax = int(bndbox.find("xmax").text)
        ymax = int(bndbox.find("ymax").text)

        # Append to data
        if i < len(train_images):
            train_data.append(dict(
                image=os.path.join("Images", image),
                label=label-1,
                label_name=label_name,
                width=width,
                height=height,
                xmin=xmin,
                ymin=ymin,
                xmax=xmax,
                ymax=ymax
            ))
        else:
            test_data.append(dict(
                image=os.path.join("Images", image),
                label=label-1,
                label_name=label_name,
                width=width,
                height=height,
                xmin=xmin,
                ymin=ymin,
                xmax=xmax,
                ymax=ymax
            ))


    return train_data, test_data, categories

# Read dataset and gather into dataframe
train_data, test_data, categories = load_dataset()
dftrain = pd.DataFrame(train_data)
dftest = pd.DataFrame(test_data)

True
NVIDIA GeForce GTX 1070


In [11]:
# Get the classes summary
print("Number of classes: ", len(categories))
print("Number of training samples: ", len(dftrain))
print("Number of testing samples: ", len(dftest)//2)
print("Number of validation samples: ", len(dftest)//2)

Number of classes:  120
Number of training samples:  12000
Number of testing samples:  4290
Number of validation samples:  4290


In [12]:
# Inherit from Dataset
class CustomDataset(Dataset):

    def __init__(self, df, transform=None):
        self.df = df
        self.transform = transform

    def __len__(self):
        return len(self.df)
    
    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        image = Image.open(row['image'])
        image = image.convert('RGB')
        label = row['label']
        if self.transform:
            image = self.transform(image)
        return image, label
        
#Pre-Processing
train_transforms = transforms.Compose([
    # Randomly resize and crop the image to 224
    transforms.RandomResizedCrop(224),
    # Randomly flip the image horizontally
    transforms.RandomHorizontalFlip(),
    # Convert the image to a PyTorch Tensor
    transforms.ToTensor(),
    # Normalize the image
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

test_transforms = transforms.Compose([
    # Resize the image to 256
    transforms.Resize(256),
    # Crop the center of the image
    transforms.CenterCrop(224),
    # Convert the image to a PyTorch Tensor
    transforms.ToTensor(),
    # Normalize the image
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

train_dataset = CustomDataset(dftrain, transform=train_transforms)
#Split test set in half to test and validation sets
test_dataset = CustomDataset(dftest.iloc[0:4290], transform=test_transforms)
validation_dataset = CustomDataset(dftest.iloc[4290:], transform=test_transforms)

# Test
train_dataset[0]

(tensor([[[ 1.6495,  1.6153,  1.5639,  ..., -0.9363, -0.9363, -0.9363],
          [ 1.6667,  1.6324,  1.5810,  ..., -0.9363, -0.9363, -0.9363],
          [ 1.6838,  1.6495,  1.5982,  ..., -0.9363, -0.9363, -0.9363],
          ...,
          [-1.2788, -1.2788, -1.2617,  ..., -0.0287, -0.0801, -0.1143],
          [-1.4672, -1.4672, -1.4843,  ..., -0.0116, -0.0629, -0.0972],
          [-1.5528, -1.5699, -1.5870,  ..., -0.0116, -0.0458, -0.0801]],
 
         [[ 1.0280,  0.9930,  0.9405,  ..., -1.0903, -1.0903, -1.0903],
          [ 1.0455,  1.0105,  0.9580,  ..., -1.0903, -1.0903, -1.0903],
          [ 1.0630,  1.0280,  0.9755,  ..., -1.0903, -1.0903, -1.0903],
          ...,
          [ 0.3452,  0.3803,  0.4328,  ..., -0.1099, -0.0924, -0.0749],
          [ 0.1877,  0.1877,  0.2052,  ..., -0.0924, -0.0749, -0.0574],
          [ 0.1001,  0.1001,  0.1001,  ..., -0.0924, -0.0574, -0.0399]],
 
         [[ 0.8797,  0.8448,  0.7925,  ..., -1.5430, -1.5430, -1.5430],
          [ 0.8971,  0.8622,

In [13]:
from torchvision.models import resnet18

# Load the pre-trained model
model = resnet18(weights=None)
model.load_state_dict(torch.load("resnet18-f37072fd.pth"))

#Freeze layers
"""
ct = 0
for child in model.children():
    ct += 1
    if ct > 4:
        break
    for param in child.parameters():
        param.requires_grad = False"""

# Change the output layer
model.fc = torch.nn.Linear(512, len(categories))
model = model.to(device)



def train(epochs = 12, batches = 32, learing_rate = 1e-4):

    # Define the loss function
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr= learing_rate)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)
    
    # Define the data loaders
    train_loader = DataLoader(train_dataset, batch_size= batches, shuffle=True)
    validation_loader = DataLoader(test_dataset, batch_size= batches, shuffle=False)
    
    # Train the model
    model.train()
    
    epoch_accuracy = []
    for epoch in range(epochs):
        for images, labels in tqdm(train_loader, desc="Train Epoch " + str(epoch)):
            images = images.to(device)
            labels = labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels.long())
            loss.backward()
            optimizer.step()
    
        # Evaluate the model using validation set
        model.eval()
        y_true = []
        y_pred = []
        with torch.no_grad():
            for images, labels in tqdm(validation_loader, desc="Validation Epoch " + str(epoch)):
                images = images.to(device)
                labels = labels.to(device)
                outputs = model(images)
                _, predicted = torch.max(outputs, 1)
                y_true.extend(labels.cpu().numpy())
                y_pred.extend(predicted.cpu().numpy())
        model.train()
        accuracy = metrics.accuracy_score(y_true, y_pred)
        print("Epoch: ", epoch)
        print("Accurarcy: ", accuracy)
        print("F1 Score: ", metrics.f1_score(y_true, y_pred, average='macro'))
        print("Confusion Matrix: ", metrics.confusion_matrix(y_true, y_pred))
        epoch_accuracy.append(accuracy)

    return epoch_accuracy , accuracy

In [14]:
import matplotlib.pyplot as plt

In [15]:
def eval_params(epoch = False,batch = False,learnr = False):
    if epoch:
        #evaluate best epoch sizes
        ep1 , _ = train(epochs = 40)
        print(ep1)
        
        #Plot the results
        plt.clf()
        plt.plot(range(len(ep1)),ep1)
        plt.title("Epoch Size vs Accuracy")
        plt.xlabel("Epoch Size")
        plt.ylabel("Accuracy")
        plt.savefig("epochs.png")

    if batch:
        # evaluate best batch sizes
        batch_sizes = [10,32,64,128]
        batch_accuracy = []
        for size in batch_sizes:
            _ ,acc = train(batches = size)
            batch_accuracy.append(acc)

        #Plot the results
        plt.clf()
        plt.plot(batch_sizes,batch_accuracy)
        plt.title("Batch Size vs Accuracy")
        plt.xlabel("batch Size")
        plt.ylabel("Accuracy")
        plt.savefig("batch.png")

    if learnr:
        # evaluate learning rates
        learning_rates = [1e-3,1e-4,1e-5,1e-6]
        lr_accuracy = []
        for lr in learning_rates:
            _ ,acc = train(learing_rate = lr)
            lr_accuracy.append(acc)

        #Plot the results
        plt.clf()
        plt.plot(learning_rates,lr_accuracy)
        plt.title("LR vs Accuracy")
        plt.xlabel("Learning Rate")
        plt.ylabel("Accuracy")
        plt.savefig("learning_rates.png")

        

Run in order to comapre hyperparamters contained within eval_parmas: select paramters to evaluate by setting values to True

In [None]:
eval_params(epoch = False, batch = True, learnr = True)

Train Epoch 0:   0%|          | 0/1200 [00:00<?, ?it/s]

  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


Validation Epoch 0:   0%|          | 0/429 [00:00<?, ?it/s]

Epoch:  0
Accurarcy:  0.5841491841491842
F1 Score:  0.2869756977195615
Confusion Matrix:  [[21  0  0 ...  0  1  0]
 [ 0 50  0 ...  0  0  0]
 [ 0  0 86 ...  0  0  0]
 ...
 [ 0  0  0 ...  0  0  0]
 [ 0  0  0 ...  0  0  0]
 [ 0  0  0 ...  0  0  0]]


Train Epoch 1:   0%|          | 0/1200 [00:00<?, ?it/s]

Validation Epoch 1:   0%|          | 0/429 [00:00<?, ?it/s]

Epoch:  1
Accurarcy:  0.6461538461538462
F1 Score:  0.3264267874441229
Confusion Matrix:  [[ 32   0   0 ...   2   0   0]
 [  0  67   0 ...   0   0   0]
 [  1   0 104 ...   0   0   0]
 ...
 [  0   0   0 ...   0   0   0]
 [  0   0   0 ...   0   0   0]
 [  0   0   0 ...   0   0   0]]


Train Epoch 2:   0%|          | 0/1200 [00:00<?, ?it/s]

Validation Epoch 2:   0%|          | 0/429 [00:00<?, ?it/s]

Epoch:  2
Accurarcy:  0.6983682983682984
F1 Score:  0.3303270579608136
Confusion Matrix:  [[ 16   0   0 ...   0   0   0]
 [  0  72   0 ...   0   0   0]
 [  0   0 113 ...   0   0   0]
 ...
 [  0   0   0 ...   0   0   0]
 [  0   0   0 ...   0   0   0]
 [  0   0   0 ...   0   0   0]]


Train Epoch 3:   0%|          | 0/1200 [00:00<?, ?it/s]

Validation Epoch 3:   0%|          | 0/429 [00:00<?, ?it/s]

Epoch:  3
Accurarcy:  0.6846153846153846
F1 Score:  0.32580656777692185
Confusion Matrix:  [[ 40   0   0 ...   1   0   0]
 [  0  73   0 ...   0   0   0]
 [  0   0 105 ...   0   0   0]
 ...
 [  0   0   0 ...   0   0   0]
 [  0   0   0 ...   0   0   0]
 [  0   0   0 ...   0   0   0]]


Train Epoch 4:   0%|          | 0/1200 [00:00<?, ?it/s]

Validation Epoch 4:   0%|          | 0/429 [00:00<?, ?it/s]

Epoch:  4
Accurarcy:  0.7046620046620047
F1 Score:  0.3355463549146909
Confusion Matrix:  [[ 28   0   0 ...   0   0   0]
 [  0  73   0 ...   0   0   0]
 [  0   1 125 ...   0   0   0]
 ...
 [  0   0   0 ...   0   0   0]
 [  0   0   0 ...   0   0   0]
 [  0   0   0 ...   0   0   0]]


Train Epoch 5:   0%|          | 0/1200 [00:00<?, ?it/s]

Validation Epoch 5:   0%|          | 0/429 [00:00<?, ?it/s]

Epoch:  5
Accurarcy:  0.6944055944055944
F1 Score:  0.3335407846941762
Confusion Matrix:  [[ 31   0   0 ...   0   1   0]
 [  0  75   0 ...   0   0   0]
 [  1   1 117 ...   0   0   0]
 ...
 [  0   0   0 ...   0   0   0]
 [  0   0   0 ...   0   0   0]
 [  0   0   0 ...   0   0   0]]


Train Epoch 6:   0%|          | 0/1200 [00:00<?, ?it/s]

Validation Epoch 6:   0%|          | 0/429 [00:00<?, ?it/s]

Epoch:  6
Accurarcy:  0.7116550116550117
F1 Score:  0.3589262847198806
Confusion Matrix:  [[ 24   0   0 ...   2   0   0]
 [  0  74   0 ...   0   0   0]
 [  0   1 106 ...   0   0   0]
 ...
 [  0   0   0 ...   0   0   0]
 [  0   0   0 ...   0   0   0]
 [  0   0   0 ...   0   0   0]]


Train Epoch 7:   0%|          | 0/1200 [00:00<?, ?it/s]

Validation Epoch 7:   0%|          | 0/429 [00:00<?, ?it/s]

Epoch:  7
Accurarcy:  0.7011655011655011
F1 Score:  0.33831113665478957
Confusion Matrix:  [[ 34   0   0 ...   0   0   0]
 [  0  73   0 ...   0   0   0]
 [  0   0 123 ...   0   0   0]
 ...
 [  0   0   0 ...   0   0   0]
 [  0   0   0 ...   0   0   0]
 [  0   0   0 ...   0   0   0]]


Train Epoch 8:   0%|          | 0/1200 [00:00<?, ?it/s]

Run this Section in order to train and evaluate the model on the test set

In [None]:
# Evaluate on the test set
train()
model.eval()
test_loader = DataLoader(test_dataset, batch_size= batches, shuffle=False)

y_true = []
y_pred = []
with torch.no_grad():
    for images, labels in tqdm(test_loader):
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        y_true.extend(labels.cpu().numpy())
        y_pred.extend(predicted.cpu().numpy())

print("Accuracy: ", metrics.accuracy_score(y_true, y_pred))

In [68]:
print(metrics.classification_report(y_true, y_pred, target_names=list(categories.keys())))

                                          precision    recall  f1-score   support

                     n02085620-Chihuahua       0.64      0.71      0.67        52
              n02085782-Japanese_spaniel       0.82      0.87      0.85        85
                   n02085936-Maltese_dog       0.87      0.76      0.81       152
                      n02086079-Pekinese       0.73      0.78      0.75        49
                      n02086240-Shih-Tzu       0.69      0.72      0.70       114
              n02086646-Blenheim_spaniel       0.87      0.86      0.87        88
                      n02086910-papillon       0.97      0.91      0.94        96
                   n02087046-toy_terrier       0.83      0.67      0.74        72
           n02087394-Rhodesian_ridgeback       0.72      0.40      0.52        72
                  n02088094-Afghan_hound       0.95      0.95      0.95       139
                        n02088238-basset       0.71      0.80      0.75        75
               